Pārlūkot izejas kodu

Merge pull request #2535 from NasaGeek/tcllib

Add tcllib scraper
Simon Legner 4 mēneši atpakaļ
vecāks
revīzija
59e990ac08

+ 4 - 0
assets/javascripts/news.json

@@ -1,4 +1,8 @@
 [
+  [
+    "2025-07-14",
+    "New documentation: <a href=\"/tcllib/\">Tcllib</a>"
+  ],
   [
     "2025-06-27",
     "New documentation: <a href=\"/zsh/\">Zsh</a>"

Failā izmaiņas netiks attēlotas, jo tās ir pārāk lielas
+ 1200 - 1260
assets/javascripts/vendor/prism.js


+ 40 - 0
lib/docs/filters/tcllib/clean_html.rb

@@ -0,0 +1,40 @@
+module Docs
+  class Tcllib
+    # Strips navigation chrome from a page and normalizes its anchors,
+    # headings and code blocks.
+    class CleanHtmlFilter < Filter
+      # Cleans the current document in place and returns it.
+      def call
+        css('hr').remove
+        xpath('./div/text()').remove # Navigation text content e.g. [ | | | ]
+        css('div.markdown > a').remove # Navigation links
+
+        # Fix up ToC links: copy each anchor's name onto its parent as an id,
+        # then unwrap the anchor unless it is also a real link.
+        css('a[name]').each do |node|
+          node.parent['id'] = node['name']
+          node.before(node.children).remove unless node['href']
+        end
+
+        # Drop the keywords element, the node after it and any links to it.
+        # Relies on the above ToC fixup
+        if (keywords = at_css('#keywords'))
+          # Nothing may follow the keywords element, so guard against nil.
+          keywords.next_sibling&.remove
+          keywords.remove
+          css('a[href="#keywords"]').remove
+        end
+
+        # Downrank headings for styling; h2 goes first so that the h1 nodes
+        # renamed to h2 below are not downranked a second time.
+        css('h2').each { |node| node.name = 'h3' }
+        css('h1').each { |node| node.name = 'h2' }
+
+        # Tag every <pre> with the Tcl language.
+        css('pre').each { |node| node['data-language'] = 'tcl' }
+
+        doc
+      end
+    end
+  end
+end

+ 22 - 0
lib/docs/filters/tcllib/entries.rb

@@ -0,0 +1,22 @@
+module Docs
+  class Tcllib
+    class EntriesFilter < Docs::EntriesFilter
+      # The first word after the `NAME` heading.
+      # NOTE(review): raises NoMethodError if a page lacks `h1 + p` - confirm none do.
+      def get_name
+        at_css('h1 + p').content.strip.split.first
+      end
+
+      # The types are the categories as indicated on each page (and on the
+      # root page, toc0.md); 'Unfiled' when no category can be read.
+      def get_type
+        category = at_css('a[name="category"]')
+        return 'Unfiled' unless category
+        # The category text sits two siblings past the anchor's parent; either
+        # sibling may be missing, so fall back instead of raising on nil.
+        category.parent.next&.next&.content || 'Unfiled'
+      end
+    end
+  end
+end
+

+ 9 - 0
lib/docs/filters/tcllib/nop.rb

@@ -0,0 +1,9 @@
+module Docs
+  class Tcllib
+    # Placeholder filter that leaves the document untouched.
+    class NopFilter < Filter
+      # Returns the document as is.
+      def call = doc
+    end
+  end
+end

+ 39 - 0
lib/docs/scrapers/tcllib.rb

@@ -0,0 +1,39 @@
+module Docs
+  class Tcllib < UrlScraper
+    self.name = 'Tcllib'
+    self.type = 'simple'
+    self.slug = 'tcllib'
+    self.release = '2.0'
+    self.base_url = 'https://core.tcl-lang.org/tcllib/doc/trunk/embedded/md/'
+    self.root_path = 'toc0.md'
+    self.links = {
+      home: 'https://core.tcl-lang.org/tcllib/doc/trunk/embedded/index.md',
+      code: 'https://sourceforge.net/projects/tcllib/files/tcllib/'
+    }
+
+    html_filters.push 'tcllib/entries', 'tcllib/clean_html', 'title'
+    # The docs have incorrect <base> elements, so we should just skip that
+    html_filters.replace('apply_base_url', 'tcllib/nop')
+
+    options[:root_title] = 'Tcllib Documentation'
+    options[:container] = '.content'
+    options[:skip] = [
+      # Full of broken links, path improperly duplicates "tcllib" segment
+      'tcllib/toc.md',
+      # The other ones aren't terribly useful
+      'toc.md', 'toc1.md', 'toc2.md',
+      # Keyword index
+      'index.md'
+    ]
+
+    options[:attribution] = <<-HTML
+      Licensed under the <a href="https://core.tcl-lang.org/tcllib/doc/trunk/embedded/md/tcllib/files/devdoc/tcllib_license.md">BSD license</a>
+    HTML
+
+    # Latest version: first dotted number in the home page's first <strong>.
+    def get_latest_version(opts)
+      doc = fetch_doc('https://core.tcl-lang.org/tcllib/doc/trunk/embedded/index.md', opts)
+      doc.at_css('strong').content.match(/\d+(?:\.\d+)*/)[0] # digit-led, so a lone "." never matches
+    end
+  end
+end

BIN
public/icons/docs/tcllib/16.png


BIN
public/icons/docs/tcllib/16@2x.png


+ 1 - 0
public/icons/docs/tcllib/SOURCE

@@ -0,0 +1 @@
+https://commons.wikimedia.org/wiki/File:Tcl.svg

Daži faili netika attēloti, jo izmaiņu fails ir pārāk liels