浏览代码

Merge pull request #2519 from spamguy/zsh

Implement Zsh scraper
Simon Legner 5 月之前
父节点
当前提交
fb622669fb

+ 4 - 0
assets/javascripts/news.json

@@ -1,4 +1,8 @@
 [
+  [
+    "2025-06-27",
+    "New documentation: <a href=\"/zsh/\">Zsh</a>"
+  ],
   [
     "2025-06-04",
     "New documentation: <a href=\"/es_toolkit/\">es-toolkit</a>"

+ 20 - 0
lib/docs/filters/zsh/clean_html.rb

@@ -0,0 +1,20 @@
+module Docs
+  class Zsh
+    class CleanHtmlFilter < Filter
+      def call
+        css('table.header', 'table.menu', 'hr').remove
+
+        # Remove indices from headers.
+        css('h1', 'h2', 'h3').each do |node|
+          node.content = node.content.match(/^[\d\.]* (.*)$/)&.captures&.first
+        end
+
+        css('h2.section ~ a').each do |node|
+          node.next_element['id'] = node['name']
+        end
+
+        doc
+      end
+    end
+  end
+end

+ 74 - 0
lib/docs/filters/zsh/entries.rb

@@ -0,0 +1,74 @@
+module Docs
+  class Zsh
+    class EntriesFilter < Docs::EntriesFilter
+      def get_name
+        extract_header_text(at_css('h1.chapter').content)
+      end
+
+      def additional_entries
+        entries = []
+        used_fns = []
+        
+        css('h2.section').each do |node|
+          type = get_type
+          # Linkable anchor sits above <h2>.
+          a = node.xpath('preceding-sibling::a').last
+          header_text = extract_header_text(node.content)
+
+          case type
+          when 'Zsh Modules'
+            module_name = header_text.match(/The (zsh\/.* Module)/)&.captures&.first
+            header_text = module_name if module_name.present?
+          when 'Calendar Function System'
+            header_text << ' (Calendar)'
+          end
+
+          entries << [header_text, a['name'], type] unless header_text.start_with?('Description')
+        end
+
+        # Functions are documented within <dl> elements.
+        # Names are wrapped in <dt>, details within <dd>.
+        # <dd> can also contain anchors for the next function.
+        doc.css('> dl').each do |node|
+          type = get_type
+          fn_names = node.css('> dt')
+          node.css('dd a[name]').each_with_index do |anchor, i|
+            if fn_names[i].present? && anchor['name'].present?
+              fn_names[i]['id'] = anchor['name']
+
+              # Groups of functions are sometimes comma-delimited.
+              # Strip arguments, flags, etc. from function name.
+              # Skip flag-only headers.
+              fn_names[i].inner_html.split(', ').each do |fn|
+                fn.gsub!(/<(?:tt|var)>(.+?)<\/(?:tt|var)>/, '\1')
+                fn = fn.split(' ').first
+                fn.gsub!(/(?:[\[\(]).*(?:[\]\)]).*$/, '')
+
+                # Add context for operators.
+                fn << " (#{type})" if fn.length == 1
+
+                if fn.present? && !fn.match?(/^[\-\[]/) && !used_fns.include?(fn)
+                  used_fns << fn
+                  entries << [fn, anchor['name'], type]
+                end
+              end
+            end
+          end
+        end
+
+        entries
+      end
+
+      def get_type
+        extract_header_text(at_css('h1.chapter').content)
+      end
+
+      private
+
+      # Extracts text from a string, dropping indices preceding it.
+      def extract_header_text(str)
+        str.match(/^[\d\.]* (.*)$/)&.captures&.first
+      end
+    end
+  end
+end

+ 33 - 0
lib/docs/scrapers/zsh.rb

@@ -0,0 +1,33 @@
+module Docs
+  class Zsh < UrlScraper
+    self.type = 'zsh'
+    self.release = '5.9.0'
+    self.base_url = 'https://zsh.sourceforge.io/Doc/Release/'
+    self.root_path = 'index.html'
+    self.links = {
+      home: 'https://zsh.sourceforge.io/',
+      code: 'https://sourceforge.net/p/zsh/web/ci/master/tree/',
+    }
+
+    options[:skip] = %w(
+      zsh_toc.html
+      zsh_abt.html
+      The-Z-Shell-Manual.html
+      Introduction.html
+    )
+    options[:skip_patterns] = [/-Index.html/]
+
+    html_filters.push 'zsh/entries', 'zsh/clean_html'
+
+    options[:attribution] = <<-HTML
+      The Z Shell is copyright &copy; 1992&ndash;2017 Paul Falstad, Richard Coleman,
+ Zoltán Hidvégi, Andrew Main, Peter Stephenson, Sven Wischnowsky, and others.<br />
+      Licensed under the MIT License.
+    HTML
+
+    def get_latest_version(opts)
+      body = fetch('https://zsh.sourceforge.io/Doc/Release', opts)
+      body.scan(/Zsh version ([0-9.]+)/)[0][0]
+    end
+  end
+end

二进制
public/icons/docs/zsh/16.png


二进制
public/icons/docs/zsh/16@2x.png


+ 2 - 0
public/icons/docs/zsh/SOURCE

@@ -0,0 +1,2 @@
+https://sourceforge.net/p/zsh/web/ci/master/tree/favicon.png
+