Selaa lähdekoodia

Implement Zsh scraper

spamguy 6 kuukautta sitten
vanhempi
commit
fe827b3da3

+ 20 - 0
lib/docs/filters/zsh/clean_html.rb

@@ -0,0 +1,20 @@
+module Docs
+  class Zsh
+    class CleanHtmlFilter < Filter
+      def call
+        css('table.header', 'table.menu', 'hr').remove
+
+        # Remove indices from headers.
+        css('h1', 'h2', 'h3').each do |node|
+          node.content = node.content.match(/^[\d\.]* (.*)$/)&.captures&.first
+        end
+
+        css('h2.section ~ a').each do |node|
+          node.next_element['id'] = node['name']
+        end
+
+        doc
+      end
+    end
+  end
+end

+ 41 - 0
lib/docs/filters/zsh/entries.rb

@@ -0,0 +1,41 @@
+module Docs
+  class Zsh
+    class EntriesFilter < Docs::EntriesFilter
+      def get_name
+        extract_header_text(at_css('h1.chapter').content)
+      end
+
+      def additional_entries
+        entries = []
+
+        css('h2.section').each do |node|
+          type = get_type
+
+          # Linkable anchor sits above <h2>.
+          a = node.xpath('preceding-sibling::a').last
+          header_text = extract_header_text(node.content)
+
+          if type == 'Zsh Modules'
+            module_name = header_text.match(/The (zsh\/.*) Module/)&.captures&.first
+            header_text = module_name if module_name.present?
+          end
+
+          entries << [header_text, a['name'], type] if header_text != 'Description'
+        end
+
+        entries
+      end
+
+      def get_type
+        extract_header_text(at_css('h1.chapter').content)
+      end
+
+      private
+
+      # Extracts text from a string, dropping indices preceding it.
+      def extract_header_text(str)
+        str.match(/^[\d\.]* (.*)$/)&.captures&.first
+      end
+    end
+  end
+end

+ 33 - 0
lib/docs/scrapers/zsh.rb

@@ -0,0 +1,33 @@
+module Docs
+  class Zsh < UrlScraper
+    self.type = 'zsh'
+    self.release = '5.9.0'
+    self.base_url = 'https://zsh.sourceforge.io/Doc/Release/'
+    self.root_path = 'index.html'
+    self.links = {
+      home: 'https://zsh.sourceforge.io/',
+      code: 'https://sourceforge.net/p/zsh/web/ci/master/tree/',
+    }
+
+    options[:skip] = %w(
+      zsh_toc.html
+      zsh_abt.html
+      The-Z-Shell-Manual.html
+      Introduction.html
+    )
+    options[:skip_patterns] = [/-Index.html/]
+
+    html_filters.push 'zsh/entries', 'zsh/clean_html'
+
+    options[:attribution] = <<-HTML
+      The Z Shell is copyright &copy; 1992&ndash;2017 Paul Falstad, Richard Coleman,
+ Zoltán Hidvégi, Andrew Main, Peter Stephenson, Sven Wischnowsky, and others.<br />
+      Licensed under the MIT License.
+    HTML
+
+    def get_latest_version(opts)
+      body = fetch('https://zsh.sourceforge.io/Doc/Release', opts)
+      body.scan(/, Zsh version ([0-9.]+)/)[0][0][0...-1]
+    end
+  end
+end

BIN
public/icons/docs/zsh/16.png


BIN
public/icons/docs/zsh/16@2x.png


+ 2 - 0
public/icons/docs/zsh/SOURCE

@@ -0,0 +1,2 @@
+https://sourceforge.net/p/zsh/web/ci/master/tree/favicon.png
+