Bläddra i källkod

Decouple Relay scraper from React scraper

Thibaut Courouble 8 år sedan
förälder
incheckning
6d580a9255
3 ändrade filer med 111 tillägg och 4 borttagningar
  1. 63 0
      lib/docs/filters/relay/clean_html.rb
  2. 43 0
      lib/docs/filters/relay/entries.rb
  3. 5 4
      lib/docs/scrapers/relay.rb

+ 63 - 0
lib/docs/filters/relay/clean_html.rb

@@ -0,0 +1,63 @@
+module Docs
+  class Relay
+    class CleanHtmlFilter < Filter
+      def call
+        @doc = at_css('.inner-content, article.withtoc')
+
+        if root_page?
+          at_css('h1').content = 'Relay Documentation'
+        end
+
+        css('.docs-prevnext', '.hash-link', '.edit-page-link', '.edit-github', 'a.hash', '.edit-page-block', 'a.show', 'a.hide', 'hr').remove
+
+        css('table h1', 'table h2', 'table h3').each do |node|
+          table = node
+          table = table.parent until table.name == 'table'
+          table.replace(node)
+        end
+
+        css('a.anchor', 'a.hashref').each do |node|
+          node.parent['id'] ||= node['name'] || node['id']
+        end
+
+        css('.highlight').each do |node|
+          node.name = 'pre'
+          node.css('.gutter').remove
+          node['data-language'] = node.at_css('[data-lang]').try(:[], 'data-lang') || 'js'
+          node.content = node.content.strip
+        end
+
+        css('table.highlighttable').each do |node|
+          node.replace(node.at_css('pre.highlight'))
+        end
+
+        css('.prism').each do |node|
+          node.name = 'pre'
+          node['data-language'] = node['class'][/(?<=language\-)(\w+)/]
+          node.content = node.content
+        end
+
+        css('blockquote > p:first-child').each do |node|
+          node.remove if node.content.strip == 'Note:'
+        end
+
+        css('h3#props', 'h3#methods').each { |node| node.name = 'h2' }
+        css('h4.propTitle').each { |node| node.name = 'h3' }
+
+        css('> div > div', '> div', 'div > span', '.props', '.prop').each do |node|
+          node.before(node.children).remove
+        end
+
+        css('a pre', 'h3 .propType').each do |node|
+          node.name = 'code'
+        end
+
+        css('a[target]').each do |node|
+          node.remove_attribute('target')
+        end
+
+        doc
+      end
+    end
+  end
+end

+ 43 - 0
lib/docs/filters/relay/entries.rb

@@ -0,0 +1,43 @@
+module Docs
+  class Relay
+    class EntriesFilter < Docs::EntriesFilter
+      def get_name
+        at_css('h1').children.select(&:text?).map(&:content).join.strip
+      end
+
+      def get_type
+        link = at_css('.nav-docs-section .active, .toc .active')
+        section = link.ancestors('.nav-docs-section, section').first
+        type = section.at_css('h3').content.strip
+        type
+      end
+
+      def additional_entries
+        entries = []
+
+        css('.inner-content h3 code, .inner-content h4 code').each do |node|
+          name = node.content
+          name.remove! %r{[#\(\)]}
+          name.remove! %r{\w+\:}
+          name.strip!
+          id = name.parameterize
+          node.parent['id'] = id
+          entries << [name, id, 'Reference']
+        end
+
+        css('.apiIndex a pre').each do |node|
+          next unless node.parent['href'].start_with?('#')
+          id = node.parent['href'].remove('#')
+          name = node.content.strip
+          sep = name.start_with?('static') ? '.' : '#'
+          name.remove! %r{(abstract|static) }
+          name.sub! %r{\(.*\)}, '()'
+          name.prepend(self.name + sep)
+          entries << [name, id]
+        end
+
+        entries
+      end
+    end
+  end
+end

+ 5 - 4
lib/docs/scrapers/relay.rb

@@ -1,6 +1,6 @@
 module Docs
-  class Relay < React
-    self.type = 'react'
+  class Relay < UrlScraper
+    self.type = 'simple'
     self.release = '1.4.1'
     self.base_url = 'https://facebook.github.io/relay/docs/'
     self.root_path = 'getting-started.html'
@@ -9,8 +9,9 @@ module Docs
       code: 'https://github.com/facebook/relay'
     }
 
-    options[:root_title] = 'Relay Documentation'
-    options[:only_patterns] = nil
+    html_filters.push 'relay/entries', 'relay/clean_html'
+
+    options[:container] = '.documentationContent'
     options[:skip] = %w(videos.html graphql-further-reading.html)
 
     options[:attribution] = <<-HTML