Ver Fonte

Improve Ember.js scraper

Thibaut Courouble há 9 anos
pai
commit
9386a2d368

+ 15 - 0
assets/stylesheets/global/_classes.scss

@@ -47,6 +47,21 @@
   @extend %heading-box;
 }
 
+%pre-heading { // Heading box meant to sit flush on top of the <pre> that immediately follows it
+  padding: .375rem .625rem;
+  line-height: 1.5;
+  border-bottom-left-radius: 0; // square bottom corners so the box joins the pre below
+  border-bottom-right-radius: 0;
+  @extend %heading-box;
+
+  + pre { // adjacent sibling pre: no top border, top radius or top margin, so the two boxes read as one
+    border-top-left-radius: 0;
+    border-top-right-radius: 0;
+    border-top: 0;
+    margin-top: 0;
+  }
+}
+
 //
 // Notes
 //

+ 2 - 10
assets/stylesheets/pages/_bootstrap.scss

@@ -15,22 +15,14 @@
 
   .text-danger { @extend %label, %label-red; }
 
-  .bs-example {
+  p.bs-example {
     padding: .375rem .625rem;
     line-height: 1.5;
     @extend %heading-box;
   }
 
   div.bs-example {
-    border-bottom-left-radius: 0;
-    border-bottom-right-radius: 0;
-
-    + pre {
-      border-top-left-radius: 0;
-      border-top-right-radius: 0;
-      border-top: 0;
-      margin-top: 0;
-    }
+    @extend %pre-heading; // the removed rules above now live in the shared %pre-heading placeholder
   }
 
   a.thumbnail {

+ 10 - 41
assets/stylesheets/pages/_ember.scss

@@ -1,53 +1,22 @@
 ._ember {
-  > .class-info { @extend %note, %note-blue; }
-  > .class-info > p { margin: 0; }
+  @extend %simple;
 
-  > .description > h2, > .description > h3 { font-size: 1rem; }
+  blockquote.class-info { @extend %note-blue; }
+  blockquote.class-info > p { margin: 0; }
 
-  .item-entry { padding-left: 1rem; }
+  .pre-title { @extend %pre-heading; } // div.pre-title is inserted by the Ember clean_html filter before .highlight blocks that have a <thead>
 
-  .title {
-    margin-left: -1rem;
-    @extend %block-heading;
-
-    > h2, > .args, > .flag {
-      display: inline-block;
-      vertical-align: top;
-      margin: 0;
-      line-height: inherit;
-      font-size: inherit;
-    }
-
-    > .flag { // "static"
-      margin-left: .5em;
-      color: $textColorLight;
-    }
-
-    > .type {
-      float: right;
-      font-weight: normal;
-    }
-  }
-
-  .meta { // "defined in"
+  h2 > .flag, h2 > .type {
+    margin-left: .5em;
     color: $textColorLight;
-    margin-bottom: 1em;
+    font-weight: normal;
   }
 
-  .return, .params {
-    margin-top: 1.5em;
+  h2 > .type { float: right; }
 
-    > h3 {
-      display: inline-block;
-      vertical-align: top;
-      margin: 0 0 1em;
-      font-size: inherit;
-      @extend %label, %label-blue;
-    }
-  }
+  .meta { color: $textColorLight; }
 
   dl { margin: 0 1em; }
   dt + dt, dd + dt { margin-top: .5em; }
-
-  p > code { @extend %label; }
+  dt > code { @extend %label; }
 }

+ 56 - 19
lib/docs/filters/ember/clean_html.rb

@@ -2,7 +2,23 @@ module Docs
   class Ember
     class CleanHtmlFilter < Filter
       def call
-        root_page? ? root : other
+        css('hr', '.edit-page').remove # runs for API and guide pages alike, before the branch below
+
+        # Remove code highlighting: keep the <thead> text as a div.pre-title caption and flatten each block to a plain <pre>
+        css('.highlight').each do |node|
+          node.before(%(<div class="pre-title"><code>#{node.at_css('thead').content.strip}</code></div>)) if node.at_css('thead')
+          node.content = node.at_css('.code pre').content
+          node.name = 'pre' # the .highlight wrapper itself becomes the <pre>
+          node['data-language'] = node['class'][/(javascript|js|html|hbs|handlebars)/, 1]
+          node['data-language'] = node['data-language'].sub(/(hbs|handlebars)/, 'html') # hbs/handlebars are relabelled as html
+        end
+
+        if base_url.path.start_with?('/api') # API pages and guide pages are cleaned by separate methods
+          root_page? ? root : api
+        else
+          guide
+        end
+
         doc
       end
 
@@ -28,8 +44,8 @@ module Docs
         end
       end
 
-      def other
-        css(*%w(hr .edit-page #api-options .toc-anchor .inherited .protected .private .deprecated)).remove
+      def api
+        css('#api-options', '.toc-anchor', '.inherited').remove
 
         # Remove tabs and "Index"
         css('.tabs').each do |node|
@@ -41,24 +57,45 @@ module Docs
         css('.method', '.property', '.event').remove_attr('id')
 
         css('h3[data-id]').each do |node|
-          # Put id attributes on headings
-          node.name = 'h2'
-          node['id'] = node['data-id']
-          node.remove_attribute 'data-id'
-          node.content = node.content
-
-          # Move headings, span.args, etc. into a div.title
-          div = Nokogiri::XML::Node.new 'div', doc
-          div['class'] = 'title'
-          node.before(div).parent = div
-          div.add_child(div.next_element) while div.next_element.name == 'span'
+          heading = Nokogiri::XML::Node.new 'h2', doc
+          heading['id'] = node['data-id']
+          node.before(heading).remove
+          heading.content = node.content
+          heading.add_child(heading.next_element) while heading.next_element.name == 'span'
         end
 
-        # Remove code highlighting
-        css('.highlight').each do |node|
-          node.content = node.at_css('.code pre').content
-          node.name = 'pre'
-          node['data-language'] = node['class'][/(javascript|js|html)/, 1]
+        css('> .class-info').each do |node|
+          node.name = 'blockquote'
+        end
+
+        css('div.meta').each do |node|
+          node.name = 'p'
+        end
+
+        css('span.type').each do |node|
+          node.name = 'code'
+        end
+
+        css('.pane', '.item-entry').each do |node|
+          node.before(node.children).remove
+        end
+      end
+
+      def guide # Cleans a guide page (the non-/api branch of #call)
+        @doc = at_css('article') # from here on, work on the node at_css('article') returned
+
+        css('.previous-guide', '.next-guide').remove
+
+        css('img').each do |node|
+          node['src'] = node['src'].sub('https://guides.emberjs.com/', base_url.to_s) # swap the guides site prefix for base_url
+        end
+
+        css('h3, h4, h5').each do |node|
+          node.name = node.name.sub(/\d/) { |i| i.to_i - 1 } # h3 -> h2, h4 -> h3, h5 -> h4
+        end unless at_css('h2') # headings are only promoted when the page has no h2
+
+        css('blockquote > p > em').each do |node|
+          node.before(node.children).remove # children go in front of the <em>; remove is then called on before's result
         end
       end
     end

+ 35 - 23
lib/docs/filters/ember/entries.rb

@@ -2,40 +2,52 @@ module Docs
   class Ember
     class EntriesFilter < Docs::EntriesFilter
       def get_name
-        name = at_css('.api-header').content.split.first
-        # Remove "Ember." prefix if the next character is uppercase
-        name.sub! %r{\AEmber\.([A-Z])(?!EATURES)}, '\1'
-        name == 'Handlebars.helpers' ? 'Handlebars Helpers' : name
+        if base_url.path.start_with?('/api') # API page: name is the first word of the .api-header text
+          name = at_css('.api-header').content.split.first
+          # Remove "Ember." prefix if the next character is uppercase (the lookahead leaves "Ember.FEATURES" alone)
+          name.sub! %r{\AEmber\.([A-Z])(?!EATURES)}, '\1'
+          name == 'Handlebars.helpers' ? 'Handlebars Helpers' : name
+        else
+          name = at_css('article h1').content.remove('Edit Page').strip # guide page: <h1> text without 'Edit Page'
+          name = at_css('li.toc-level-0.selected > a').content if name == 'Introduction' # 'Introduction' pages use the selected li.toc-level-0 link text instead
+          name
+        end
       end
 
       def get_type
-        group = if css('p').any? { |node| node.content.include?('PRIVATE') }
-          'Private'
-        elsif css('p').any? { |node| node.content.include?('DEPRECATED') }
-          'Deprecated'
-        end
-
-        if at_css('.api-header').content.include?('Module')
-          'Modules'
-        elsif name.start_with? 'DS'
-          group ? "Data (#{group})" : 'Data'
-        elsif name.start_with? 'RSVP'
-          'RSVP'
-        elsif name.start_with? 'Test'
-          'Test'
+        if base_url.path.start_with?('/api') # API page: type is derived from the header text and the entry name
+          if at_css('.api-header').content.include?('Module')
+            'Modules'
+          elsif name.start_with? 'DS'
+            'Data'
+          elsif name.start_with? 'RSVP'
+            'RSVP'
+          elsif name.start_with? 'Test'
+            'Test'
+          elsif name.start_with?('Ember')
+            name.split('.')[0..1].join('.') # first two dotted segments, e.g. "Ember.a.b" -> "Ember.a"
+          else
+            name.split('.').first # first dotted segment only
+          end
         else
-          group || name
+          if node = at_css('li.toc-level-0.selected > a') # guide page: label with the selected li.toc-level-0 link text when there is one
+            "Guide: #{node.content.strip}"
+          else
+            'Guide'
+          end
         end
       end
 
       def additional_entries
-        css('.item-entry').map do |node|
-          heading = node.at_css('h2')
+        return [] unless base_url.path.start_with?('/api')
+
+        css('.item-entry:not(.inherited)').map do |node|
+          heading = node.at_css('h3[data-id]')
           name = heading.content.strip
 
           if self.name == 'Handlebars Helpers'
             name << ' (handlebars helper)'
-            next [name, heading['id']]
+            next [name, heading['data-id']]
           end
 
           # Give their own type to "Ember.platform", "Ember.run", etc.
@@ -51,7 +63,7 @@ module Docs
           name << '()'     if node['class'].include? 'method'
           name << ' event' if node['class'].include? 'event'
 
-          [name, heading['id'], type]
+          [name, heading['data-id'], type]
         end
       end
     end

+ 39 - 4
lib/docs/scrapers/ember.rb

@@ -1,32 +1,67 @@
 module Docs
   class Ember < UrlScraper
+    class << self
+      attr_accessor :guide_url # class-level setting, assigned below via self.guide_url =
+    end
+
     self.name = 'Ember.js'
     self.slug = 'ember'
     self.type = 'ember'
     self.release = '2.7.0'
     self.base_url = 'http://emberjs.com/api/'
+    self.guide_url = "https://guides.emberjs.com/v#{self.release}/"
+    self.initial_urls = [guide_url]
     self.links = {
       home: 'http://emberjs.com/',
       code: 'https://github.com/emberjs/ember.js'
     }
 
-    html_filters.push 'ember/clean_html', 'ember/entries', 'title'
+    html_filters.push 'ember/entries', 'ember/clean_html', 'title' # NOTE(review): entries now precedes clean_html, presumably because it queries .item-entry and h3[data-id], which clean_html unwraps/replaces
+
+    options[:trailing_slash] = false
 
     options[:title] = false
     options[:root_title] = 'Ember.js'
 
     options[:container] = ->(filter) do
-      filter.root_page? ? '#toc-list' : '#content'
+      if filter.base_url.path.start_with?('/api') # same /api test the filters use to tell API pages from guide pages
+        filter.root_page? ? '#toc-list' : '#content'
+      else
+        'main' # any page whose base_url path is not under /api
+      end
     end
 
     # Duplicates
     options[:skip] = %w(classes/String.html data/classes/DS.html)
-
-    options[:skip_patterns] = [/\._/]
+    options[:skip_patterns] = [/\._/, /contributing/]
 
     options[:attribution] = <<-HTML
       &copy; 2016 Yehuda Katz, Tom Dale and Ember.js contributors<br>
       Licensed under the MIT License.
     HTML
+
+    def guide_url
+      @guide_url ||= URL.parse(self.class.guide_url) # memoized URL.parse of the class-level guide_url
+    end
+
+    private
+
+    def process_url?(url)
+      base_url.contains?(url) || guide_url.contains?(url) # a url passes if either base_url or guide_url contains? it
+    end
+
+    def process_response(response) # Runs super with @base_url temporarily rewritten to match the response's effective URL
+      original_scheme = @base_url.scheme
+      original_host = @base_url.host
+      original_path = @base_url.path
+      @base_url.scheme = response.effective_url.scheme
+      @base_url.host = response.effective_url.host
+      @base_url.path = response.effective_url.path[/\A\/v[\d\.]+\//, 0] || '/api/' # leading "/v<digits and dots>/" segment if the path has one, else '/api/'
+      super
+    ensure # restore the saved scheme/host/path whether or not super raised
+      @base_url.scheme = original_scheme
+      @base_url.host = original_host
+      @base_url.path = original_path
+    end
   end
 end