Ver Fonte

Finish Babel scraper

Thibaut Courouble há 7 anos atrás
pai
commit
7731a599e1

BIN
assets/images/docs-2.png


BIN
assets/images/docs-2@2x.png


+ 1 - 1
assets/javascripts/news.json

@@ -1,7 +1,7 @@
 [
   [
     "2018-2-4",
-    "New documentations: <a href=\"/jekyll/\">Jekyll</a> and <a href=\"/jsdoc/\">JSDoc</a>"
+    "New documentations: <a href=\"/babel/\">Babel</a>, <a href=\"/jekyll/\">Jekyll</a> and <a href=\"/jsdoc/\">JSDoc</a>"
   ], [
     "2017-11-26",
     "New documentations: <a href=\"/bluebird/\">Bluebird</a>, <a href=\"/eslint/\">ESLint</a> and <a href=\"/homebrew/\">Homebrew</a>"

+ 5 - 0
assets/javascripts/templates/pages/about_tmpl.coffee

@@ -105,6 +105,11 @@ credits = [
     '2010-2017 Caolan McMahon',
     'MIT',
     'https://raw.githubusercontent.com/caolan/async/master/LICENSE'
+  ], [
+    'Babel',
+    '2018 Sebastian McKenzie',
+    'MIT',
+    'https://raw.githubusercontent.com/babel/website/master/LICENSE'
   ], [
     'Backbone.js',
     '2010-2016 Jeremy Ashkenas, DocumentCloud',

+ 0 - 1
assets/stylesheets/application-dark.css.scss

@@ -35,7 +35,6 @@
         'pages/angularjs',
         'pages/apache',
         'pages/async',
-        'pages/babel',
         'pages/bootstrap',
         'pages/c',
         'pages/cakephp',

+ 0 - 1
assets/stylesheets/application.css.scss

@@ -35,7 +35,6 @@
         'pages/angularjs',
         'pages/apache',
         'pages/async',
-        'pages/babel',
         'pages/bootstrap',
         'pages/c',
         'pages/cakephp',

+ 1 - 0
assets/stylesheets/global/_icons.scss

@@ -169,3 +169,4 @@
 ._icon-eslint:before        { background-position: -9rem -2rem; @extend %doc-icon-2; }
 ._icon-homebrew:before      { background-position: 0 -3rem; @extend %doc-icon-2; }
 ._icon-jekyll:before        { background-position: -1rem -3rem; @extend %doc-icon-2; }
+._icon-babel:before         { background-position: -2rem -3rem; @extend %doc-icon-2; }

+ 0 - 10
assets/stylesheets/pages/_babel.scss

@@ -1,10 +0,0 @@
-._babel {
-  @extend %simple;
-  ._note {
-    h1, h2, h3, h4, h5, h6 {
-      &:first-child {
-        margin: 0.5em 0;
-      }
-    }
-  }
-}

+ 18 - 56
lib/docs/filters/babel/clean_html.rb

@@ -2,72 +2,34 @@ module Docs
   class Babel
     class CleanHtmlFilter < Filter
       def call
-        css('.btn-clipboard').remove
-
-        css('div.highlighter-rouge').each do |node|
-          pre = node.at_css('pre')
-
-          # copy over the highlighting metadata
-          match = /language-(\w+)/.match(node['class'])
-          if match
-            lang = match[1]
-            if lang == 'sh'
-              lang = 'bash'
-            end
-            pre['class'] = nil
-            pre['data-language'] = lang
-          end
-
-          # Remove the server-rendered syntax highlighting
-          code = pre.at_css('code')
-          code.content = code.text
-
-          # Remove the div.highlighter-rouge and div.highlight wrapping the <pre>
-          node.add_next_sibling pre
-          node.remove
+        if root_page?
+          doc.inner_html = '<h1>Babel</h1>'
+          return doc
         end
 
+        header = at_css('.docs-header .col-md-12')
+        @doc = at_css('.docs-content')
+        doc.prepend_child(header)
 
-        css('blockquote').each do |node|
-          node.name = 'div'
-          node['class'] = '_note'
-        end
+        css('.btn-clipboard', '.package-links').remove
 
-        css((1..6).map { |n| "h#{n}" }).each do |header|
-          return unless header.at_css('a')
-          header.content = header.at_css('a').content
+        css('.col-md-12', 'h1 a', 'h2 a', 'h3 a', 'h4 a', 'h5 a', 'h6 a').each do |node|
+          node.before(node.children).remove # unwrap: keep the children, drop the wrapper node
         end
 
+        css('div.highlighter-rouge').each do |node|
+          pre = node.at_css('pre')
 
-        header = doc # .docs-content
-          .parent # .row
-          .parent # .container
-          .previous_element # .docs_header
+          lang = node['class'][/language-(\w+)/, 1]
+          lang = 'bash' if lang == 'sh'
+          pre['data-language'] = lang
 
-        toc = doc # .docs-content
-          .parent # .row
-          .at_css('.sidebar')
-        toc['class'] = '_toc'
-        toc.css('a').each do |a|
-          a['class'] = '_toc-link'
-          a.parent.remove if a.content == 'Community Discussion'
+          pre.remove_attribute('class')
+          pre.content = pre.content
+          node.replace(pre)
         end
-        toc.css('ul').attr 'class', '_toc-list'
-
-        h1 = header.at_css('h1')
-        h1.content = h1.content
-          .titleize
-          .sub(/\bEnv\b/, 'env')
-          .sub(/\.[A-Z]/) { |s| s.downcase }
-          .sub(/\.babelrc/i, '.babelrc')
-          .sub('Common Js', 'CommonJS')
-          .sub('J Script', 'JScript')
-          .sub(/regexp/i, 'RegExp')
-          .sub(/api|Es(\d+)|cli|jsx?|[au]md/i) { |s| s.upcase }
 
-        doc.children.before toc
-        doc.children.before header.at_css 'p'
-        doc.children.before h1
+        css('code').remove_attr('class')
 
         doc
       end

+ 19 - 10
lib/docs/filters/babel/entries.rb

@@ -2,29 +2,38 @@ module Docs
   class Babel
     class EntriesFilter < Docs::EntriesFilter
       def get_name
-        at_css('h1').content.sub /^(minify|syntax)|(transform|preset)$/i, ''
+        at_css('h1').content
       end
 
       def get_type
-        if subpath.start_with? 'plugins/preset'
+        if subpath.start_with?('plugins/preset')
           'Presets'
-        elsif subpath.start_with? 'plugins/transform'
+        elsif subpath.start_with?('plugins/transform')
           'Transform Plugins'
-        elsif subpath.start_with? 'plugins/minify'
+        elsif subpath.start_with?('plugins/minify')
           'Minification'
-        elsif subpath.start_with? 'plugins/syntax'
+        elsif subpath.start_with?('plugins/syntax')
           'Syntax Plugins'
-        elsif subpath.start_with? 'plugins'
+        elsif subpath.start_with?('plugins')
           'Plugins'
-        elsif subpath.start_with? 'usage/'
+        elsif subpath.start_with?('usage/')
           'Usage'
+        elsif subpath.start_with?('core-packages/')
+          'Core Packages'
         else
-          'Docs'
+          'Miscellaneous'
         end
       end
 
-      def path
-        super
+      def additional_entries
+        return [] unless slug.include?('api')
+
+        css('h2').each_with_object [] do |node, entries|
+          name = node.content.strip
+          next unless name.start_with?('babel.')
+          name.sub! %r{\(.*}, '()'
+          entries << [name, node['id']]
+        end
       end
     end
   end

+ 9 - 14
lib/docs/scrapers/babel.rb

@@ -1,10 +1,9 @@
 module Docs
   class Babel < UrlScraper
-    self.type = 'babel'
+    self.type = 'simple'
     self.base_url = 'http://babeljs.io/docs/'
-    self.root_path = '/plugins/'
-    self.release = '6.26.0'
-    self.initial_paths = %w[faq tour usage/babel-register core-packages editors usage/caveats]
+    self.release = '6.26.1'
+    self.initial_paths = %w(core-packages/)
     self.links = {
       home: 'https://babeljs.io/',
       code: 'https://github.com/babel/babel'
@@ -13,19 +12,15 @@ module Docs
     html_filters.push 'babel/clean_html', 'babel/entries'
 
     options[:trailing_slash] = true
-    options[:container] = '.docs-content'
-    options[:skip] = %w{setup/ community/videos/}
-    options[:fix_urls] = ->(url) do
-      return url unless url.start_with? self.base_url
-      url.sub %r{/(index\.\w+)?$}, ''
-    end
+    options[:skip] = %w{setup/ editors/ community/videos/}
 
     options[:attribution] = <<-HTML
       &copy; 2018 Sebastian McKenzie<br>
-      Licensed under the
-      <a href="https://github.com/babel/website/blob/master/LICENSE">
-        MIT License
-      </a>
+      Licensed under the MIT License.
     HTML
+
+    stub '' do
+      '<div></div>'
+    end
   end
 end

BIN
public/icons/docs/babel/16.png


BIN
public/icons/docs/babel/16@2x.png


+ 1 - 0
public/icons/docs/babel/SOURCE

@@ -0,0 +1 @@
+https://github.com/babel/website/tree/master/website/static/img