瀏覽代碼

Finish Matplotlib scraper

Thibaut Courouble 9 年之前
父節點
當前提交
8d038302b9

二進制
assets/images/docs.png


二進制
assets/images/docs@2x.png


+ 2 - 2
assets/javascripts/news.json

@@ -1,7 +1,7 @@
 [
   [
-    "2016-07-02",
-    "New documentation: <a href=\"/cmake/\">CMake</a>"
+    "2016-07-03",
+    "New documentations: <a href=\"/cmake/\">CMake</a> and <a href=\"/matplotlib/\">Matplotlib</a>"
   ], [
     "2016-06-19",
     "New documentation: <a href=\"/love/\">L&Ouml;VE</a>"

+ 5 - 0
assets/javascripts/templates/pages/about_tmpl.coffee

@@ -295,6 +295,11 @@ credits = [
     '2004 John Gruber',
     'BSD',
     'https://daringfireball.net/projects/markdown/license'
+  ], [
+    'Matplotlib',
+    '2012-2016 Matplotlib Development Team. All rights reserved.',
+    'Custom',
+    'https://raw.githubusercontent.com/matplotlib/matplotlib/master/LICENSE/LICENSE'
   ], [
     'Meteor',
     '2011-2016 Meteor Development Group',

+ 1 - 0
assets/stylesheets/global/_icons.scss

@@ -132,6 +132,7 @@
 ._icon-q:before             { background-position: -5rem -8rem; }
 ._icon-react_native:before  { background-position: 0 -9rem; }
 ._icon-phalcon:before       { background-position: -1rem -9rem; }
+._icon-matplotlib:before    { background-position: -2rem -9rem; }
 ._icon-cmake:before         { background-position: -3rem -9rem; }
 ._icon-elixir:before        { background-position: -4rem -9rem; @extend %darkIconFix !optional; }
 ._icon-vagrant:before       { background-position: -5rem -9rem; }

+ 6 - 3
assets/stylesheets/pages/_sphinx.scss

@@ -1,6 +1,8 @@
 %sphinx {
   h2, h3 { @extend %block-heading; }
-  dl:not(.docutils) > dt { @extend %block-label, %label-blue; }
+  h4 { font-size: 1em; }
+  > dl:not(.docutils) > dt { @extend %block-label, %label-blue; }
+  dl > dl > dt { @extend %block-label; }
   dt + dt { margin-top: -.5em; }
 
   .note, .admonition, .versionadded, .versionchanged, .deprecated-removed { @extend %note; }
@@ -8,7 +10,7 @@
   .warning, .deprecated-removed { @extend %note-red; }
   .versionmodified { font-weight: bold; }
 
-  p > code, li > code { @extend %label; }
+  p > code, li > code, dd > code { @extend %label; }
 
   .admonition-title {
     float: left;
@@ -22,10 +24,11 @@
     clear: left;
     margin: 0;
   }
+  .admonition-title + dl { padding-top: .5em; }
 
   ul.simple { margin: 1em 0; }
 
-  h2 > a, h3 > a, dt > a { float: right; }
+  h2 > a, h3 > a, dt[id] > a.external { float: right; }
 }
 
 ._sphinx {

+ 56 - 1
lib/docs/filters/matplotlib/clean_html.rb

@@ -2,9 +2,64 @@ module Docs
   class Matplotlib
     class CleanHtmlFilter < Filter
       def call
-        css('.headerlink').each do |node|
+        css('.headerlink', 'hr').remove # delete every .headerlink and <hr> node outright
+
+        css('.toc-backref', '.toctree-wrapper', '.contents', 'span.pre', 'pre a > code').each do |node|
+          node.before(node.children).remove # unwrap: hoist the children in front of the node, then drop the node
+        end
+
+        css('div[class*="highlight-"]').each do |node|
+          pre = node.at_css('pre') # NOTE(review): assumes every highlight wrapper contains a <pre>
+          pre.content = pre.content # reset the <pre> to its own text, discarding any child elements
+          pre['data-language'] = node['class'][/highlight\-(\w+)/, 1] # language is the <lang> part of the "highlight-<lang>" class
+          node.replace(pre) # the bare <pre> takes the place of the wrapper div
+        end
+
+        css('span[id]:empty').each do |node|
+          node.next_element['id'] = node['id'] # move the anchor id onto the following element; assumes one exists
           node.remove
         end
+
+        css('.section').each do |node|
+          if node['id'] # carry the section id onto its first child, or onto the second when the first already has an id
+            if node.first_element_child['id']
+              node.element_children[1]['id'] = node['id']
+            else
+              node.first_element_child['id'] = node['id']
+            end
+          end
+
+          node.before(node.children).remove # then unwrap the section itself
+        end
+
+        css('h2 > a > code').each do |node|
+          node.parent.before(node.content).remove # replace the enclosing link with the code's plain text
+        end
+
+        css('dt[id]').each do |node| # collapse each identified <dt> to its stripped text inside a single <code>
+          node.inner_html = "<code>#{node.content.strip}</code>"
+        end
+
+        css('li > p:first-child:last-child').each do |node|
+          node.before(node.children).remove # unwrap a <p> that is the only child of its <li>
+        end
+
+        css('table[border]').each do |node|
+          node.remove_attribute 'border'
+        end
+
+        css('code[class]').each do |node|
+          node.remove_attribute 'class'
+        end
+
+        css('h1').each do |node|
+          node.content = node.content # flatten the heading to plain text
+        end
+
+        css('p.rubric').each do |node|
+          node.name = 'h4' # rubric paragraphs are renamed to <h4> elements
+        end
+
         doc
       end
     end

+ 27 - 9
lib/docs/filters/matplotlib/entries.rb

@@ -1,29 +1,47 @@
 module Docs
   class Matplotlib
     class EntriesFilter < Docs::EntriesFilter
+      NAME_BY_SLUG = { # fixed entry names, looked up by page slug in get_name
+        'matplotlib_configuration_api' => 'matplotlib',
+        'tri_api' => 'tri'
+      }
+
+      TYPE_BY_SLUG = { # fixed entry types, looked up by page slug in get_type
+        'pyplot_summary' => 'pyplot'
+      }
+
       def get_name
+        return NAME_BY_SLUG[slug] if NAME_BY_SLUG.key?(slug) # an explicit override wins over the page heading
         name = at_css('h1').content.strip
         name.remove! "\u{00b6}"
+        name.remove! 'matplotlib.' # drop the "matplotlib." prefix text
+        name.remove! %r{ \(.*\)} # drop a space followed by a parenthesised remark
+        name.downcase!
         name
       end
 
       def get_type
-        name = at_css('h1').content.strip
-        name.remove! "\u{00b6}"
-        name
+        return TYPE_BY_SLUG[slug] if TYPE_BY_SLUG.key?(slug) # an explicit override wins over the derived type
+        name.split('.').first # first dotted segment of `name`; presumably the value built by get_name - confirm in Docs::EntriesFilter
       end
 
       def additional_entries
         entries = []
-        ents = css('dt .descname')
 
-        if ents
-          ents.each do |node|
-            name = node.content.sub(/\(.*\)/, '()')
-            id = node.parent['id']
-            entries << [name, id, get_name]
+        css('.class > dt[id]', '.exception > dt[id]', '.attribute > dt[id]').each do |node| # entry name is the id minus "matplotlib."
+          entries << [node['id'].remove('matplotlib.'), node['id']]
+        end
+
+        css('.data > dt[id]').each do |node|
+          if node['id'].split('.').last.upcase! # skip constants: upcase! returns nil when the last segment is already all-uppercase
+            entries << [node['id'].remove('matplotlib.'), node['id']]
           end
         end
+
+        css('.function > dt[id]', '.method > dt[id]', '.classmethod > dt[id]').each do |node| # callables get a "()" suffix
+          entries << [node['id'].remove('matplotlib.') + '()', node['id']]
+        end
+
         entries
       end
     end

+ 8 - 6
lib/docs/scrapers/matplotlib.rb

@@ -1,9 +1,8 @@
 module Docs
-  class Matplotlib < FileScraper
+  class Matplotlib < UrlScraper
     self.name = 'Matplotlib'
     self.type = 'sphinx'
     self.root_path = 'index.html'
-    self.release = "1.5.1"
     self.links = {
       home: 'http://matplotlib.org/',
       code: 'https://github.com/matplotlib/matplotlib'
@@ -12,13 +11,16 @@ module Docs
     html_filters.push 'matplotlib/entries', 'matplotlib/clean_html'
 
     options[:container] = '.body'
+    options[:skip] = %w(api_changes.html)
 
     options[:attribution] = <<-HTML
-      &copy; Matplotlib Development Team <br>
-      Licensed under the BSD License.
+      &copy; 2012&ndash;2016 Matplotlib Development Team. All rights reserved.<br>
+      Licensed under the Matplotlib License Agreement.
     HTML
 
-    self.dir = '~/workspace/tmp/matplotlib/matplotlib.github.com-master/1.5.1/api/'
-    # self.base_url = 'http://matplotlib.org/api/'
+    version '1.5' do
+      self.release = '1.5.1'
+      self.base_url = 'http://matplotlib.org/1.5.1/api/'
+    end
   end
 end

二進制
public/icons/docs/matplotlib/16.png


二進制
public/icons/docs/matplotlib/16@2x.png


+ 1 - 0
public/icons/docs/matplotlib/SOURCE

@@ -0,0 +1 @@
+https://upload.wikimedia.org/wikipedia/commons/thumb/8/84/Matplotlib_icon.svg/1024px-Matplotlib_icon.svg.png