소스 검색

Finish pandas scraper

Thibaut Courouble 9 년 전
부모
커밋
659cf94fe8

BIN
assets/images/docs.png


BIN
assets/images/docs@2x.png


+ 1 - 1
assets/javascripts/news.json

@@ -1,7 +1,7 @@
 [
   [
     "2016-09-18",
-    "New documentation: <a href=\"/twig/\">Twig</a>"
+    "New documentations: <a href=\"/pandas/\">pandas</a> and <a href=\"/twig/\">Twig</a>"
   ], [
     "2016-09-05",
     "New documentations: <a href=\"/fish/\">Fish</a>, <a href=\"/bottle/\">Bottle</a> and <a href=\"/scikit_image/\">scikit-image</a>"

+ 5 - 0
assets/javascripts/templates/pages/about_tmpl.coffee

@@ -399,6 +399,11 @@ credits = [
     '2010-2016 Padrino',
     'MIT',
     'https://raw.githubusercontent.com/padrino/padrino-framework/master/padrino/LICENSE.txt'
+  ], [
+    'pandas',
+    '2011-2012 Lambda Foundry, Inc. and PyData Development Team<br>&copy; 2008-2011 AQR Capital Management, LLC<br>&copy; 2008-2014 the pandas development team',
+    'BSD',
+    'https://raw.githubusercontent.com/pydata/pandas/master/LICENSE'
   ], [
     'Perl',
     '1993-2016 Larry Wall and others',

+ 1 - 0
assets/stylesheets/global/_icons.scss

@@ -119,6 +119,7 @@
 ._icon-fish:before          { background-position: -5rem -6rem; @extend %darkIconFix !optional; }
 ._icon-scikit_image:before  { background-position: -6rem -6rem; }
 ._icon-twig:before          { background-position: -7rem -6rem; }
+._icon-pandas:before        { background-position: -8rem -6rem; }
 ._icon-bottle:before        { background-position: 0 -7rem; }
 ._icon-docker:before        { background-position: -1rem -7rem; }
 ._icon-cakephp:before       { background-position: -2rem -7rem; }

+ 2 - 1
assets/stylesheets/pages/_sphinx.scss

@@ -1,5 +1,6 @@
 %sphinx {
-  h2, h3 { @extend %block-heading; }
+  h2 { @extend %block-heading; }
+  h3 { @extend %block-label; }
   h4 { font-size: 1em; }
   > dl:not(.docutils) > dt { @extend %block-label, %label-blue; }
   dd > dl:not(.docutils) > dt { @extend %block-label; }

+ 11 - 0
lib/docs/filters/pandas/clean_html.rb

@@ -4,6 +4,17 @@ module Docs
       def call
         @doc = at_css('.body')
 
+        if root_page?
+          css('a[href$=".zip"]', 'a[href$=".pdf"]', '.toctree-wrapper').remove
+          at_css('h1').content = 'pandas'
+        end
+
+        css('h2 > a.reference', 'h3 > a.reference').each do |node|
+          node.before(node.children).remove
+        end
+
+        css('.anchor-link').remove
+
         doc
       end
     end

+ 9 - 4
lib/docs/filters/pandas/entries.rb

@@ -2,20 +2,25 @@ module Docs
   class Pandas
     class EntriesFilter < Docs::EntriesFilter
       def get_name
-        if dt = at_css('dt')
-          name = dt.content.strip
+        if subpath.start_with?('generated')
+          name = at_css('dt').content.strip
           name.sub! %r{\(.*}, '()'
           name.remove! %r{\s=.*}
-          name.remove! %r{\A(class(method)?) }
+          name.remove! %r{\A(class(method)?) (pandas\.)?}
         else
           name = at_css('h1').content.strip
+          name.prepend "#{css('.toctree-l1 > a:not([href^="http"])').to_a.index(at_css('.toctree-l1.current > a')) + 1}. "
         end
         name.remove! "\u{00B6}"
         name
       end
 
       def get_type
-        css(".toctree-l2.current > a").last.content
+        if subpath.start_with?('generated')
+          css('.toctree-l2.current > a').last.content
+        else
+          'Manual'
+        end
       end
     end
   end

+ 1 - 1
lib/docs/filters/sphinx/clean_html.rb

@@ -49,7 +49,7 @@ module Docs
         end
 
         css('dt').each do |node|
-          next unless node['id'] || node.at_css('code')
+          next unless node['id'] || node.at_css('code, .classifier')
           links = []
           links << node.children.last.remove while node.children.last.try(:name) == 'a'
           node.inner_html = "<code>#{node.content.strip}</code> "

+ 6 - 5
lib/docs/scrapers/pandas.rb

@@ -2,7 +2,7 @@ module Docs
   class Pandas < UrlScraper
     self.name = 'pandas'
     self.type = 'sphinx'
-    self.root_path = 'api.html'
+    self.root_path = 'index.html'
     self.links = {
       home: 'http://pandas.pydata.org/',
       code: 'https://github.com/pydata/pandas'
@@ -13,12 +13,13 @@ module Docs
     # Cannot take only the body, as the sidebar gives info about the type.
     options[:container] = '.document'
 
-    # Using the above container, leads to tons of anchors. Only keep the generated/ pages.
-    options[:only_patterns] = [/\Agenerated\//]
+    options[:skip] = %w(internals.html release.html contributing.html whatsnew.html)
 
     options[:attribution] = <<-HTML
-      &copy; 2008&ndash;2014, the pandas development team.<br>
-      Licensed under the BSD license.
+      &copy; 2011&ndash;2012 Lambda Foundry, Inc. and PyData Development Team<br>
+      &copy; 2008&ndash;2011 AQR Capital Management, LLC<br>
+      &copy; 2008&ndash;2014 the pandas development team<br>
+      Licensed under the 3-clause BSD License.
     HTML
 
     version '0.18' do

BIN
public/icons/docs/pandas/16.png


BIN
public/icons/docs/pandas/16@2x.png