Преглед изворног кода

Finish scikit-learn scraper

Thibaut Courouble пре 9 година
родитељ
комит
c4a543933d

BIN
assets/images/docs.png


BIN
assets/images/docs@2x.png


+ 1 - 1
assets/javascripts/collections/types.coffee

@@ -7,7 +7,7 @@ class app.collections.Types extends app.Collection
       (result[@_groupFor(type)] ||= []).push(type)
     result.filter (e) -> e.length > 0
 
-  GUIDES_RGX = /(^|\()(guides?|tutorials?|reference|book|getting\ started|manual)($|[\):])/i
+  GUIDES_RGX = /(^|\()(guides?|tutorials?|reference|book|getting\ started|manual|examples)($|[\):])/i
   APPENDIX_RGX = /appendix/i
 
   _groupFor: (type) ->

+ 3 - 0
assets/javascripts/news.json

@@ -1,5 +1,8 @@
 [
   [
+    "2016-10-10",
+    "New documentation: <a href=\"/scikit_learn/\">scikit-learn</a>"
+  ], [
     "2016-09-18",
     "New documentations: <a href=\"/pandas/\">pandas</a> and <a href=\"/twig/\">Twig</a>"
   ], [

+ 5 - 0
assets/javascripts/templates/pages/about_tmpl.coffee

@@ -504,6 +504,11 @@ credits = [
     '2011 the scikit-image team',
     'BSD',
     'http://scikit-image.org/docs/dev/license.html'
+  ], [
+    'scikit-learn',
+    '2007-2016 The scikit-learn developers',
+    'BSD',
+    'https://raw.githubusercontent.com/scikit-learn/scikit-learn/master/COPYING'
   ], [
     'Sinon',
     '2010-2016 Christian Johansen',

+ 1 - 0
assets/stylesheets/global/_icons.scss

@@ -120,6 +120,7 @@
 ._icon-scikit_image:before  { background-position: -6rem -6rem; }
 ._icon-twig:before          { background-position: -7rem -6rem; }
 ._icon-pandas:before        { background-position: -8rem -6rem; }
+._icon-scikit_learn:before  { background-position: -9rem -6rem; }
 ._icon-bottle:before        { background-position: 0 -7rem; }
 ._icon-docker:before        { background-position: -1rem -7rem; }
 ._icon-cakephp:before       { background-position: -2rem -7rem; }

+ 2 - 12
assets/stylesheets/pages/_sphinx.scss

@@ -6,12 +6,12 @@
   dd > dl:not(.docutils) > dt { @extend %block-label; }
   dt + dt { margin-top: -.5em; }
 
-  .note, .admonition, div.versionadded, div.versionchanged, .deprecated-removed, .deprecated { @extend %note; }
+  .note, .admonition, div.versionadded, div.versionchanged, .deprecated-removed, .deprecated, .topic { @extend %note; }
 
   .important { @extend %note-orange; }
   .warning, .deprecated-removed, .deprecated { @extend %note-red; }
 
-  .versionmodified, span.title {
+  .versionmodified, span.title, .topic-title {
     display: block;
     font-weight: bold;
   }
@@ -37,16 +37,6 @@
   .admonition-title + dl { padding-top: .5em; }
 
   td > div { margin: 0 !important; }
-
-  .row-fluid {
-    h2 {
-      background: none;
-      border: none;
-      > a {
-        float: none;
-      }
-    }
-  }
 }
 
 ._sphinx {

+ 24 - 0
lib/docs/filters/scikit_learn/clean_html.rb

@@ -0,0 +1,24 @@
+module Docs
+  class ScikitLearn
+    class CleanHtmlFilter < Filter
+      def call
+        if root_page?
+          at_css('h1').content = 'scikit-learn'
+
+          css('.row-fluid').each do |node|
+            html = '<dl>'
+            node.css('.span4').each do |n|
+              html += "<dt>#{n.first_element_child.inner_html}</dt>"
+              html += "<dd>#{n.last_element_child.inner_html}</dd>"
+            end
+            html += '</dl>'
+            node.replace(html)
+          end
+        end
+
+        doc
+      end
+    end
+  end
+end
+

+ 10 - 2
lib/docs/filters/scikit_learn/entries.rb

@@ -7,11 +7,14 @@ module Docs
           name = at_css('dt').content.strip
           name.sub! %r{\(.*}, '()' # Remove function arguments
           name.remove! %r{[\=\[].*} # Remove [source] anchor
-          # name.remove! %r{\s=.*} # Remove the occasional '=' in class names
           name.remove! %r{\A(class(method)?) (sklearn\.)?}
         else
           # User guide
           name = at_css('h1').content.strip
+          name.remove! %r{\(.*?\)}
+          name.remove! %r{(?<![A-Z]):.*}
+          name.prepend 'Tutorial: ' if type == 'Tutorials'
+          name.prepend 'Example: ' if type == 'Examples'
         end
 
         name.remove! "\u{00B6}"
@@ -23,14 +26,19 @@ module Docs
         if subpath.start_with?('modules/generated')
           type = at_css('dt > .descclassname').content.strip
           type.remove! 'sklearn.'
-          type.remove! '.'
+          type.remove! %r{\.\z}
           type
+        elsif subpath.start_with?('tutorial')
+          'Tutorials'
+        elsif subpath.start_with?('auto_examples')
+          'Examples'
         else
           'Guide'
         end
       end
 
       def additional_entries
+        return [] unless subpath.start_with?('modules/generated')
         entries = []
 
         css('.class > dt[id]', '.exception > dt[id]', '.attribute > dt[id]').each do |node|

+ 1 - 1
lib/docs/filters/sphinx/clean_html.rb

@@ -2,7 +2,7 @@ module Docs
   class Sphinx
     class CleanHtmlFilter < Filter
       def call
-        css('.headerlink', 'hr', '#contents .topic-title', '#topics .topic-title', 'colgroup').remove
+        css('.headerlink', 'hr', '#contents .topic-title', '#topics .topic-title', 'colgroup', '.line-block').remove
 
         css('.contents > ul:first-child:last-child.simple > li:first-child:last-child').each do |node|
           node.parent.before(node.at_css('> ul')) if node.at_css('> ul')

+ 8 - 17
lib/docs/scrapers/scikit_learn.rb

@@ -3,32 +3,23 @@ module Docs
     self.name = 'scikit-learn'
     self.slug = 'scikit_learn'
     self.type = 'sphinx'
-    self.release = '0.17.1'
-    self.base_url = "http://scikit-learn.org/0.17/"
+    self.release = '0.18'
+    self.base_url = 'http://scikit-learn.org/stable/'
     self.root_path = 'documentation.html'
-    self.initial_paths = %w(
-      user_guide.html
-      supervised_learning.html
-      unsupervised_learning.html
-      model_selection.html
-      data_transforms.html)
-
     self.links = {
       home: 'http://scikit-learn.org/',
       code: 'https://github.com/scikit-learn/scikit-learn'
     }
 
-    html_filters.push 'scikit_learn/entries', 'sphinx/clean_html'
-
-    options[:container] = '.body'
-
-    options[:root_title] = self.name
+    html_filters.push 'scikit_learn/entries', 'scikit_learn/clean_html', 'sphinx/clean_html'
 
-    options[:only] = self.initial_paths
-    options[:only_patterns] = [/\Amodules/, /\Adatasets/]
+    options[:container] = ->(filter) { filter.root_page? ? '.container-index' : '.body' }
+    options[:skip] = %w(tutorial/statistical_inference/finding_help.html)
+    options[:only_patterns] = [/\Amodules/, /\Adatasets/, /\Atutorial/, /\Aauto_examples/]
+    options[:skip_patterns] = [/\Adatasets\/(?!index)/]
 
     options[:attribution] = <<-HTML
-      &copy; 2007&ndash;2016 The scikit-learn deveopers<br>
+      &copy; 2007&ndash;2016 The scikit-learn developers<br>
       Licensed under the 3-clause BSD License.
     HTML
 

BIN
public/icons/docs/scikit_learn/16.png


BIN
public/icons/docs/scikit_learn/16@2x.png