Bläddra i källkod

Finish Padrino scraper

Thibaut Courouble 9 år sedan
förälder
incheckning
481233050d

BIN
assets/images/icons.png


BIN
assets/images/icons@2x.png


+ 1 - 1
assets/javascripts/news.json

@@ -1,7 +1,7 @@
 [
   [
     "2016-06-05",
-    "New documentation: <a href=\"/kotlin/\">Kotlin</a>"
+    "New documentation: <a href=\"/kotlin/\">Kotlin</a> and <a href=\"/padrino/\">Padrino</a>"
   ], [
     "2016-04-24",
     "New documentations: <a href=\"/numpy/\">NumPy</a> and <a href=\"/apache_pig/\">Apache Pig</a>"

+ 5 - 0
assets/javascripts/templates/pages/about_tmpl.coffee

@@ -350,6 +350,11 @@ credits = [
     '2010-2016 The OpenTSDB Authors',
     'LGPLv2.1',
     'https://raw.githubusercontent.com/OpenTSDB/opentsdb.net/gh-pages/COPYING.LESSER'
+  ], [
+    'Padrino',
+    '2010-2016 Padrino',
+    'MIT',
+    'https://raw.githubusercontent.com/padrino/padrino-framework/master/padrino/LICENSE.txt'
   ], [
     'Perl',
     '1993-2016 Larry Wall and others',

+ 1 - 0
assets/javascripts/views/pages/simple.coffee

@@ -29,6 +29,7 @@ app.views.PostgresPage =
 app.views.RamdaPage =
 app.views.ReactPage =
 app.views.RethinkdbPage =
+app.views.RubydocPage =
 app.views.SinonPage =
 app.views.SocketioPage =
 app.views.SphinxSimplePage =

+ 1 - 0
assets/stylesheets/application-dark.css.scss

@@ -73,6 +73,7 @@
         'pages/requirejs',
         'pages/rethinkdb',
         'pages/rfc',
+        'pages/rubydoc',
         'pages/rust',
         'pages/socketio',
         'pages/sphinx',

+ 1 - 0
assets/stylesheets/application.css.scss

@@ -73,6 +73,7 @@
         'pages/requirejs',
         'pages/rethinkdb',
         'pages/rfc',
+        'pages/rubydoc',
         'pages/rust',
         'pages/socketio',
         'pages/sphinx',

+ 1 - 0
assets/stylesheets/global/_icons.scss

@@ -140,3 +140,4 @@
 ._icon-apache_pig:before    { background-position: -4rem -11rem; }
 ._icon-numpy:before         { background-position: -5rem -11rem; }
 ._icon-kotlin:before        { background-position: -6rem -11rem; }
+._icon-padrino:before       { background-position: -7rem -11rem; }

+ 9 - 0
assets/stylesheets/pages/_rubydoc.scss

@@ -0,0 +1,9 @@
+._rubydoc {
+  @extend %simple;
+
+  p.note { @extend %note; }
+  span.note { @extend %label; }
+  span.note.private { @extend %label-red; }
+
+  h4 + ul { margin-top: 1em; }
+}

+ 31 - 3
lib/docs/filters/padrino/clean_html.rb

@@ -2,9 +2,37 @@ module Docs
   class Padrino
     class CleanHtmlFilter < Filter
       def call
-        css('.summary_toggle').remove
-        css('.inheritanceTree').remove
-        at_css('#content')
+        css('.summary_toggle', '.inheritanceTree', 'h1 .note', '.source_code', '.box_info dl:last-child').remove
+        css('a[href*="travis"]', 'a[href*="gemnasium"]', 'a[href*="codeclimate"]', 'a[href*="gitter"]').remove if root_page?
+
+        css('.signature').each do |node|
+          node.name = 'h3'
+        end
+
+        css('.permalink', 'div.docstring', 'div.discussion', '.method_details_list', '.attr_details',
+            'h3 strong', 'h3 a', 'h3 tt', 'h3 span', 'div.inline p', 'div.inline').each do |node|
+          node.before(node.children).remove
+        end
+
+        css('.tag_title').each do |node|
+          node.name = 'h4'
+        end
+
+        css('span.summary_signature', 'tt', '.tags span.name').each do |node|
+          node.name = 'code'
+          node.inner_html = node.inner_html.strip
+        end
+
+        css('code > a').each do |node|
+          node.inner_html = node.inner_html.strip
+        end
+
+        css('pre.code').each do |node|
+          node.content = node.content
+          node['data-language'] = 'ruby'
+        end
+
+        doc
       end
     end
   end

+ 13 - 19
lib/docs/filters/padrino/entries.rb

@@ -2,34 +2,28 @@ module Docs
   class Padrino
     class EntriesFilter < Docs::EntriesFilter
       def get_name
-        name = at_css('h1, h2').content
-        name.remove! 'Class: '
-        name.remove! 'Module: '
+        at_css('h1').content.split(' ').last
       end
 
       def get_type
-        type = name.dup
-        type.remove! %r{#.+\z}
-        type.split('::')[0..2].join('::')
+        name.split('::')[0..1].join('::')
       end
 
       def additional_entries
-        return [] if root_page?
-        require 'cgi'
+        return [] if initial_page?
 
-        css('.summary_signature').inject [] do |entries, node|
-
-          name = node.children[1].attributes['title'].value
-          name = CGI.unescape(name)
-
-          unless name.start_with?('_')
-            name.prepend self.name
-            entries << [name, self.name.gsub('::','/').downcase.strip + node.children[1].attributes['href'].value.slice(/\#.*/)] unless entries.any? { |entry| entry[0] == name }
-          end
-
-          entries
+        css('.signature').each_with_object [] do |node, entries|
+          next if node.ancestors('.overload').present?
+          name = node.content.strip
+          name.remove! %r{[\s\(].*}
+          name.prepend(self.name)
+          entries << [name, node['id']]
         end
       end
+
+      def include_default_entry?
+        !initial_page?
+      end
     end
   end
 end

+ 12 - 6
lib/docs/scrapers/padrino.rb

@@ -1,11 +1,11 @@
 module Docs
   class Padrino < UrlScraper
-    self.name = 'padrino'
     self.slug = 'padrino'
-    self.type = 'ruby'
-    self.version = 'master'
+    self.type = 'rubydoc'
     self.release = '0.13.2'
-    self.base_url = 'http://www.rubydoc.info/github/padrino/padrino-framework'
+    self.base_url = 'http://www.rubydoc.info/github/padrino/padrino-framework/'
+    self.root_path = 'file/README.rdoc'
+    self.initial_paths = %w(index2)
     self.links = {
       home: 'http://padrinorb.com/',
       code: 'https://github.com/padrino/padrino-framework'
@@ -13,9 +13,15 @@ module Docs
 
     html_filters.push 'padrino/clean_html', 'padrino/entries'
 
+    options[:container] = ->(filter) { filter.root_page? ? '#filecontents' : '#content' }
+
     options[:attribution] = <<-HTML
-                  &copy; Padrino contributors<br>
-                        Licensed under the Creative Commons Attribution License.
+      &copy; 2010&ndash;2016 Padrino<br>
+      Licensed under the MIT License.
     HTML
+
+    stub 'index2' do
+      request_one(url_for('index')).body
+    end
   end
 end

BIN
public/icons/docs/padrino/16.png


BIN
public/icons/docs/padrino/16@2x.png


+ 1 - 0
public/icons/docs/padrino/SOURCE

@@ -0,0 +1 @@
+https://raw.githubusercontent.com/padrino/padrino-web/master/source/images/favicon.ico