浏览代码

Finish Perl scraper

Thibaut Courouble 9 年之前
父节点
当前提交
f78b3658b2

二进制
assets/images/icons.png


二进制
assets/images/icons@2x.png


+ 1 - 1
assets/javascripts/collections/types.coffee

@@ -7,7 +7,7 @@ class app.collections.Types extends app.Collection
       (result[@_groupFor(type)] ||= []).push(type)
     result.filter (e) -> e.length > 0
 
-  GUIDES_RGX = /(^|[\s\(])(guide|guides|tutorial|reference|playbooks|getting\ started)($|[\s\):])/i
+  GUIDES_RGX = /(^|[\s\(])(guide|guides|tutorial|reference|playbooks|getting\ started|manual)($|[\s\):])/i
 
   _groupFor: (type) ->
     if GUIDES_RGX.test(type.name)

+ 3 - 0
assets/javascripts/news.json

@@ -1,5 +1,8 @@
 [
   [
+    "2016-04-17",
+    "New documentation: <a href=\"/perl/\">Perl</a>"
+  ], [
     "2016-04-10",
     "New documentations: <a href=\"/browser_support_tables/\">Support tables (caniuse.com)</a>, <a href=\"/gcc/\">GCC</a> and <a href=\"/gnu_fortran/\">GNU Fortran</a>"
   ], [

+ 5 - 0
assets/javascripts/templates/pages/about_tmpl.coffee

@@ -340,6 +340,11 @@ credits = [
     '2010-2016 The OpenTSDB Authors',
     'LGPLv2.1',
     'https://raw.githubusercontent.com/OpenTSDB/opentsdb.net/gh-pages/COPYING.LESSER'
+  ], [
+    'Perl',
+    '1993-2016 Larry Wall and others',
+    'GPLv1',
+    'http://perldoc.perl.org/index-licence.html'
   ], [
     'Phalcon',
     '2011-2015 Phalcon Framework Team',

+ 0 - 6
assets/javascripts/views/pages/perl.coffee

@@ -1,6 +0,0 @@
-#= require views/pages/base
-
-class app.views.PerlPage extends app.views.BasePage
-  prepare: ->
-    @highlightCode @findAllByTag('pre'), 'perl'
-    return

+ 1 - 0
assets/javascripts/views/pages/simple.coffee

@@ -9,6 +9,7 @@ class app.views.SimplePage extends app.views.BasePage
 app.views.EmberPage =
 app.views.GoPage =
 app.views.MeteorPage =
+app.views.PerlPage =
 app.views.RamdaPage =
 app.views.ReactPage =
 app.views.RethinkdbPage =

+ 1 - 0
assets/stylesheets/global/_icons.scss

@@ -136,3 +136,4 @@
 ._icon-browser_support_tables:before { background-position: 0rem -11rem; }
 ._icon-gnu_fortran:before   { background-position: -1rem -11rem; }
 ._icon-gcc:before           { background-position: -2rem -11rem; }
+._icon-perl:before          { background-position: -3rem -11rem; }

+ 2 - 9
assets/stylesheets/pages/_perl.scss

@@ -1,12 +1,5 @@
 ._perl {
+  @extend %simple;
 
-  h2 { @extend %block-heading; }
-  h3 { @extend %block-label; }
-  h4 { @extend %block-label, %label-blue; }
-
-  .perlvar,
-  .perlfunction {
-    @extend %block-label, %label-blue;
-  }
-
+  > h4 { @extend %block-label; }
 }

+ 20 - 14
lib/docs/filters/perl/clean_html.rb

@@ -1,16 +1,9 @@
 module Docs
   class Perl
     class CleanHtmlFilter < Filter
-      REMOVE_LIST = %w(
-        noscript
-        #recent_pages
-        #from_search
-        #page_index
-        .mod_az_list
-      )
-
       def call
         root_page? ? root : other
+        doc
       end
 
       def root
@@ -20,12 +13,13 @@ module Docs
       def other
         @doc = at_css('#content_body')
 
-        css(*REMOVE_LIST).remove
+        css('noscript', '#recent_pages', '#from_search', '#page_index', '.mod_az_list').remove
+
+        css('h1, h2, h3, h4').each do |node|
+          node.name = node.name.sub(/\d/) { |i| i.to_i + 1 }
+        end
 
-        css('h4').each { |node| node.name = 'h5' }
-        css('h3').each { |node| node.name = 'h4' }
-        css('h2').each { |node| node.name = 'h3' }
-        css('h1').drop(1).each { |node| node.name = 'h2' }
+        at_css('h2').name = 'h1'
 
         css('a[name] + h2', 'a[name] + h3', 'a[name] + h4', 'a[name] + h5').each do |node|
           node['id'] = node.previous_element['name']
@@ -39,7 +33,19 @@ module Docs
           node.css('li').each do |li|
             li.content = li.content + "\n"
           end
-          node.content =  node.content
+          node.content = node.content
+          node.inner_html = node.inner_html.strip_heredoc
+          node['data-language'] = 'perl'
+        end
+
+        if slug =~ /functions/ || slug == 'perlvar'
+          css('ul > li[id]').each do |node|
+            heading = node.at_css('b')
+            heading.name = 'h2'
+            heading['id'] = node['id']
+            node.parent.before(node.children)
+            node.remove
+          end
         end
 
         doc

+ 28 - 35
lib/docs/filters/perl/entries.rb

@@ -5,19 +5,19 @@ module Docs
         'Platform specific' => 'Platform Specific',
         'Internals and C language interface' => 'Internals',
 
-        'perlop' => 'Perl Operators',
-        'perlvar' => 'Perl Variables',
+        'perlop' => 'Operators',
+        'perlvar' => 'Variables',
         'Functions' => 'Functions'
       }
 
+      MANUAL_TYPES = %w(Overview Tutorials FAQs)
+
       def breadcrumbs
-        at_css('#breadcrumbs').content.split('>').each { |s| s.strip! }
+        @breadcrumbs ||= at_css('#breadcrumbs').content.split('>').each { |s| s.strip! }
       end
 
       def include_default_entry?
-        not slug =~ /\Aindex/ and
-        not slug =~ /perlop\z/ and
-        not slug =~ /perlvar/
+        slug !~ /\Aindex/
       end
 
       def get_name
@@ -26,41 +26,34 @@ module Docs
 
       def get_type
         case breadcrumbs[1]
-          when 'Language reference'
-            REPLACE_TYPES[breadcrumbs[2]] || 'Language Reference'
-          when /\ACore modules/
-            'Core Modules'
-          else
-            REPLACE_TYPES[breadcrumbs[1]] || breadcrumbs[1]
+        when 'Language reference'
+          REPLACE_TYPES[breadcrumbs[2]] || 'Language'
+        when /\ACore modules/
+          'Core Modules'
+        else
+          type = REPLACE_TYPES[breadcrumbs[1]] || breadcrumbs[1]
+          type.prepend 'Manual: ' if MANUAL_TYPES.include?(type)
+          type
         end
       end
 
       def additional_entries
-        entries = []
         case slug
-          when /perlop\z/
-            css('h2').each do |node|
-              name = node.content
-              id = node.previous_element['name']
-              entries << [name, id, get_type]
-            end
-
-          when /perlvar/
-            css('#content_body > ul > li > b').each do |node|
-              node['class'] = 'perlvar'
-              name = node.content
-              id = node.previous_element['name']
-              entries << [name, id, get_type]
-            end
-
-          when /functions/
-            css('#content_body > ul > li > b').each do |node|
-              node['class'] = 'perlfunction'
-            end
-
+        when 'perlop'
+          css('h2').map do |node|
+            name = node.content
+            id = node.previous_element['name']
+            [name, id]
+          end
+        when 'perlvar'
+          css('#content_body > ul > li > b').map do |node|
+            name = node.content
+            id = node.previous_element['name']
+            [name, id]
+          end
+        else
+          []
         end
-
-        entries
       end
     end
   end

+ 16 - 13
lib/docs/scrapers/perl.rb

@@ -2,9 +2,7 @@ module Docs
   class Perl < FileScraper
     self.name = 'Perl'
     self.type = 'perl'
-    self.release = '5.22.0'
-    self.dir = ''
-    self.base_url = 'http://perldoc.perl.org/'
+    self.dir = '/Users/Thibaut/DevDocs/Docs/Perl'
     self.root_path = 'index.html'
     self.links = {
       home: 'https://www.perl.org/'
@@ -17,19 +15,24 @@ module Docs
       perlartistic.html
       perlgpl.html
       perlhist.html
-      perltodo.html
-      perlunifaq.html
-    )
+      perltodo.html )
 
-    options[:skip_patterns] = [
-      /\.pdf/,
-      /delta\.html/,
-      /\Aperlfaq/
-    ]
+    options[:skip_patterns] = [/\.pdf/, /delta\.html/]
 
     options[:attribution] = <<-HTML
-      &copy; 2010&ndash;2015 <br>
-      Dual Licensed under the GNU General Public License version 1+ or the Artistic License.
+      &copy; 1993&ndash;2016 Larry Wall and others<br>
+      Licensed under the GNU General Public License version 1 or later, or the Artistic License.<br>
+      The Perl logo is a trademark of the Perl Foundation.
     HTML
+
+    version '5.22' do
+      self.release = '5.22.0'
+      self.base_url = "http://perldoc.perl.org/#{self.release}/"
+    end
+
+    version '5.20' do
+      self.release = '5.20.2'
+      self.base_url = "http://perldoc.perl.org/#{self.release}/"
+    end
   end
 end

二进制
public/icons/docs/perl/16.png


二进制
public/icons/docs/perl/16@2x.png