浏览代码

Finish Ansible scraper

Thibaut Courouble 9 年之前
父节点
当前提交
c17932e811

二进制
assets/images/icons.png


二进制
assets/images/icons@2x.png


+ 1 - 1
assets/javascripts/collections/types.coffee

@@ -7,7 +7,7 @@ class app.collections.Types extends app.Collection
       (result[@_groupFor(type)] ||= []).push(type)
     result.filter (e) -> e.length > 0
 
-  GUIDES_RGX = /(^|[\s\(])(guide|guides|tutorial|reference|getting\ started)($|[\s\):])/i
+  GUIDES_RGX = /(^|[\s\(])(guide|guides|tutorial|reference|playbooks|getting\ started)($|[\s\):])/i
 
   _groupFor: (type) ->
     if GUIDES_RGX.test(type.name)

+ 1 - 1
assets/javascripts/news.json

@@ -1,7 +1,7 @@
 [
   [
     "2016-03-06",
-    "New documentation: <a href=\"/tensorflow/\">TensorFlow</a> and <a href=\"/haxe/\">Haxe</a>"
+    "New documentation: <a href=\"/tensorflow/\">TensorFlow</a>, <a href=\"/haxe/\">Haxe</a> and <a href=\"/ansible/\">Ansible</a>"
   ], [
     "2016-02-28",
     "New documentations: <a href=\"/codeigniter/\">CodeIgniter</a>, <a href=\"/nginx_lua_module/\">nginx Lua Module</a> and <a href=\"/influxdata/\">InfluxData</a>"

+ 5 - 0
assets/javascripts/templates/pages/about_tmpl.coffee

@@ -79,6 +79,11 @@ credits = [
     '2010-2016 Google, Inc.',
     'CC BY',
     'https://creativecommons.org/licenses/by/4.0/'
+  ], [
+    'Ansible',
+    '2012-2016 Michael DeHaan',
+    'GPLv3',
+    'https://raw.githubusercontent.com/ansible/ansible/devel/COPYING'
   ], [
     'Apache HTTP Server',
     '2016 The Apache Software Foundation',

+ 1 - 0
assets/stylesheets/global/_icons.scss

@@ -131,3 +131,4 @@
 ._icon-influxdata:before    { background-position: -5rem -10rem; @extend %darkIconFix !optional; }
 ._icon-tensorflow:before    { background-position: -6rem -10rem; }
 ._icon-haxe:before          { background-position: -7rem -10rem; }
+._icon-ansible:before       { background-position: -8rem -10rem; @extend %darkIconFix !optional; }

+ 8 - 8
lib/docs/filters/ansible/clean_html.rb

@@ -2,16 +2,16 @@ module Docs
   class Ansible
     class CleanHtmlFilter < Filter
       def call
-        # Remove 'Permalink to this headline'
-        css('.headerlink').remove
-        # Make proper table headers
-        css('th.head').each do |node|
-          node.name = 'th'
+        @doc = at_css('#page-content')
+
+        css('blockquote > div > pre:first-child:last-child', 'blockquote > div > ul:first-child:last-child').each do |node|
+          node.ancestors('blockquote').first.before(node).remove
         end
-        css('table').each do |node|
-          node.remove_attribute('border')
-          node.remove_attribute('cellpadding')
+
+        css('a > em').each do |node|
+          node.before(node.children).remove
         end
+
         doc
       end
     end

+ 16 - 49
lib/docs/filters/ansible/entries.rb

@@ -1,62 +1,29 @@
 module Docs
   class Ansible
     class EntriesFilter < Docs::EntriesFilter
-      TYPES = {
-        'intro' => 'Basic Topics',
-        'modules' => 'Basic Topics',
-        'common' => 'Basic Topics',
-        'playbooks' => 'Playbooks',
-        'become' => 'Playbooks',
-        'test' => 'Playbooks',
-        'YAMLSyntax' => 'Playbooks',
-        'list' => 'Module Categories',
-        'guide' => 'Advanced Topics',
-        'developing' => 'Advanced Topics',
-        'galaxy' => 'Advanced Topics'
-      }
-
-      HIDE_SLUGS = [
-        'playbooks',
-        'playbooks_special_topics',
-        'list_of_all_modules.html',
-        'modules_by_category',
-        'modules'
-      ]
-
       def get_name
-        node = at_css('h1')
-        name = node.content.strip
-        case
-        when name.empty?
-          super
-        when slug.eql?('modules_intro')
-          name = 'Modules'
-        when name.eql?('Introduction')
-          name = '#Introduction'
-        when name.eql?('Getting Started')
-          name = '#Getting Started'
-        when name.eql?('Introduction To Ad-Hoc Commands')
-          name = 'Ad-Hoc Commands'
-        end
+        name = at_css('h1').content.strip
+        name.remove! "\u{00B6}"
+        name.remove! %r{ \- .*}
+        name.remove! 'Introduction To '
+        name.remove! %r{ Guide\z}
         name
       end
 
       def get_type
-        if HIDE_SLUGS.include?(slug)
-          type = nil
+        if slug.include?('module')
+          if name =~ /\A[a-z]/ && node = css('.toctree-l2.current').last
+            "Modules: #{node.content.remove(' Modules')}"
+          else
+            'Modules'
+          end
+        elsif slug.include?('playbook')
+          'Playbooks'
+        elsif slug.include?('guide')
+          'Guides'
         else
-          akey = slug.split('_').first
-          type = TYPES.key?(akey) ? TYPES[akey] : 'Modules Reference'
+          'Miscellaneous'
         end
-        type
-      end
-
-      def additional_entries
-        []
-      end
-
-      def include_default_entry?
-        true
       end
     end
   end

+ 1 - 0
lib/docs/filters/codeigniter/clean_html.rb

@@ -16,6 +16,7 @@ module Docs
 
         css('table').each do |node|
           node.remove_attribute 'border'
+          node.remove_attribute 'cellpadding'
         end
 
         css('.section').each do |node|

+ 14 - 17
lib/docs/scrapers/ansible.rb

@@ -1,30 +1,27 @@
 module Docs
   class Ansible < UrlScraper
     self.name = 'Ansible'
-    self.type = 'ansible'
-    self.release = '2.1.0'
-    self.base_url = 'http://docs.ansible.com/ansible/'
-    self.root_path = 'intro.html'
+    self.type = 'sphinx'
+    self.release = '2.0.1'
+    self.base_url = 'https://docs.ansible.com/ansible/'
     self.links = {
-      home: 'http://docs.ansible.com',
+      home: 'https://www.ansible.com/',
       code: 'https://github.com/ansible/ansible'
     }
 
-    html_filters.push 'ansible/clean_html', 'ansible/entries'
+    html_filters.push 'ansible/entries', 'ansible/clean_html', 'codeigniter/clean_html'
 
-    options[:title] = 'Ansible'
-    options[:container] = '#page-content'
-    options[:skip] = [
-      'glossary.html',
-      'faq.html',
-      'community.html',
-      'tower.html',
-      'quickstart.html'
-    ]
+    options[:skip] = %w(
+      glossary.html
+      faq.html
+      community.html
+      tower.html
+      quickstart.html
+      list_of_all_modules.html)
 
     options[:attribution] = <<-HTML
-      &copy; Michael DeHaan<br>
-      Licensed under the GNU General Public License v.3.
+      &copy; 2012&ndash;2016 Michael DeHaan<br>
+      Licensed under the GNU General Public License version 3.
     HTML
   end
 end

二进制
public/icons/docs/ansible/16.png


二进制
public/icons/docs/ansible/16@2x.png


+ 1 - 0
public/icons/docs/ansible/SOURCE

@@ -0,0 +1 @@
+https://www.ansible.com/logos