1
0
Эх сурвалжийг харах

Finish InfluxData scraper

Thibaut Courouble 9 жил өмнө
parent
commit
ebc6be3215

BIN
assets/images/icons.png


BIN
assets/images/icons@2x.png


+ 1 - 1
assets/javascripts/news.json

@@ -1,7 +1,7 @@
 [
   [
     "2016-02-28",
-    "New documentations: <a href=\"/codeigniter/\">CodeIgniter</a> and <a href=\"/nginx_lua_module/\">nginx Lua Module</a>"
+    "New documentations: <a href=\"/codeigniter/\">CodeIgniter</a>, <a href=\"/nginx_lua_module/\">nginx Lua Module</a> and <a href=\"/influxdata/\">InfluxData</a>"
   ], [
     "2016-02-15",
     "New documentations: <a href=\"/cakephp/\">CakePHP</a>, <a href=\"/chef/\">Chef</a> and <a href=\"/ramda/\">Ramda</a>"

+ 5 - 0
assets/javascripts/templates/pages/about_tmpl.coffee

@@ -199,6 +199,11 @@ credits = [
     'The University of Glasgow',
     'BSD',
     'https://www.haskell.org/ghc/license'
+  ], [
+    'InfluxData',
+    '2015 InfluxData, Inc.',
+    'MIT',
+    'https://github.com/influxdata/docs.influxdata.com/blob/master/LICENSE'
   ], [
     'jQuery',
     'Packt Publishing<br>&copy; jQuery Foundation and other contributors',

+ 1 - 0
assets/stylesheets/global/_icons.scss

@@ -128,3 +128,4 @@
 ._icon-chef:before          { background-position: -2rem -10rem; }
 ._icon-ramda:before         { background-position: -3rem -10rem; @extend %darkIconFix !optional; }
 ._icon-codeigniter:before   { background-position: -4rem -10rem; @extend %darkIconFix !optional; }
+._icon-influxdata:before    { background-position: -5rem -10rem; @extend %darkIconFix !optional; }

+ 1 - 0
assets/stylesheets/pages/_base.scss

@@ -18,6 +18,7 @@
 
 ._cordova,
 ._grunt,
+._influxdata,
 ._less,
 ._lodash,
 ._marionette,

+ 27 - 0
lib/docs/filters/influxdata/clean_html.rb

@@ -0,0 +1,27 @@
+module Docs
+  class Influxdata
+    class CleanHtmlFilter < Filter
+      def call
+        if root_page?
+          doc.inner_html = ' '
+          return doc
+        end
+
+        doc = @doc.at_css('#page-content')
+
+        css('.page--contribute', 'hr').remove
+
+        css('.page--body', '.page--title', 'font').each do |node|
+          node.before(node.children).remove
+        end
+
+        css('pre > code').each do |node|
+          node.parent['class'] = node['class']
+          node.before(node.children).remove
+        end
+
+        doc
+      end
+    end
+  end
+end

+ 30 - 0
lib/docs/filters/influxdata/entries.rb

@@ -0,0 +1,30 @@
+module Docs
+  class Influxdata
+    class EntriesFilter < Docs::EntriesFilter
+      def get_name
+        at_css('#page-title h1').content
+      end
+
+      def get_type
+        product = at_css('.product-switcher--current').content.strip
+        return product if %w(Chronograf Telegraf).include?(product)
+
+        node = at_css('#product-sidebar a[href="index"]')
+        node = node.parent.previous_element unless node.parent['class'] == 'product-sidebar--section-title'
+
+        type = node.content.strip
+        type.remove! ' Reference'
+
+        if type.in?(%w(Getting\ Started Introduction Guides))
+          product
+        else
+          "#{product}: #{type}"
+        end
+      end
+
+      def include_default_entry?
+        !subpath.end_with?("v#{Influxdata.release}/")
+      end
+    end
+  end
+end

+ 0 - 18
lib/docs/filters/influxdb/clean_html.rb

@@ -1,18 +0,0 @@
-module Docs
-  class Influxdb
-    class CleanHtmlFilter < Filter
-      def call
-        doc = @doc.at_css('#page-content')
-
-        # Re-position the page header
-        header = at_css('.page--body h1')
-        doc.children.first.add_next_sibling header
-
-        # Remove the contribution
-        at_css('.page--contribute').remove
-
-        doc 
-      end
-    end
-  end
-end

+ 0 - 19
lib/docs/filters/influxdb/entries.rb

@@ -1,19 +0,0 @@
-module Docs
-  class Influxdb
-    class EntriesFilter < Docs::EntriesFilter
-      
-      def get_name
-        at_css('#page-title h1').content
-      end
-
-      def get_type
-        # This is kinda hacky: we derive the current type from the URL,
-        # assuming that the URL pattern is either
-        # category/page or category
-        path = current_url.relative_path_from(base_url)
-        "InfluxDB: #{path.split('/').first.titleize}"
-      end
-
-    end
-  end
-end

+ 32 - 0
lib/docs/scrapers/influxdata.rb

@@ -0,0 +1,32 @@
+module Docs
+  class Influxdata < UrlScraper
+    self.name = 'InfluxData'
+    self.type = 'influxdata'
+    self.release = '0.10'
+    self.base_url = 'https://docs.influxdata.com/'
+
+    html_filters.push 'influxdata/entries', 'influxdata/clean_html', 'title'
+
+    options[:trailing_slash] = true
+
+    options[:root_title] = 'InfluxData Documentation'
+    options[:title] = false
+
+    options[:only_patterns] = [/(telegraf|influxdb|chronograf|kapacitor)\/v#{release}/]
+
+    options[:skip] = [
+      "influxdb/v#{release}/sample_data/data_download/",
+      "influxdb/v#{release}/tools/grafana/",
+      "influxdb/v#{release}/about/"
+    ]
+
+    options[:replace_paths] = {
+      "influxdb/v#{release}/guides/clustering/" => 'influxdb/v0.10/clustering/'
+    }
+
+    options[:attribution] = <<-HTML
+      &copy; 2015 InfluxData, Inc.<br>
+      Licensed under the MIT license.
+    HTML
+  end
+end

+ 0 - 15
lib/docs/scrapers/influxdb.rb

@@ -1,15 +0,0 @@
-module Docs
-  class Influxdb < UrlScraper
-    self.name = 'InfluxDB'
-    self.type = 'influxdb'
-    self.release = '0.10'
-    self.base_url = 'https://docs.influxdata.com/influxdb/v0.10/'
-    
-    html_filters.push 'influxdb/entries', 'influxdb/clean_html'
-
-    options[:attribution] = <<-HTML
-      &copy; 2010&ndash;2015 InfluxData<br>
-      Licensed under the MIT license.
-    HTML
-  end
-end

BIN
public/icons/docs/influxdata/16.png


BIN
public/icons/docs/influxdata/16@2x.png