Przeglądaj źródła

Add a Terraform scraper

Doug Fitzmaurice 7 lat temu
rodzic
commit
716fc26af1

+ 1 - 0
assets/stylesheets/application.css.scss

@@ -96,6 +96,7 @@
         'pages/support_tables',
         'pages/tcl_tk',
         'pages/tensorflow',
+        'pages/terraform',
         'pages/underscore',
         'pages/vue',
         'pages/webpack',

+ 4 - 0
assets/stylesheets/pages/_terraform.scss

@@ -0,0 +1,4 @@
+._terraform {
+  @extend %simple; // page-level styles come from the %simple placeholder (defined outside this file)
+  .note, .alert { @extend %note; } // nested .note and .alert elements both reuse the %note placeholder
+}

+ 28 - 0
lib/docs/filters/terraform/clean_html.rb

@@ -0,0 +1,28 @@
+module Docs
+  class Terraform
+    class CleanHtmlFilter < Filter
+      def call
+        @doc = at_css('#inner')
+
+        css('hr', 'a.anchor').remove
+
+        css('.alert').each do |node|
+          node.name = 'blockquote'
+        end
+
+        css('pre').each do |node|
+          if language = node['class'][/(json|shell|ruby)/, 1]
+            node['data-language'] = language
+          end
+          # HCL isn't currently supported by Prism, Ruby syntax does an acceptable job for now
+          if language = node['class'][/(hcl)/, 1]
+            node['data-language'] = 'ruby'
+          end
+          node.content = node.content
+        end
+
+        doc
+      end
+    end
+  end
+end

+ 76 - 0
lib/docs/filters/terraform/entries.rb

@@ -0,0 +1,76 @@
+module Docs
+  class Terraform
+    class EntriesFilter < Docs::EntriesFilter
+
+      # Some providers have non-trivial mappings between the directory they live in and their name
+      # Anything *not* in this list will be capitalized instead.
+      PROVIDER_NAME_MAP = {
+        'aws'              => 'AWS',
+        'azure'            => 'Azure (Legacy)',
+        'azurerm'          => 'Azure',
+        'centurylinkcloud' => 'CenturyLinkCloud',
+        'cloudscale'       => 'CloudScale.ch',
+        'cloudstack'       => 'CloudStack',
+        'dme'              => 'DNSMadeEasy',
+        'dns'              => 'DNS',
+        'dnsimple'         => 'DNSimple',
+        'do'               => 'DigitalOcean',
+        'github'           => 'GitHub',
+        'google'           => 'Google Cloud',
+        'http'             => 'HTTP',
+        'mysql'            => 'MySQL',
+        'newrelic'         => 'New Relic',
+        'oneandone'        => '1&1',
+        'opentelekomcloud' => 'OpenTelekomCloud',
+        'opsgenie'         => 'OpsGenie',
+        'opc'              => 'Oracle Public Cloud',
+        'oraclepaas'       => 'Oracle Cloud Platform',
+        'ovh'              => 'OVH',
+        'pagerduty'        => 'PagerDuty',
+        'panos'            => 'Palo Alto Networks',
+        'postgresql'       => 'PostgreSQL',
+        'powerdns'         => 'PowerDNS',
+        'profitbricks'     => 'ProfitBricks',
+        'rabbitmq'         => 'RabbitMQ',
+        'softlayer'        => 'SoftLayer',
+        'statuscake'       => 'StatusCake',
+        'tls'              => 'TLS',
+        'ultradns'         => 'UltraDNS',
+        'vcd'              => 'VMware vCloud Director',
+        'nsxt'             => 'VMware NSX-T',
+        'vsphere'          => 'VMware vSphere',
+      }
+
+      # Some providers have a lot (> 100) entries, which makes browsing them unwieldy.
+      # Any present in the list below will have an extra set of types added, breaking the pages out into the different
+      # products they offer.
+      LARGE_PROVIDERS = {
+        "aws"     => true,
+        "azurerm" => true,
+        "google"  => true,
+      }
+
+
+      def get_name
+        name ||= at_css('#inner h1').content
+        name.remove! "» "
+        name.remove! "Data Source: "
+        name
+      end
+
+      def get_type
+        category, subcategory, subfolder, page = *slug.split('/')
+        provider = page ? subcategory : category
+        nice_provider = PROVIDER_NAME_MAP[provider] || provider.capitalize
+
+        if LARGE_PROVIDERS[provider]
+          category_node = at_css('ul > li > ul > li.active')
+          parent_node = category_node.parent.previous_element if category_node
+          nice_provider = nice_provider + ": #{parent_node.content}" if category_node
+        end
+
+        nice_provider
+      end
+    end
+  end
+end

+ 23 - 0
lib/docs/scrapers/terraform.rb

@@ -0,0 +1,23 @@
+module Docs
+  class Terraform < UrlScraper
+    self.name = 'Terraform'
+    self.type = 'terraform'
+    self.release = '0.11.7'
+    self.base_url = 'https://www.terraform.io/docs/'
+    # self.dir = '/mnt/c/Users/Doug/Code/terraform-docs/www.terraform.io/docs'
+    self.root_path = 'index.html'
+    self.links = {
+      home: 'https://www.terraform.io/',
+      code: 'https://github.com/hashicorp/terraform'
+    }
+
+    html_filters.push 'terraform/entries', 'terraform/clean_html'
+
+    options[:skip_patterns] = [/enterprise/, /enterprise-legacy/]
+
+    options[:attribution] = <<-HTML
+      Copyright &copy; 2018 HashiCorp</br>
+      Licensed under the MPL 2.0 License.
+    HTML
+  end
+end

BIN
public/icons/docs/terraform/16.png


BIN
public/icons/docs/terraform/16@2x.png