1
0
Эх сурвалжийг харах

Merge pull request #841 from jmerle/mariadb

Add MariaDB documentation
Jasper van Merle 6 жил өмнө
parent
commit
c8380bb228

+ 5 - 0
assets/javascripts/templates/pages/about_tmpl.coffee

@@ -451,6 +451,11 @@ credits = [
     '2006-2016 LÖVE Development Team',
     'GFDL',
     'http://www.gnu.org/copyleft/fdl.html'
+  ], [
+    'MariaDB',
+    '2019 MariaDB',
+    'CC BY-SA & GFDL',
+    'https://mariadb.com/kb/en/library/documentation/+license/'
   ], [
     'Marionette.js',
     '2017 Muted Solutions, LLC',

+ 1 - 0
assets/stylesheets/application.css.scss

@@ -70,6 +70,7 @@
         'pages/liquid',
         'pages/love',
         'pages/lua',
+        'pages/mariadb',
         'pages/mdn',
         'pages/meteor',
         'pages/mkdocs',

+ 7 - 0
assets/stylesheets/pages/_mariadb.scss

@@ -0,0 +1,7 @@
+// Styles scoped to ._mariadb: the container extends the %simple placeholder,
+// and .graybox / .product elements inside it extend the %note placeholder.
+._mariadb {
+  @extend %simple;
+
+  .graybox,
+  .product {
+    @extend %note;
+  }
+}

+ 67 - 0
lib/docs/filters/mariadb/clean_html.rb

@@ -0,0 +1,67 @@
+module Docs
+  class Mariadb
+    class CleanHtmlFilter < Filter
+      def call
+        # Return the empty doc if the EraseInvalidPagesFilter detected this page shouldn't be scraped
+        return doc if doc.inner_html == ''
+
+        # Extract main content
+        @doc = at_css('#content')
+
+        # Remove navigation at the bottom
+        css('.simple_section_nav').remove
+
+        # Remove table of contents
+        css('.table_of_contents').remove
+
+        # Add code highlighting and remove nested tags
+        css('pre').each do |node|
+          node.content = node.content
+          node['data-language'] = 'sql'
+        end
+
+        # Fix images
+        css('img').each do |node|
+          node['src'] = node['src'].sub('http:', 'https:')
+        end
+
+        # Remove navigation items containing only numbers
+        css('.node_comments').each do |node|
+          if node.content.scan(/\D/).empty?
+            node.remove
+          end
+        end
+
+        # Convert listings (pages like https://mariadb.com/kb/en/library/documentation/sql-statements-structure/) into tables
+        css('ul.listing').each do |node|
+          rows = []
+
+          node.css('li:not(.no_data)').each do |li|
+            name = li.at_css('.media-heading').content
+            description = li.at_css('.blurb').content
+            url = li.at_css('a')['href']
+            rows << "<tr><td><a href=\"#{url}\">#{name}</a></td><td>#{description}</td></tr>"
+          end
+
+          table = "<table><thead><tr><th>Title</th><th>Description</th></tr></thead><tbody>#{rows.join('')}</tbody></table>"
+          node.replace(table)
+        end
+
+        # Turn note titles into <strong> tags
+        css('.product_title').each do |node|
+          node.name = 'strong'
+        end
+
+        # Remove comments and questions
+        css('.related_questions, #comments').remove
+        css('h2').each do |node|
+          if node.content == 'Comments'
+            node.remove
+          end
+        end
+
+        doc
+      end
+    end
+  end
+end

+ 24 - 0
lib/docs/filters/mariadb/entries.rb

@@ -0,0 +1,24 @@
+module Docs
+  class Mariadb
+    class EntriesFilter < Docs::EntriesFilter
+      def get_name
+        return 'Name' if doc.inner_html == ''
+
+        at_css('#content > h1').content.strip
+      end
+
+      def get_type
+        return 'Type' if doc.inner_html == ''
+
+        link = at_css('#breadcrumbs > a:nth-child(4)')
+        link.nil? ? at_css('#breadcrumbs > a:nth-child(3)').content : link.content
+      end
+
+      def entries
+        # Don't add an entry for this page if the EraseInvalidPagesFilter detected this page shouldn't be scraped
+        return [] if doc.inner_html == ''
+        super
+      end
+    end
+  end
+end

+ 34 - 0
lib/docs/filters/mariadb/erase_invalid_pages.rb

@@ -0,0 +1,34 @@
+module Docs
+  class Mariadb
+    class EraseInvalidPagesFilter < Filter
+      @@seen_urls = Hash.new
+
+      def call
+        # The MariaDB documentation uses urls like mariadb.com/kb/en/*
+        # This means there is no way to detect if a page should be scraped based on it's url
+        # We run this filter before the internal_urls filter scrapes all internal urls
+        # If this page should not be scraped, we erase it's contents in here so that the internal urls are not picked up
+        # The entries filter will make sure that no entry is saved for this page
+
+        if at_css('a.crumb[href="https://mariadb.com/kb/en/documentation/"]').nil?
+          doc.inner_html = ''
+        end
+
+        current_page = at_css('a.crumb.node_link')
+        unless current_page.nil?
+          url = current_page['href']
+
+          # Some links lead to the same page
+          # Only parse the page one time
+          if @@seen_urls.has_key?(url)
+            doc.inner_html = ''
+          end
+
+          @@seen_urls[url] = true
+        end
+
+        doc
+      end
+    end
+  end
+end

+ 36 - 0
lib/docs/scrapers/mariadb.rb

@@ -0,0 +1,36 @@
+module Docs
+  class Mariadb < UrlScraper
+    self.name = 'MariaDB'
+    self.type = 'mariadb'
+    self.release = '10.4.8'
+    self.base_url = 'https://mariadb.com/kb/en/'
+    self.root_path = 'library/documentation/'
+    self.links = {
+      home: 'https://mariadb.com/',
+      code: 'https://github.com/MariaDB/server'
+    }
+
+    html_filters.insert_before 'internal_urls', 'mariadb/erase_invalid_pages'
+    html_filters.push 'mariadb/entries', 'mariadb/clean_html'
+
+    options[:rate_limit] = 200
+    options[:skip_patterns] = [
+      /\+/,
+      /\/ask\//,
+      /-release-notes\//,
+      /-changelog\//,
+      /^documentation\//,
+      /^mariadb-server-documentation\//,
+    ]
+
+    options[:attribution] = <<-HTML
+      &copy; 2019 MariaDB<br>
+      Licensed under the Creative Commons Attribution 3.0 Unported License and the GNU Free Documentation License.
+    HTML
+
+    def get_latest_version(opts)
+      doc = fetch_doc('https://mariadb.com/downloads/', opts)
+      doc.at_css('[data-version-id="mariadb_server-versions"] option').content.split('-')[0]
+    end
+  end
+end

BIN
public/icons/docs/mariadb/16.png


BIN
public/icons/docs/mariadb/16@2x.png


+ 1 - 0
public/icons/docs/mariadb/SOURCE

@@ -0,0 +1 @@
+https://mariadb.org/about/logos/