Browse Source

Add MariaDB documentation

Switch to UrlScraper

Refactoring
Jasper van Merle 7 years ago
parent
commit
e894a2f3ee

+ 1 - 0
.gitignore

@@ -8,3 +8,4 @@ public/fonts
 public/docs/**/*
 !public/docs/docs.json
 !public/docs/**/index.json
+log

+ 5 - 0
assets/javascripts/templates/pages/about_tmpl.coffee

@@ -410,6 +410,11 @@ credits = [
     '2006-2016 LÖVE Development Team',
     'GFDL',
     'http://www.gnu.org/copyleft/fdl.html'
+  ], [
+    'MariaDB',
+    '2018 MariaDB',
+    'CC BY-SA & GFDL',
+    'https://mariadb.com/kb/en/library/documentation/+license/'
   ], [
     'Marionette.js',
     '2017 Muted Solutions, LLC',

+ 1 - 0
assets/stylesheets/application-dark.css.scss

@@ -64,6 +64,7 @@
         'pages/liquid',
         'pages/love',
         'pages/lua',
+        'pages/mariadb',
         'pages/mdn',
         'pages/meteor',
         'pages/modernizr',

+ 1 - 0
assets/stylesheets/application.css.scss

@@ -64,6 +64,7 @@
         'pages/liquid',
         'pages/love',
         'pages/lua',
+        'pages/mariadb',
         'pages/mdn',
         'pages/meteor',
         'pages/modernizr',

+ 7 - 0
assets/stylesheets/pages/_mariadb.scss

@@ -0,0 +1,7 @@
+._mariadb { // Page styles for the MariaDB documentation, built on the shared %simple placeholder
+  @extend %simple;
+
+  .graybox { // presumably callout boxes in the scraped KB markup (verify); styled like the shared %note
+    @extend %note;
+  }
+}

+ 71 - 0
lib/docs/filters/mariadb/clean_html.rb

@@ -0,0 +1,71 @@
+require 'net/http'
+
+module Docs
+  class Mariadb
+    class CleanHtmlFilter < Filter
+      @@known_urls = Hash.new
+
+      def call
+        # Extract main content
+        @doc = at_css('#content')
+
+        # Remove navigation at the bottom
+        css('.simple_section_nav').remove
+
+        # Remove table of contents
+        css('.table_of_contents').remove
+
+        # Add code highlighting and remove nested tags
+        css('pre').each do |node|
+          node.content = node.content
+          node['data-language'] = 'sql'
+        end
+
+        # Fix links like http://kb-mirror.mariadb.com/kb/en/bitwise-or/ to not redirect to an external page
+        css('a').each do |node|
+          url = node['href']
+
+          if /^http:\/\/kb-mirror\.mariadb\.com\/kb\/en\/[^\/]+\/(#[^\/]+)?$/.match?(url)
+            final_url = get_final_url(url)
+
+            if !final_url.nil? && final_url.start_with?('/kb/en/library/documentation/')
+              node['href'] = "#{'../' * subpath.count('/')}#{final_url[29..-1]}index"
+            end
+          end
+        end
+
+        # Remove navigation items containing only numbers
+        css('.node_comments').each do |node|
+          if node.content.scan(/\D/).empty?
+            node.remove
+          end
+        end
+
+        # Convert listings (pages like http://kb-mirror.mariadb.com/kb/en/library/documentation/sql-statements-structure/) into tables
+        css('ul.listing').each do |node|
+          rows = []
+
+          node.css('li').each do |li|
+            name = li.at_css('.media-heading').content
+            description = li.at_css('.blurb').content
+            url = li.at_css('a')['href']
+            rows << "<tr><td><a href=\"#{url}\">#{name}</a></td><td>#{description}</td></tr>"
+          end
+
+          table = "<table><thead><tr><th>Title</th><th>Description</th></tr></thead><tbody>#{rows.join('')}</tbody></table>"
+          node.replace(table)
+        end
+
+        doc
+      end
+
+      def get_final_url(url)
+        unless @@known_urls.has_key?(url)
+          @@known_urls[url] = Net::HTTP.get_response(URI(url))['location']
+        end
+
+        @@known_urls[url]
+      end
+    end
+  end
+end

+ 14 - 0
lib/docs/filters/mariadb/entries.rb

@@ -0,0 +1,14 @@
+module Docs
+  class Mariadb
+    class EntriesFilter < Docs::EntriesFilter
+      def get_name
+        at_css('.container > h1').content.strip
+      end
+
+      def get_type
+        link = at_css('#breadcrumbs > a:nth-child(6)')
+        link.nil? ? at_css('#breadcrumbs > a:nth-child(5)').content : link.content
+      end
+    end
+  end
+end

+ 22 - 0
lib/docs/scrapers/mariadb.rb

@@ -0,0 +1,22 @@
+module Docs
+  class Mariadb < UrlScraper
+    self.name = 'MariaDB'
+    self.type = 'mariadb'
+    self.release = '10.3.8'
+    self.base_url = 'http://kb-mirror.mariadb.com/kb/en/library/documentation/'
+    self.links = {
+      home: 'https://mariadb.com/',
+      code: 'https://github.com/MariaDB/server'
+    }
+
+    html_filters.push 'mariadb/entries', 'mariadb/clean_html', 'title'
+
+    options[:download_images] = false
+    options[:root_title] = 'MariaDB'
+
+    options[:attribution] = <<-HTML
+      &copy; 2018 MariaDB<br>
+      Licensed under the Creative Commons Attribution 3.0 Unported License and the GNU Free Documentation License.
+    HTML
+  end
+end