Browse Source

Update and improve PostgreSQL documentation (9.4)

Thibaut 11 years ago
parent
commit
df1f203310

+ 4 - 0
lib/docs/filters/postgresql/clean_html.rb

@@ -11,6 +11,10 @@ module Docs
       end
 
       def other
+        @doc = at_css('#docContent')
+
+        css('.NAVHEADER', '.NAVFOOTER').remove
+
         css('a[name]').each do |node|
           node.parent['id'] = node['name']
           node.before(node.children).remove

+ 88 - 62
lib/docs/filters/postgresql/entries.rb

@@ -11,98 +11,144 @@ module Docs
         'System Administration Functions' => 'Administration Functions',
         'System Information Functions'    => 'Information Functions' }
 
-      def get_name
-        name = at_css('h1').content
-        clean_heading_name(name)
+      PREPEND_TYPES = [
+        'Type Conversion',
+        'Full Text Search',
+        'Performance Tips',
+        'Server Configuration',
+        'Monitoring' ]
+
+      REPLACE_TYPES = {
+        'Routine Database Maintenance Tasks' => 'Maintenance',
+        'High Availability, Load Balancing, and Replication' => 'High Availability',
+        'Monitoring Database Activity' => 'Monitoring',
+        'Monitoring Disk Usage' => 'Monitoring',
+        'Reliability and the Write-Ahead Log' => 'Write-Ahead Log' }
+
+      def base_name
+        @base_name ||= clean_heading_name(at_css('h1').content)
+      end
 
-        if %w(Overview Introduction).include?(name)
+      def get_name
+        if %w(Overview Introduction).include?(base_name)
           result[:pg_chapter_name]
+        elsif PREPEND_TYPES.include?(type)
+          "#{type}: #{base_name}"
         else
-          name.remove! ' (Common Table Expressions)'
-          REPLACE_NAMES[name] || name
+          REPLACE_NAMES[base_name] || base_name
         end
       end
 
-      def clean_heading_name(name)
-        name.remove! %r{\A[\d\.\s]+}
-        name.remove! 'Using '
-        name.remove! %r{\AThe }
-        name
-      end
-
       def get_type
         return if initial_page?
 
         if result[:pg_up_path] == 'sql-commands.html'
           'Commands'
-        elsif result[:pg_up_path].start_with? 'reference-'
+        elsif result[:pg_up_path].start_with?('reference-')
           'Applications'
         elsif type = result[:pg_chapter_name]
-          if type.start_with?('Func') && (match = name.match(/\A(?!Form|Seq|Set|Enum)(.+) Func/))
+          if type.start_with?('Func') && (match = base_name.match(/\A(?!Form|Seq|Set|Enum)(.+) Func/))
             "Functions: #{match[1]}"
           else
-            type.remove 'SQL '
+            type.remove! 'SQL '
+            REPLACE_TYPES[type] || type
           end
         end
       end
 
       def additional_entries
         return [] if skip_additional_entries?
-        return get_config_entries if config_page?
+        return config_additional_entries if type && type.include?('Configuration')
+        return data_types_additional_entries if type == 'Data Types'
         return get_heading_entries('h3[id]') if slug == 'functions-xml'
 
-        if type == 'Data Types'
-          return get_custom_entries case slug
-            when 'rangetypes'          then 'li > p > .TYPE:first-child'
-            when 'datatype-textsearch' then '.SECT2 > .TYPE'
-            else '.CALSTABLE td:first-child > .TYPE' end
-        end
-
         entries = get_heading_entries('h2[id]')
 
-        if slug == 'queries-union'
+        case slug
+        when 'queries-union'
           entries.concat get_custom_entries('p > .LITERAL:first-child')
-        elsif slug == 'queries-table-expressions'
+        when 'queries-table-expressions'
           entries.concat get_heading_entries('h3[id]')
           entries.concat get_custom_entries('dt > .LITERAL:first-child')
-        elsif slug == 'functions-logical'
+        when 'functions-logical'
           entries.concat get_custom_entries('> table td:first-child > code')
-        elsif slug == 'functions-formatting'
+        when 'functions-formatting'
           entries.concat get_custom_entries('#FUNCTIONS-FORMATTING-TABLE td:first-child > code')
-        elsif slug == 'functions-admin'
+        when 'functions-admin'
           entries.concat get_custom_entries('.TABLE td:first-child > code')
-        elsif slug == 'functions-string'
+        when 'functions-string'
           entries.concat get_custom_entries('> div[id^="FUNC"] td:first-child > code')
-        elsif type && type.start_with?('Functions')
-          entries.concat get_custom_entries('> .TABLE td:first-child > code:first-child')
-          entries.concat get_comparison_entries if slug == 'functions-comparison'
+        else
+          if type && type.start_with?('Functions')
+            entries.concat get_custom_entries('> .TABLE td:first-child > code:first-child')
+            entries.concat %w(IS NULL BETWEEN DISTINCT\ FROM).map { |name| ["#{self.name}: #{name}"] } if slug == 'functions-comparison'
+          end
         end
 
         entries
       end
 
-      def get_config_entries
+      def config_additional_entries
         css('.VARIABLELIST dt[id]').map do |node|
           name = node.at_css('.VARNAME').content
           ["Config: #{name}", node['id']]
         end
       end
 
+      def data_types_additional_entries
+        selector = case slug
+        when 'rangetypes'
+          'li > p > .TYPE:first-child'
+        when 'datatype-textsearch'
+          '.SECT2 > .TYPE'
+        else
+          '.CALSTABLE td:first-child > .TYPE'
+        end
+        get_custom_entries(selector)
+      end
+
+      def include_default_entry?
+        !initial_page? && !at_css('.TOC')
+      end
+
+      SKIP_ENTRIES_SLUGS = [
+        'config-setting',
+        'applevel-consistency' ]
+
+      SKIP_ENTRIES_TYPES = [
+        'Localization',
+        'Type Conversion',
+        'Full Text Search',
+        'Performance Tips',
+        'Client Authentication',
+        'Managing Databases',
+        'Maintenance',
+        'Backup and Restore',
+        'High Availability',
+        'Monitoring' ]
+
+      def skip_additional_entries?
+        SKIP_ENTRIES_SLUGS.include?(slug) || SKIP_ENTRIES_TYPES.include?(type)
+      end
+
+      def clean_heading_name(name)
+        name.remove! %r{\A[\d\.\s]+}
+        name.remove! 'Using '
+        name.remove! %r{\AThe }
+        name.remove! ' (Common Table Expressions)'
+        name
+      end
+
       def get_heading_entries(selector)
-        css(selector).inject [] do |entries, node|
+        css(selector).each_with_object([]) do |node, entries|
           name = node.content
           clean_heading_name(name)
-
-          unless skip_heading?(name)
-            entries << ["#{additional_entry_prefix}: #{name}", node['id']]
-          end
-
-          entries
+          entries << ["#{additional_entry_prefix}: #{name}", node['id']] unless skip_heading?(name)
         end
       end
 
       def get_custom_entries(selector)
-        css(selector).inject [] do |entries, node|
+        css(selector).each_with_object([]) do |node, entries|
           name = node.content
           name.remove! %r{\(.*?\)}m
           name.remove! %r{\[.*?\]}m
@@ -117,14 +163,6 @@ module Docs
             node['id'] = id
             entries << [name, id]
           end
-
-          entries
-        end
-      end
-
-      def get_comparison_entries
-        %w(IS NULL BETWEEN DISTINCT\ FROM).map do |name|
-          ["#{self.name}: #{name}"]
         end
       end
 
@@ -132,22 +170,10 @@ module Docs
         type.dup.gsub!('Functions: ', '') || self.name
       end
 
-      def skip_additional_entries?
-        slug == 'config-setting' || %w(Concurrency\ Control Localization).include?(type)
-      end
-
       def skip_heading?(name)
         %w(Usage\ Patterns Portability Caveats Overview).include?(name) ||
         (type.start_with?('Functions') && slug != 'functions-xml' && name.split.first.upcase!)
       end
-
-      def include_default_entry?
-        !(initial_page? || at_css('.TOC') || config_page?)
-      end
-
-      def config_page?
-        slug.start_with? 'runtime-config'
-      end
     end
   end
 end

+ 1 - 2
lib/docs/filters/postgresql/clean_nav.rb → lib/docs/filters/postgresql/extract_metadata.rb

@@ -1,10 +1,9 @@
 module Docs
   class Postgresql
-    class CleanNavFilter < Filter
+    class ExtractMetadataFilter < Filter
       def call
         extract_up_path
         extract_chapter
-        css('.NAVHEADER', '.NAVFOOTER').remove
         doc
       end
 

+ 32 - 13
lib/docs/scrapers/postgresql.rb

@@ -1,14 +1,13 @@
 module Docs
-  class Postgresql < FileScraper
+  class Postgresql < UrlScraper
     self.name = 'PostgreSQL'
     self.type = 'postgres'
-    self.version = 'up to 9.3.2'
-    self.dir = '/Users/Thibaut/DevDocs/Docs/PostgreSQL'
-    self.base_url = 'http://www.postgresql.org/docs/9.3/static/'
+    self.version = '9.4'
+    self.base_url = "http://www.postgresql.org/docs/#{version}/static/"
     self.root_path = 'reference.html'
-    self.initial_paths = %w(sql.html runtime-config.html charset.html)
+    self.initial_paths = %w(sql.html admin.html)
 
-    html_filters.insert_before 'normalize_urls', 'postgresql/clean_nav'
+    html_filters.insert_before 'normalize_urls', 'postgresql/extract_metadata'
     html_filters.push 'postgresql/clean_html', 'postgresql/entries', 'title'
 
     options[:title] = false
@@ -19,7 +18,6 @@ module Docs
       arrays.html
       rowtypes.html
       rangetypes.html
-      mvcc-intro.html
       transaction-iso.html
       explicit-locking.html
       applevel-consistency.html
@@ -27,7 +25,15 @@ module Docs
       config-setting.html
       locale.html
       collation.html
-      multibyte.html)
+      multibyte.html
+      using-explain.html
+      planner-stats.html
+      explicit-joins.html
+      populate.html
+      non-durability.html
+      logfile-maintenance.html
+      continuous-archiving.html
+      dynamic-trace.html)
 
     options[:only_patterns] = [
       /\Asql\-/,
@@ -37,18 +43,31 @@ module Docs
       /\Aqueries\-/,
       /\Adatatype\-/,
       /\Afunctions\-/,
+      /\Atypeconv\-/,
+      /\Atextsearch\-/,
+      /\Amvcc\-/,
       /\Aindexes\-/,
-      /\Aruntime\-config\-/]
+      /\Aruntime\-config\-/,
+      /\Aauth\-/,
+      /\Aclient\-authentication/,
+      /\Amanage\-ag/,
+      /\Aroutine/,
+      /\Abackup\-/,
+      /\Amonitoring\-/,
+      /\Awal\-/,
+      /\Adisk/,
+      /role/,
+      /recovery/,
+      /standby/]
 
     options[:skip] = %w(
       ddl-others.html
-      runtime-config-custom.html
-      runtime-config-short.html
       functions-event-triggers.html
-      functions-trigger.html)
+      functions-trigger.html
+      textsearch-migration.html)
 
     options[:attribution] = <<-HTML
-      &copy; 1996&ndash;2013 The PostgreSQL Global Development Group<br>
+      &copy; 1996&ndash;2014 The PostgreSQL Global Development Group<br>
       Licensed under the PostgreSQL License.
     HTML
   end