Browse Source

sequelize: finish scraper and filters

Jasper van Merle 6 năm trước cách đây
mục cha
commit
089aa6158a

+ 23 - 2
lib/docs/filters/sequelize/clean_html.rb

@@ -2,16 +2,37 @@ module Docs
   class Sequelize
     class CleanHtmlFilter < Filter
       def call
+        @doc = at_css('.content')
+
         # Clean up the home page
-        if root_page?
+        if root_page? || subpath == "index.html"
           # Remove logo
           css('.manual-user-index > div > div.logo').remove
-          # Convert title to proper H1 element
+
+          # Convert title to proper h1 element
           at_css('.manual-user-index > div > div.sequelize').name = 'h1'
+
           # Remove badges (NPM, Travis, test coverage, etc.)
           css('.manual-user-index > p:nth-child(4)').remove
+
           # Remove image cards pointing to entries of the manual
           css('.manual-cards').remove
+
+          # Pull the header out of its container
+          header = at_css('h1')
+          header.parent.parent.parent.add_previous_sibling header
+        else
+          # Pull the header out of its container
+          header = at_css('h1')
+          header.parent.add_previous_sibling header
+        end
+
+        # Remove header notice
+        css('.header-notice').remove
+
+        # Change td in thead to th
+        css('table > thead > tr > td').each do |node|
+          node.name = 'th'
         end
 
         # Add syntax highlighting to code blocks

+ 16 - 9
lib/docs/filters/sequelize/entries.rb

@@ -9,17 +9,24 @@ module Docs
       # Assign the pages to main categories
       def get_type
         if path.start_with?('manual/')
-          type = 'Manual'
-        elsif path.start_with?('file/lib/')
-          type = 'Source files'
+          'Manual'
+        elsif path.include?('lib/data-types')
+          'datatypes'
+        elsif path.include?('lib/errors/validation')
+          'errors/validation'
+        elsif path.include?('lib/errors/database')
+          'errors/database'
+        elsif path.include?('lib/errors/connection')
+          'errors/connection'
+        elsif path.include?('lib/errors')
+          'errors'
+        elsif path.include?('lib/associations')
+          'associations'
+        elsif path.include?('master/variable')
+          'variables'
         else
-          # API Reference pages. The `path` for most of these starts with 'class/lib/',
-          # but there's also 'variable/index' (pseudo-classes), and 'identifiers' (the main index)
-          # so we use an unqualified `else` as a catch-all.
-          type = 'Reference'
+          'classes'
         end
-
-        type
       end
     end
   end

+ 4 - 4
lib/docs/scrapers/sequelize.rb

@@ -3,18 +3,18 @@ module Docs
     self.name = 'Sequelize'
     self.slug = 'sequelize'
     self.type = 'simple'
-    self.release = '5.19.6'
+    self.release = '5.21.1'
     self.base_url = 'https://sequelize.org/master/'
     self.links = {
       home: 'https://sequelize.org/',
-      code: 'https://github.com/sequelize/sequelize/'
+      code: 'https://github.com/sequelize/sequelize'
     }
 
     # List of content filters (to be applied sequentially)
     html_filters.push 'sequelize/entries', 'sequelize/clean_html'
 
-    # Wrapper element that holds the main content
-    options[:container] = '.content'
+    # Skip the source files, the license page and the "Who's using Sequelize" page
+    options[:skip_patterns] = [/\.js\.html/, /manual\/legal\.html/, /manual\/whos-using\.html/]
 
     # License information that appears at the bottom of the entry page
     options[:attribution] = <<-HTML