Browse Source

Improve MDN scrapers

Closes #488.
Closes #572.
Thibaut Courouble 8 years ago
parent
commit
476c69e419

+ 19 - 7
lib/docs/core/scraper.rb

@@ -126,7 +126,7 @@ module Docs
           (options[:only] ||= []).concat initial_paths + (root_path? ? [root_path] : ['', '/'])
         end
 
-        options.merge!(additional_options) if respond_to?(:additional_options, true)
+        options.merge!(additional_options)
         options.freeze
       end
     end
@@ -197,18 +197,31 @@ module Docs
       @pipeline = nil
     end
 
+    def additional_options
+      {}
+    end
+
     module FixInternalUrlsBehavior
       def self.included(base)
         base.extend ClassMethods
       end
 
+      def self.prepended(base)
+        class << base
+          prepend ClassMethods
+        end
+      end
+
       module ClassMethods
-        attr_reader :internal_urls
+        def internal_urls
+          @internal_urls
+        end
 
         def store_pages(store)
           instrument 'info.doc', msg: 'Building internal urls...'
           with_internal_urls do
-            instrument 'info.doc', msg: 'Building pages...'
+            puts @internal_urls
+            instrument 'info.doc', msg: 'Continuing...'
             super
           end
         end
@@ -226,7 +239,7 @@ module Docs
       def fetch_internal_urls
         result = []
         build_pages do |page|
-          result << base_url.subpath_to(page[:response_url]) if page[:entries].present?
+          result << page[:subpath] if page[:entries].present?
         end
         result
       end
@@ -240,16 +253,15 @@ module Docs
 
       def additional_options
         if self.class.internal_urls
-          {
+          super.merge! \
             only: self.class.internal_urls.to_set,
             only_patterns: nil,
             skip: nil,
             skip_patterns: nil,
             skip_links: nil,
             fixed_internal_urls: true
-          }
         else
-          {}
+          super
         end
       end
 

+ 11 - 3
lib/docs/core/scrapers/url_scraper.rb

@@ -106,13 +106,21 @@ module Docs
         base.extend ClassMethods
       end
 
+      def self.prepended(base)
+        class << base
+          prepend ClassMethods
+        end
+      end
+
       module ClassMethods
-        attr_reader :redirections
+        def redirections
+          @redirections
+        end
 
         def store_pages(store)
           instrument 'info.doc', msg: 'Fetching redirections...'
           with_redirections do
-            instrument 'info.doc', msg: 'Building pages...'
+            instrument 'info.doc', msg: 'Continuing...'
             super
           end
         end
@@ -145,7 +153,7 @@ module Docs
       end
 
       def additional_options
-        { redirections: self.class.redirections }
+        super.merge! redirections: self.class.redirections
       end
     end
   end

+ 3 - 0
lib/docs/filters/core/internal_urls.rb

@@ -3,9 +3,12 @@
 module Docs
   class InternalUrlsFilter < Filter
     def call
+      result[:subpath] = subpath
+
       unless skip_links?
         follow_links? ? update_and_follow_links : update_links
       end
+
       doc
     end
 

+ 2 - 1
lib/docs/scrapers/mdn/dom.rb

@@ -1,6 +1,7 @@
 module Docs
   class Dom < Mdn
-    include FixRedirectionsBehavior
+    prepend FixInternalUrlsBehavior
+    prepend FixRedirectionsBehavior
 
     self.name = 'DOM'
     self.base_url = 'https://developer.mozilla.org/en-US/docs/Web/API'

+ 2 - 1
lib/docs/scrapers/mdn/javascript.rb

@@ -1,6 +1,7 @@
 module Docs
   class Javascript < Mdn
-    include FixRedirectionsBehavior
+    prepend FixInternalUrlsBehavior
+    prepend FixRedirectionsBehavior
 
     self.name = 'JavaScript'
     self.base_url = 'https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference'

+ 2 - 1
lib/docs/scrapers/mdn/svg.rb

@@ -1,6 +1,7 @@
 module Docs
   class Svg < Mdn
-    include FixRedirectionsBehavior
+    prepend FixInternalUrlsBehavior
+    prepend FixRedirectionsBehavior
 
     self.name = 'SVG'
     self.base_url = 'https://developer.mozilla.org/en-US/docs/Web/SVG'