Browse Source

Refactor PHP scraper with new initial_paths option

Thibaut 12 years ago
parent
commit
b66d6d93d6
2 changed files with 11 additions and 18 deletions
  1. 1 1
      lib/docs/filters/php/entries.rb
  2. 10 17
      lib/docs/scrapers/php.rb

+ 1 - 1
lib/docs/filters/php/entries.rb

@@ -106,7 +106,7 @@ module Docs
       end
       end
 
 
       def include_default_entry?
       def include_default_entry?
-        Php::INDEX_PATHS.exclude?(subpath) && doc.at_css('.reference', '.refentry', '.sect1')
+        !initial_page? && doc.at_css('.reference', '.refentry', '.sect1')
       end
       end
     end
     end
   end
   end

+ 10 - 17
lib/docs/scrapers/php.rb

@@ -5,18 +5,7 @@ module Docs
     self.version = 'up to 5.5.6'
     self.version = 'up to 5.5.6'
     self.base_url = 'http://www.php.net/manual/en/'
     self.base_url = 'http://www.php.net/manual/en/'
     self.root_path = 'index.html'
     self.root_path = 'index.html'
-
-    # Downloaded from php.net/download-docs.php
-    self.dir = '/Users/Thibaut/DevDocs/Docs/PHP'
-
-    html_filters.push 'php/internal_urls', 'php/entries', 'php/clean_html', 'title'
-    text_filters.push 'php/fix_urls'
-
-    options[:title] = false
-    options[:root_title] = 'PHP: Hypertext Preprocessor'
-
-    INDEX_PATHS = %w(
-      index.html
+    self.initial_paths = %w(
       funcref.html
       funcref.html
       refs.database.html
       refs.database.html
       set.mysqlinfo.html
       set.mysqlinfo.html
@@ -25,11 +14,15 @@ module Docs
       reserved.interfaces.html
       reserved.interfaces.html
       reserved.variables.html)
       reserved.variables.html)
 
 
-    options[:skip_links] = ->(filter) do
-      INDEX_PATHS.exclude?(filter.subpath)
-    end
+    # Downloaded from php.net/download-docs.php
+    self.dir = '/Users/Thibaut/DevDocs/Docs/PHP'
+
+    html_filters.push 'php/internal_urls', 'php/entries', 'php/clean_html', 'title'
+    text_filters.push 'php/fix_urls'
 
 
-    options[:only] = INDEX_PATHS.dup
+    options[:title] = false
+    options[:root_title] = 'PHP: Hypertext Preprocessor'
+    options[:skip_links] = ->(filter) { !filter.initial_page? }
 
 
     options[:only_patterns] = [
     options[:only_patterns] = [
       /\Aclass\./,
       /\Aclass\./,
@@ -49,7 +42,7 @@ module Docs
       sqlite3 sqlsrv ssh2 stats stream strings taint tidy uodbc url var varnish
       sqlite3 sqlsrv ssh2 stats stream strings taint tidy uodbc url var varnish
       xml xmlreader xmlrpc xmlwriter xsl yaf yaml zip zlib)
       xml xmlreader xmlrpc xmlwriter xsl yaf yaml zip zlib)
 
 
-    options[:only].concat BOOKS.map { |s| "book.#{s}.html" }
+    options[:only] = BOOKS.map { |s| "book.#{s}.html" }
 
 
     options[:skip] = %w(
     options[:skip] = %w(
       control-structures.intro.html
       control-structures.intro.html