Ver código fonte

Add HTTP documentation from MDN

Thibaut Courouble 9 anos atrás
pai
commit
8a59a660cf

+ 3 - 0
assets/javascripts/news.json

@@ -1,5 +1,8 @@
 [
   [
+    "2017-01-22",
+    "New <a href=\"/http/\">HTTP</a> documentation (thanks Mozilla)"
+  ], [
     "2016-12-04",
     "New documentations: <a href=\"/sqlite/\">SQLite</a>, <a href=\"/codeception/\">Codeception</a> and <a href=\"/codeceptjs/\">CodeceptJS</a>"
   ], [

+ 1 - 1
assets/javascripts/templates/pages/about_tmpl.coffee

@@ -180,7 +180,7 @@ credits = [
     'Apache',
     'https://raw.githubusercontent.com/apache/cordova-docs/master/LICENSE'
   ], [
-    'CSS<br>DOM<br>HTML<br>JavaScript<br>SVG<br>XPath',
+    'CSS<br>DOM<br>HTTP<br>HTML<br>JavaScript<br>SVG<br>XPath',
     '2005-2017 Mozilla Developer Network and individual contributors',
     'CC BY-SA',
     'https://creativecommons.org/licenses/by-sa/2.5/'

+ 21 - 5
lib/docs/filters/http/clean_html.rb

@@ -2,11 +2,29 @@ module Docs
   class Http
     class CleanHtmlFilter < Filter
       def call
-        if root_page?
-          doc.inner_html = '<h1>Hypertext Transfer Protocol</h1>'
-          return doc
+        current_url.host == 'tools.ietf.org' ? ietf : mdn
+        doc
+      end
+
+      def mdn
+        css('.column-container', '.column-half').each do |node|
+          node.before(node.children).remove
+        end
+
+        css('p > code + strong').each do |node|
+          code = node.previous_element
+          if code.content =~ /\A[\s\d]+\z/
+            code.content = "#{code.content.strip} #{node.content.strip}"
+            node.remove
+          end
         end
 
+        css('strong > code').each do |node|
+          node.parent.before(node.parent.children).remove
+        end
+      end
+
+      def ietf
         doc.child.remove while doc.child.name != 'pre'
 
         css('span.grey', '.invisible', '.noprint', 'a[href^="#page-"]').remove
@@ -33,8 +51,6 @@ module Docs
         html.remove! %r[\.{2,}$]
         html.gsub! %r[(^\n$){3,}], "\n"
         doc.inner_html = %(<div class="_rfc-pre">#{html}</div>)
-
-        doc
       end
     end
   end

+ 31 - 86
lib/docs/filters/http/entries.rb

@@ -2,95 +2,40 @@ module Docs
   class Http
     class EntriesFilter < Docs::EntriesFilter
       def get_name
-        name = at_css('h1').content
-        name.remove! %r{\A.+\:}
-        name.remove! %r{\A.+\-\-}
-        "#{rfc}: #{name.strip}"
+        if current_url.host == 'tools.ietf.org'
+          name = at_css('h1').content
+          name.remove! %r{\A.+\:}
+          name.remove! %r{\A.+\-\-}
+          rfc = slug.sub('rfc', 'RFC ')
+          "#{rfc}: #{name.strip}"
+        elsif slug.start_with?('Status')
+          at_css('code').content
+        else
+          name = super
+          name.remove! %r{\A\w+\.}
+          name.remove! 'Basics of HTTP.'
+          name.sub! 'Content-Security-Policy.', 'CSP.'
+          name.sub! '.', ': '
+          name.sub! '1: x', '1.x'
+          name
+        end
       end
 
       def get_type
-        'RFC'
-      end
-
-      def rfc
-        slug.sub('rfc', 'RFC ')
-      end
-
-      SECTIONS = {
-        'rfc2616' => [
-          [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15],
-          [14],
-          []
-        ],
-        'rfc4918' => [
-          [],
-          [11],
-          []
-        ],
-        'rfc7230' => [
-          (2..9).to_a,
-          [],
-          []
-        ],
-        'rfc7231' => [
-          [3, 8, 9],
-          [],
-          [4, 5, 6, 7]
-        ],
-        'rfc7232' => [
-          [5, 6, 7, 8],
-          [2, 3, 4],
-          []
-        ],
-        'rfc7233' => [
-          [5, 6],
-          [2, 3, 4],
-          []
-        ],
-        'rfc7234' => [
-          [3, 6, 7, 8],
-          [4, 5],
-          []
-        ],
-        'rfc7235' => [
-          [2, 5, 6],
-          [3, 4],
-          []
-        ]
-      }
-
-      LEVEL_1 = /\A(\d+)\z/
-      LEVEL_2 = /\A(\d+)\.\d+\z/
-      LEVEL_3 = /\A(\d+)\.\d+\.\d+\z/
-
-      def additional_entries
-        return [] if root_page?
-        type = nil
-
-        css('a[href^="#section-"]').each_with_object([]) do |node, entries|
-          id = node['href'].remove('#')
-          break entries if entries.any? { |e| e[1] == id }
-
-          content = node.next.content.strip
-          content.remove! %r{\s*\.+\d*\z}
-          content.remove! %r{\A[\.\s]+}
-
-          name = "#{content} (#{rfc})"
-          number = node.content.strip
-
-          if number =~ LEVEL_1
-            if SECTIONS[slug][0].include?($1.to_i)
-              entries << [name, id, self.name]
-            end
-
-            type = content.sub(/\ Definitions\z/, 's')
-            type = 'Request Header Fields' if type.include?('Header Fields') && type.exclude?('Response')
-            type = 'Response Status Codes' if type.include?('Status Codes')
-            type = self.name unless type.start_with?('Request ') || type.start_with?('Response ')
-          elsif (number =~ LEVEL_2 && SECTIONS[slug][1].include?($1.to_i)) ||
-                (number =~ LEVEL_3 && SECTIONS[slug][2].include?($1.to_i))
-            entries << [name, id, (name =~ /\A\d\d\d/ ? 'Response Status Codes' : type )]
-          end
+        return 'RFC' if current_url.host == 'tools.ietf.org'
+
+        if slug.start_with?('Headers/Content-Security-Policy')
+          'CSP'
+        elsif slug.start_with?('Headers')
+          'Headers'
+        elsif slug.start_with?('Methods')
+          'Methods'
+        elsif slug.start_with?('Status')
+          'Status'
+        elsif slug.start_with?('Basics_of_HTTP')
+          'Guides: Basics'
+        else
+          'Guides'
         end
       end
     end

+ 3 - 0
lib/docs/filters/mdn/contribute_link.rb

@@ -2,6 +2,8 @@ module Docs
   class Mdn
     class ContributeLinkFilter < Filter
       def call
+        return html if current_url.host != 'developer.mozilla.org'
+
         html << <<-HTML.strip_heredoc
           <div class="_attribution">
             <p class="_attribution-p">
@@ -9,6 +11,7 @@ module Docs
             </p>
           </div>
         HTML
+
         html
       end
     end

+ 34 - 11
lib/docs/scrapers/http.rb

@@ -1,17 +1,40 @@
 module Docs
-  class Http < UrlScraper
+  class Http < Mdn
+    include MultipleBaseUrls
+
     self.name = 'HTTP'
-    self.type = 'rfc'
-    self.base_url = 'https://tools.ietf.org/html/'
-    self.initial_paths = %w(rfc2616 rfc4918 rfc7230 rfc7231
-      rfc7232 rfc7233 rfc7234 rfc7235)
+    self.base_urls = ['https://developer.mozilla.org/en-US/docs/Web/HTTP', 'https://tools.ietf.org/html/']
+
+    html_filters.push 'http/clean_html', 'http/entries', 'title'
 
-    html_filters.push 'http/clean_html', 'http/entries'
+    options[:root_title] = 'HTTP'
+    options[:title] = ->(filter) {
+      filter.current_url.host == 'tools.ietf.org' ? false : filter.default_title
+    }
+    options[:container] = ->(filter) {
+      filter.current_url.host == 'tools.ietf.org' ? '.content' : nil
+    }
+    options[:skip_links] = ->(filter) {
+      filter.current_url.host == 'tools.ietf.org' ? true : false
+    }
+    options[:attribution] = ->(filter) {
+      if filter.current_url.host == 'tools.ietf.org'
+        "&copy; document authors. All rights reserved."
+      else
+        Docs::Mdn.options[:attribution]
+      end
+    }
 
-    options[:container] = '.content'
-    options[:skip_links] = true
-    options[:attribution] = <<-HTML
-      &copy; document authors. All rights reserved.
-    HTML
+    def initial_urls
+      %w(https://developer.mozilla.org/en-US/docs/Web/HTTP
+         https://tools.ietf.org/html/rfc2616
+         https://tools.ietf.org/html/rfc4918
+         https://tools.ietf.org/html/rfc7230
+         https://tools.ietf.org/html/rfc7231
+         https://tools.ietf.org/html/rfc7232
+         https://tools.ietf.org/html/rfc7233
+         https://tools.ietf.org/html/rfc7234
+         https://tools.ietf.org/html/rfc7235)
+    end
   end
 end