소스 검색

Update HTTP documentation and bring back RFC entries

Closes #578.
Thibaut Courouble 8 년 전
부모
커밋
a64d8f3d12
2개의 변경된 파일, 95개의 추가 그리고 3개의 삭제
  1. 91 3
      lib/docs/filters/http/entries.rb
  2. 4 0
      lib/docs/scrapers/http.rb

+ 91 - 3
lib/docs/filters/http/entries.rb

@@ -6,9 +6,9 @@ module Docs
           name = at_css('h1').content
           name.remove! %r{\A.+\:}
           name.remove! %r{\A.+\-\-}
-          rfc = slug.sub('rfc', 'RFC ')
+          name = 'WebDAV' if name.include?('WebDAV')
           "#{rfc}: #{name.strip}"
-        elsif slug.start_with?('Status')
+        elsif slug.start_with?('Status/')
           at_css('code').content
         else
           name = super
@@ -22,7 +22,7 @@ module Docs
       end
 
       def get_type
-        return 'RFC' if current_url.host == 'tools.ietf.org'
+        return name if current_url.host == 'tools.ietf.org'
 
         if slug.start_with?('Headers/Content-Security-Policy')
           'CSP'
@@ -38,6 +38,94 @@ module Docs
           'Guides'
         end
       end
+
+      def rfc  # Display label built from the slug: the first "rfc" becomes "RFC " (e.g. "rfc7231" -> "RFC 7231").
+        slug.sub('rfc', 'RFC ')
+      end
+
+      SECTIONS = {  # Per-slug whitelist of top-level section numbers consulted by additional_entries.
+        'rfc2616' => [  # Each value holds three lists, selected by the depth of the section number:
+          [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15],  # [0] top-level sections ("N") that get their own entry
+          [14],  # [1] top-level numbers whose "N.x" subsections get entries
+          []  # [2] top-level numbers whose "N.x.y" subsections get entries
+        ],
+        'rfc4918' => [
+          [],
+          [11],
+          []
+        ],
+        'rfc7230' => [
+          (2..9).to_a,
+          [],
+          []
+        ],
+        'rfc7231' => [
+          [3, 8, 9],
+          [],
+          [4, 5, 6, 7]
+        ],
+        'rfc7232' => [
+          [5, 6, 7, 8],
+          [2, 3, 4],
+          []
+        ],
+        'rfc7233' => [
+          [5, 6],
+          [2, 3, 4],
+          []
+        ],
+        'rfc7234' => [
+          [3, 6, 7, 8],
+          [4, 5],
+          []
+        ],
+        'rfc7235' => [
+          [2, 5, 6],
+          [3, 4],
+          []
+        ]
+      }
+
+      LEVEL_1 = /\A(\d+)\z/  # "N": a top-level section number; captures N.
+      LEVEL_2 = /\A(\d+)\.\d+\z/  # "N.x": a second-level section number; captures only the top-level N.
+      LEVEL_3 = /\A(\d+)\.\d+\.\d+\z/  # "N.x.y": a third-level section number; captures only the top-level N.
+
+      def additional_entries  # Builds extra [name, id, type] entries from the "#section-..." links of a tools.ietf.org page.
+        return [] unless current_url.host == 'tools.ietf.org'  # Pages from any other host get no extra entries.
+        type = nil  # Category used for sub-section entries; reassigned at every top-level section below.
+
+        css('a[href^="#section-"]').each_with_object([]) do |node, entries|  # Visit each link whose href starts with "#section-".
+          id = node['href'].remove('#')  # Entry id is the href without the "#".
+          break entries if entries.any? { |e| e[1] == id }  # Stop at the first id already collected and return what was gathered (presumably the end of the table of contents - TODO confirm).
+
+          content = node.next.content.strip  # Title text is read from the node that follows the link.
+          content.remove! %r{\s*\.+\d*\z}  # Drop a trailing run of dots and any digits after it.
+          content.remove! %r{\A[\.\s]+}  # Drop leading dots and whitespace.
+
+          name = "#{content} (#{rfc})"  # This local shadows the name method, hence self.name below.
+          number = node.content.strip  # Link text, matched against the LEVEL_* patterns.
+
+          if number =~ LEVEL_1  # Top-level section: maybe add an entry, then update the running category.
+            if SECTIONS[slug][0].include?($1.to_i)  # Only whitelisted top-level sections get their own entry.
+              entries << [name, id, self.name]  # Filed under the value of the name method.
+            end
+
+            type = content.sub(/\ Definitions\z/, 's')  # A trailing " Definitions" becomes "s", e.g. "Method Definitions" -> "Methods".
+            if type.include?('Header Fields')  # Map the section title to the category used for the sub-sections that follow.
+              type = 'Headers'
+            elsif type.include?('Status Codes')
+              type = 'Status'
+            elsif type.include?('Methods')
+              type = 'Methods'
+            else
+              type = self.name  # Any other title falls back to the value of the name method.
+            end
+          elsif (number =~ LEVEL_2 && SECTIONS[slug][1].include?($1.to_i)) ||  # Sub-sections count only when their top-level number is whitelisted for that depth.
+                (number =~ LEVEL_3 && SECTIONS[slug][2].include?($1.to_i))
+            entries << [name, id, (name =~ /\A\d\d\d/ ? 'Status' : type )]  # Names starting with three digits are filed under 'Status'; others use the running category.
+          end
+        end
+      end
     end
   end
 end

+ 4 - 0
lib/docs/scrapers/http.rb

@@ -17,6 +17,10 @@ module Docs
     options[:skip_links] = ->(filter) {  # Evaluates to true only when the filter's current URL is hosted on tools.ietf.org.
       filter.current_url.host == 'tools.ietf.org' ? true : false
     }
+    options[:fix_urls] = ->(url) {  # Shortens "Status/NNN_Xyz..." URLs to "Status/NNN"; other URLs pass through unchanged.
+      url.sub! %r{(Status/\d\d\d)_[A-Z].+}, '\1'  # Mutates the given string in place.
+      url  # Return url explicitly because sub! returns nil when nothing matched.
+    }
     options[:attribution] = ->(filter) {
       if filter.current_url.host == 'tools.ietf.org'
         "&copy; document authors. All rights reserved."