Ver código fonte

ocaml: polish scraper

Simon Legner 5 anos atrás
pai
commit
7128d2d988

+ 16 - 5
lib/docs/filters/ocaml/clean_html.rb

@@ -2,21 +2,32 @@ module Docs
   class Ocaml
     class CleanHtmlFilter < Filter
       def call
-        css('pre').each do |node|
+
+        css('pre, .caml-example').each do |node|
+          span = node.at_css('span[id]')
+          node['id'] = span['id'] if span
+          node['data-type'] = "#{span.content} [#{at_css('h1').content}]" if span
           node['data-language'] = 'ocaml'
+          node.name = 'pre'
+          node.content = node.content
         end
 
         css('.caml-input').each do |node|
           node.content = '# ' + node.content.strip
         end
 
-        css('.caml-example').each do |node|
-          node.name = 'pre'
-          node.traverse { |n| n.remove if n.text? && n.text !~ /\S/ }
+        css('.maintitle *[style]').each do |node|
+          node.remove_attribute 'style'
+        end
 
-          node['data-language'] = 'ocaml'
+        css('h1').each do |node|
+          node.content = node.content
+          table = node.ancestors('table.center')
+          table.first.before(node).remove if table.present?
         end
 
+        css('.navbar').remove
+
         doc
       end
     end

+ 3 - 9
lib/docs/filters/ocaml/entries.rb

@@ -37,11 +37,8 @@ module Docs
 
         module_node = css('h1').at_css('span')
 
-        css('pre').each do |node|
-          next unless span = node.at_css('span')
-          if span['id'].nil?
-            next
-          elsif span['id'].start_with?('VAL')
+        css('pre > span[id]').each do |span|
+          if span['id'].start_with?('VAL')
             entry_type = 'Values'
           elsif span['id'].start_with?('MODULE')
             entry_type = 'Modules'
@@ -52,12 +49,9 @@ module Docs
           end
 
           name = span.content
-          if not module_node.nil?
-            name = "#{name} [#{module_node.content}]"
-          end
+          name += " [#{module_node.content}]" unless module_node.nil?
           entries << [name, span['id'], entry_type]
         end
-
         entries
       end
     end