Ver código fonte

Improve D scraper

Thibaut Courouble 8 anos atrás
pai
commit
8225e1d7e6

+ 2 - 0
assets/stylesheets/pages/_d.scss

@@ -2,6 +2,8 @@
   h2 { @extend %block-heading; }
   h3, .d_decl { @extend %block-label, %label-blue; }
   .d_decl { @extend %code; }
+  .d_decl > small { color: $textColorLight; }
+  .d_decl > strong { font-weight: $bolderFontWeight; }
 
   p > code, li > code, td > code, dd > code { @extend %label; }
 

+ 28 - 5
lib/docs/filters/d/clean_html.rb

@@ -28,17 +28,40 @@ module Docs
           node.replace("<dl><dt>#{dt}</dt><dd>#{dd}</dd></dl>")
         end
 
+        css('.description > .blankline:first-child + .quickindex').each do |node|
+          node.next_element.remove if node.next_element && node.next_element['class'] == 'blankline'
+          node.previous_element.remove
+          node.parent.before(node)
+        end
+
         css('div.summary', 'div.description').each do |node|
           node.name = 'p' unless node.at_css('p')
           node.css('.blankline').each { |n| n.replace('<br><br>') }
         end
 
         css('.d_decl').each do |node|
-          node['id'] = node.at_css('.def-anchor')['id'].remove(/\A\./)
-          constraints = node.css('.constraint').remove
-          node.content = node.content.strip
-          node.inner_html = node.inner_html.gsub(/;\s*/, '<br>').remove(/<br>\z/)
-          node << "<br><br>  Constraints:<br>    #{constraints.map(&:content).join('<br>    ')}" unless constraints.empty?
+          node['id'] ||= node.at_css('.quickindex[id]')['id'].remove('quickindex.')
+
+          node.css('.def-anchor[id]').each do |n|
+            n.next_element['id'] ||= n['id']
+          end
+
+          node.css('.constraint').each do |n|
+            n.content = "  Constraints: #{n.content}#{n.next.remove.content if n.next.text?}"
+            n.name = 'small'
+            n.remove_attribute('class')
+          end
+
+          node.css('code[id]').each do |n|
+            n.name = 'strong'
+            n.remove_attribute('class')
+          end
+
+          node.css('*').each do |n|
+            n.before(n.children).remove unless n.name == 'br' || n.name == 'small' || n.name == 'strong'
+          end
+
+          node.inner_html = node.inner_html.remove(/<br>\z/)
         end
 
         css('pre').each do |node|

+ 1 - 5
lib/docs/filters/d/entries.rb

@@ -29,14 +29,10 @@ module Docs
 
         entries = []
 
-        css('.book > tr > td > a').each do |node|
-          entries << ["#{self.name}.#{node.content}", node['href'].remove(/\A#/).remove(/\A\./)]
-        end
-
         if entries.empty?
           css('.quickindex[id]').each do |node|
             name = node['id'].remove(/quickindex\.?/)
-            next if name.empty? || name =~ /\.\d+\z/
+            next if name.empty? || name =~ /\.\d+\z/ || name =~ /\A([^\.]+)\.\1\z/
             entries << ["#{self.name}.#{name}", name]
           end
         end