瀏覽代碼

Improve Rust scraper

Thibaut Courouble 7 年之前
父節點
當前提交
7b7aa34b70
共有 3 個文件被更改,包括 43 次插入和 6 次刪除
  1. 10 1
      assets/stylesheets/pages/_rust.scss
  2. 32 4
      lib/docs/filters/rust/clean_html.rb
  3. 1 1
      lib/docs/scrapers/rust.rb

+ 10 - 1
assets/stylesheets/pages/_rust.scss

@@ -3,9 +3,18 @@
 
   h4 { @extend %block-label; }
   .docblock { margin-left: 1em; }
+  div.information, div.important-traits {
+    @extend %note;
+
+    > pre { margin: .5rem 0; }
+  }
 
   div.stability { margin-bottom: 1em; }
   em.stab, span.stab { @extend %label; }
   em.stab.unstable, span.stab.unstable { @extend %label-orange; }
-  .since, .out-of-band { float: right; }
+  .out-of-band { float: right; }
+  .since, .srclink {
+    float: right;
+    margin-left: .5rem;
+  }
 }

+ 32 - 4
lib/docs/filters/rust/clean_html.rb

@@ -2,8 +2,6 @@ module Docs
   class Rust
     class CleanHtmlFilter < Filter
       def call
-        puts subpath if at_css('#versioninfo')
-
         if slug.start_with?('book') ||  slug.start_with?('reference')
           @doc = at_css('#content main')
         elsif slug == 'error-index'
@@ -29,12 +27,16 @@ module Docs
 
         css('.rusttest', '.test-arrow', 'hr').remove
 
+        css('.docblock.attributes').each do |node|
+          node.remove if node.content.include?('#[must_use]')
+        end
+
         css('a.header').each do |node|
           node.first_element_child['id'] = node['name'] || node['id']
           node.before(node.children).remove
         end
 
-        css('.docblock > h1').each { |node| node.name = 'h4' }
+        css('.docblock > h1:not(.section-header)').each { |node| node.name = 'h4' }
         css('h2.section-header').each { |node| node.name = 'h3' }
         css('h1.section-header').each { |node| node.name = 'h2' }
 
@@ -44,7 +46,7 @@ module Docs
           end
         end
 
-        css('> .impl-items', '> .docblock', 'pre > pre').each do |node|
+        css('> .impl-items', '> .docblock', 'pre > pre', '.tooltiptext', '.tooltip').each do |node|
           node.before(node.children).remove
         end
 
@@ -65,6 +67,32 @@ module Docs
         doc.first_element_child.name = 'h1' if doc.first_element_child.name = 'h2'
         at_css('h1').content = 'Rust Documentation' if root_page?
 
+        css('.table-display').each do |node|
+          node.css('td').each do |td|
+            node.before(td.children)
+          end
+          node.remove
+        end
+
+        css('h2 .important-traits', 'h3 .important-traits', 'h4 .important-traits').each do |node|
+          content = node.at_css('.content.hidden .content')
+          node.at_css('.content.hidden').replace(content) if content
+          node.parent.after(node)
+        end
+
+        css('code.content').each do |node|
+          node.name = 'pre'
+          node.css('.fmt-newline').each do |line|
+            line.inner_html = line.inner_html + "\n"
+          end
+          node.inner_html = node.inner_html.gsub('<br>', "\n")
+          node.content = node.content
+        end
+
+        css('.since + .srclink').each do |node|
+          node.previous_element.before(node)
+        end
+
         doc
       end
     end

+ 1 - 1
lib/docs/scrapers/rust.rb

@@ -1,7 +1,7 @@
 module Docs
   class Rust < UrlScraper
     self.type = 'rust'
-    self.release = '1.28.0'
+    self.release = '1.29.1'
     self.base_url = 'https://doc.rust-lang.org/'
     self.root_path = 'book/second-edition/index.html'
     self.initial_paths = %w(