Browse Source

Merge pull request #2569 from cpmsmith/rust-improvements

Improve Rust scraper
Simon Legner 2 months ago
parent
commit
ad9b58fcc6

+ 15 - 3
assets/javascripts/lib/page.js

@@ -271,13 +271,25 @@ var onclick = function (event) {
   }
 
   let link = $.eventTarget(event);
-  while (link && link.tagName !== "A") {
+  while (link && !(link.tagName === "A" || link.tagName === "a")) {
     link = link.parentNode;
   }
 
-  if (link && !link.target && isSameOrigin(link.href)) {
+  if (!link) return;
+
+  // If the `<a>` is in an SVG, its attributes are `SVGAnimatedString`s
+  // instead of strings
+  let href = link.href instanceof SVGAnimatedString
+    ? new URL(link.href.baseVal, location.href).href
+    : link.href;
+  let target = link.target instanceof SVGAnimatedString
+    ? link.target.baseVal
+    : link.target;
+
+  if (!target && isSameOrigin(href)) {
     event.preventDefault();
-    let path = link.pathname + link.search + link.hash;
+    let parsedHref = new URL(href);
+    let path = parsedHref.pathname + parsedHref.search + parsedHref.hash;
     path = path.replace(/^\/\/+/, "/"); // IE11 bug
     page.show(path);
   }

+ 10 - 0
assets/javascripts/lib/util.js

@@ -353,6 +353,16 @@ $.lockScroll = function (el, fn) {
   }
 };
 
+// If `el` is inside any `<details>` elements, expand them.
+$.openDetailsAncestors = function (el) {
+  while (el) {
+    if (el.tagName === "DETAILS") {
+      el.open = true;
+    }
+    el = el.parentElement;
+  }
+}
+
 let smoothScroll =
   (smoothStart =
   smoothEnd =

+ 1 - 0
assets/javascripts/views/content/content.js

@@ -114,6 +114,7 @@ app.views.Content = class Content extends app.View {
       $.scrollToWithImageLock(el, this.scrollEl, "top", {
         margin: this.scrollEl === this.el ? 0 : $.offset(this.el).top,
       });
+      $.openDetailsAncestors(el);
       $.highlight(el, { className: "_highlight" });
     } else {
       this.scrollTo(this.scrollMap[this.routeCtx.state.id]);

+ 69 - 0
assets/stylesheets/pages/_rust.scss

@@ -18,4 +18,73 @@
     float: right;
     margin-left: .5rem;
   }
+
+  .grammar-container { @extend %note, %note-gray; }
+
+  /* Railroad styles from:
+   * https://github.com/rust-lang/reference/blob/f82156b8c3a784158ce609bebfa3a77b5ae8a5ed/theme/reference.css#L683-L734
+   * Plus CSS variables inheriting from DevDocs variables
+   */
+
+  svg.railroad {
+    --railroad-background-color: var(--boxBackground);
+    --railroad-background-image:
+    linear-gradient(to right, rgb(from currentColor r g b / 0.1) 1px, transparent 1px),
+    linear-gradient(to bottom, rgb(from currentColor r g b / 0.1) 1px, transparent 1px);
+    --railroad-path-stroke: currentColor;
+    --railroad-rect-stroke: currentColor;
+    --railroad-rect-fill: var(--noteBackground);
+    --railroad-text-fill: currentColor;
+
+    background-color: var(--railroad-background-color);
+    background-size: 15px 15px;
+    background-image: var(--railroad-background-image);
+  }
+
+  svg.railroad rect.railroad_canvas {
+    stroke-width: 0px;
+    fill: none;
+  }
+
+  svg.railroad path {
+    stroke-width: 3px;
+    stroke: var(--railroad-path-stroke);
+    fill: none;
+  }
+
+  svg.railroad .debug {
+    stroke-width: 1px;
+    stroke: red;
+  }
+
+  svg.railroad text {
+    font: 14px monospace;
+    text-anchor: middle;
+    fill: var(--railroad-text-fill);
+  }
+
+  svg.railroad .nonterminal text {
+    font-weight: bold;
+  }
+
+  svg.railroad text.comment {
+    font: italic 12px monospace;
+  }
+
+  svg.railroad rect {
+    stroke-width: 3px;
+    stroke: var(--railroad-rect-stroke);
+    fill: var(--railroad-rect-fill);
+  }
+
+  svg.railroad g.labeledbox>rect {
+    stroke-width: 1px;
+    stroke: grey;
+    stroke-dasharray: 5px;
+    fill: rgba(90, 90, 150, .1);
+  }
+
+  svg.railroad g.exceptbox > rect {
+    fill:rgba(245, 160, 125, .1);
+  }
 }

+ 1 - 1
lib/docs/filters/core/clean_text.rb

@@ -2,7 +2,7 @@
 
 module Docs
   class CleanTextFilter < Filter
-    EMPTY_NODES_RGX = /<(?!td|th|iframe|mspace)(\w+)[^>]*>[[:space:]]*<\/\1>/
+    EMPTY_NODES_RGX = /<(?!td|th|iframe|mspace|rect|path|ellipse|line|polyline)(\w+)[^>]*>[[:space:]]*<\/\1>/
 
     def call
       return html if context[:clean_text] == false

+ 5 - 2
lib/docs/filters/core/normalize_paths.rb

@@ -7,8 +7,11 @@ module Docs
       result[:store_path] = store_path
 
       css('a').each do |link|
-        next unless (href = link['href']) && relative_url_string?(href)
-        link['href'] = normalize_href(href)
+        href = link['href']
+        link['href'] = normalize_href(href) if href && relative_url_string?(href)
+
+        xlink_href = link['xlink:href']
+        link['xlink:href'] = normalize_href(xlink_href) if xlink_href && relative_url_string?(xlink_href)
       end
 
       doc

+ 28 - 2
lib/docs/filters/rust/clean_html.rb

@@ -4,9 +4,9 @@ module Docs
   class Rust
     class CleanHtmlFilter < Filter
       def call
-        if slug.start_with?('book') ||  slug.start_with?('reference')
+        if slug.start_with?('book') ||  slug.start_with?('reference') || slug.start_with?('error_codes')
           @doc = at_css('#content main')
-        elsif slug == 'error-index'
+        elsif slug.start_with?('error_codes')
           css('.error-undescribed').remove
 
           css('.error-described').each do |node|
@@ -32,6 +32,8 @@ module Docs
 
         css('.doc-anchor').remove
 
+        css('.rule-link').remove
+
         # Fix notable trait sections
         css('.method, .rust.trait').each do |node|
           traitSection = node.at_css('.notable-traits')
@@ -55,6 +57,30 @@ module Docs
           node.before(node.children).remove
         end
 
+        css('button.grammar-toggle-railroad').remove
+        css('.grammar-container').each do |node|
+          next_element = node.next_element
+          if next_element && next_element['class'] && next_element['class'].include?('grammar-railroad')
+            next_element.remove
+            node.add_child(next_element)
+          end
+
+          node.css('[onclick="show_railroad()"]').each do |subnode|
+            subnode.remove_attribute('onclick')
+          end
+
+          # We changed this to a <pre> in parse(), changing it back here
+          node.name = 'div'
+          node.css('.grammar-literal').each do |literal|
+            literal.name = 'code'
+          end
+        end
+
+        css('.grammar-railroad').each do |node|
+          node.name = 'details'
+          node.prepend_child("<summary>Syntax diagram</summary>")
+        end
+
         css('a.header').each do |node|
           unless node.first_element_child.nil?
             node.first_element_child['id'] = node['name'] || node['id']

+ 16 - 11
lib/docs/filters/rust/entries.rb

@@ -3,13 +3,22 @@ module Docs
     class EntriesFilter < Docs::EntriesFilter
 
       def get_name
-        if slug.start_with?('book') || slug.start_with?('reference')
-          name = at_css("h2", "h1")
-          ch1 = slug[/ch(\d+)-(\d+)/, 1]
-          ch2 = slug[/ch(\d+)-(\d+)/, 2]
+        if slug.start_with?('book')
+          name = at_css('main h1', 'main h2')
+
+          if slug.start_with?('book/appendix')
+            return name ? name.content : 'Appendix'
+          end
+
+          ch1 = slug[/ch(\d+)-(\d+)/, 1] || '00'
+          ch2 = slug[/ch(\d+)-(\d+)/, 2] || '00'
           name ? "#{ch1}.#{ch2}. #{name.content}" : 'Introduction'
-        elsif slug == 'error-index'
+        elsif slug.start_with?('reference')
+          at_css('main h1').content
+        elsif slug == 'error_codes/error-index'
           'Compiler Errors'
+        elsif slug.start_with?('error_codes')
+          slug.split('/').last.upcase
         else
           at_css('main h1').at_css('button')&.remove
           name = at_css('main h1').content.remove(/\A.+\s/).remove('⎘')
@@ -26,7 +35,7 @@ module Docs
           'Guide'
         elsif slug.start_with?('reference')
           'Reference'
-        elsif slug == 'error-index'
+        elsif slug.start_with?('error_codes')
           'Compiler Errors'
         else
           path = name.split('::')
@@ -40,12 +49,8 @@ module Docs
       end
 
       def additional_entries
-        if slug.start_with?('book') || slug.start_with?('reference')
+        if slug.start_with?('book') || slug.start_with?('reference') || slug.start_with?('error_codes')
           []
-        elsif slug == 'error-index'
-          css('.error-described h2.section-header').each_with_object [] do |node, entries|
-            entries << [node.content, node['id']] unless node.content.include?('Note:')
-          end
         else
           css('.method')
             .each_with_object({}) { |node, entries|

+ 6 - 3
lib/docs/scrapers/rust.rb

@@ -3,13 +3,13 @@
 module Docs
   class Rust < UrlScraper
     self.type = 'rust'
-    self.release = '1.88.0'
+    self.release = '1.90.0'
     self.base_url = 'https://doc.rust-lang.org/'
     self.root_path = 'book/index.html'
     self.initial_paths = %w(
       reference/introduction.html
       std/index.html
-      error-index.html)
+      error_codes/error-index.html)
     self.links = {
       home: 'https://www.rust-lang.org/',
       code: 'https://github.com/rust-lang/rust'
@@ -21,7 +21,8 @@ module Docs
       /\Abook\//,
       /\Areference\//,
       /\Acollections\//,
-      /\Astd\// ]
+      /\Astd\//,
+      /\Aerror_codes\//, ]
 
     options[:skip] = %w(book/README.html book/ffi.html)
     options[:skip_patterns] = [/(?<!\.html)\z/, /\/print\.html/, /\Abook\/second-edition\//]
@@ -56,6 +57,8 @@ module Docs
 
     def parse(response) # Hook here because Nokogiri removes whitespace from headings
       response.body.gsub! %r{<h[1-6] class="code-header">}, '<pre class="code-header">'
+      # The reference uses whitespace for indentation in grammar definitions, so swap that container <div> for a <pre> too
+      response.body.gsub! %r{<div class="grammar-container">([\W\w]+?)</div>}, '<pre class="grammar-container">\1</pre>'
       super
     end
   end