Browse Source

Finish Nim scraper

Thibaut Courouble 8 years ago
parent
commit
a4ba32ebca

BIN
assets/images/docs-2.png


BIN
assets/images/docs-2@2x.png


+ 3 - 0
assets/javascripts/news.json

@@ -1,5 +1,8 @@
 [
   [
+    "2017-09-03",
+    "New documentation: <a href=\"/nim/\">Nim</a>"
+  ], [
     "2017-07-23",
     "New documentation: <a href=\"/godot/\">Godot</a>"
   ], [

+ 5 - 0
assets/javascripts/templates/pages/about_tmpl.coffee

@@ -423,6 +423,11 @@ credits = [
     '2009-2016 Xiaozhe Wang (chaoslawful)<br>&copy; 2009-2017 Yichun "agentzh" Zhang (章亦春), OpenResty Inc.',
     'BSD',
     'https://github.com/openresty/lua-nginx-module#copyright-and-license'
+  ], [
+    'Nim',
+    '2006-2017 Andreas Rumpf',
+    'MIT',
+    'https://github.com/nim-lang/Nim#license'
   ], [
     'Node.js',
     'Joyent, Inc. and other Node contributors<br>Node.js is a trademark of Joyent, Inc.',

+ 40 - 3
assets/javascripts/vendor/prism.js

@@ -1,4 +1,4 @@
-/* http://prismjs.com/download.html?themes=prism&languages=markup+css+clike+javascript+c+cpp+coffeescript+ruby+elixir+erlang+go+java+json+kotlin+lua+nginx+perl+php+python+crystal+rust+scss+sql+typescript */
+/* http://prismjs.com/download.html?themes=prism&languages=markup+css+clike+javascript+c+cpp+coffeescript+ruby+elixir+erlang+go+java+json+kotlin+lua+nginx+nim+perl+php+python+crystal+rust+scss+sql+typescript */
 var _self = (typeof window !== 'undefined')
 	? window   // if in browser
 	: (
@@ -568,6 +568,9 @@ Prism.languages.markup = {
 	'entity': /&#?[\da-z]{1,8};/i
 };
 
+Prism.languages.markup['tag'].inside['attr-value'].inside['entity'] =
+	Prism.languages.markup['entity'];
+
 // Plugin to make entity title show the real entity, idea by Roman Komarov
 Prism.hooks.add('wrap', function(env) {
 
@@ -664,7 +667,7 @@ Prism.languages.clike = {
 
 Prism.languages.javascript = Prism.languages.extend('clike', {
 	'keyword': /\b(as|async|await|break|case|catch|class|const|continue|debugger|default|delete|do|else|enum|export|extends|finally|for|from|function|get|if|implements|import|in|instanceof|interface|let|new|null|of|package|private|protected|public|return|set|static|super|switch|this|throw|try|typeof|var|void|while|with|yield)\b/,
-	'number': /\b-?(0x[\dA-Fa-f]+|0b[01]+|0o[0-7]+|\d*\.?\d+([Ee][+-]?\d+)?|NaN|Infinity)\b/,
+	'number': /\b-?(0[xX][\dA-Fa-f]+|0[bB][01]+|0[oO][0-7]+|\d*\.?\d+([Ee][+-]?\d+)?|NaN|Infinity)\b/,
 	// Allow for all non-ASCII characters (See http://stackoverflow.com/a/2008444)
 	'function': /[_$a-zA-Z\xA0-\uFFFF][_$a-zA-Z0-9\xA0-\uFFFF]*(?=\()/i,
 	'operator': /-[-=]?|\+[+=]?|!=?=?|<<?=?|>>?>?=?|=(?:==?|>)?|&[&=]?|\|[|=]?|\*\*?=?|\/=?|~|\^=?|%=?|\?|\.{3}/
@@ -672,7 +675,7 @@ Prism.languages.javascript = Prism.languages.extend('clike', {
 
 Prism.languages.insertBefore('javascript', 'keyword', {
 	'regex': {
-		pattern: /(^|[^/])\/(?!\/)(\[.+?]|\\.|[^/\\\r\n])+\/[gimyu]{0,5}(?=\s*($|[\r\n,.;})]))/,
+		pattern: /(^|[^/])\/(?!\/)(\[[^\]\r\n]+]|\\.|[^/\\\[\r\n])+\/[gimyu]{0,5}(?=\s*($|[\r\n,.;})]))/,
 		lookbehind: true,
 		greedy: true
 	}
@@ -710,6 +713,7 @@ if (Prism.languages.markup) {
 }
 
 Prism.languages.js = Prism.languages.javascript;
+
 Prism.languages.c = Prism.languages.extend('clike', {
 	'keyword': /\b(asm|typeof|inline|auto|break|case|char|const|continue|default|do|double|else|enum|extern|float|for|goto|if|int|long|register|return|short|signed|sizeof|static|struct|switch|typedef|union|unsigned|void|volatile|while)\b/,
 	'operator': /\-[>-]?|\+\+?|!=?|<<?=?|>>?=?|==?|&&?|\|?\||[~^%?*\/]/,
@@ -1245,6 +1249,39 @@ Prism.languages.nginx = Prism.languages.extend('clike', {
 Prism.languages.insertBefore('nginx', 'keyword', {
         'variable': /\$[a-z_]+/i
 });
+Prism.languages.nim = {
+	'comment': /#.*/,
+	// Double-quoted strings can be prefixed by an identifier (Generalized raw string literals)
+	// Character literals are handled specifically to prevent issues with numeric type suffixes
+	'string': {
+		pattern: /(?:(?:\b(?!\d)(?:\w|\\x[8-9a-fA-F][0-9a-fA-F])+)?(?:"""[\s\S]*?"""(?!")|"(?:\\[\s\S]|""|[^"\\])*")|'(?:\\(?:\d+|x[\da-fA-F]{2}|.)|[^'])')/,
+		greedy: true
+	},
+	// The negative look ahead prevents wrong highlighting of the .. operator
+	'number': /\b(?:0[xXoObB][\da-fA-F_]+|\d[\d_]*(?:(?!\.\.)\.[\d_]*)?(?:[eE][+-]?\d[\d_]*)?)(?:'?[iuf]\d*)?/,
+	'keyword': /\b(?:addr|as|asm|atomic|bind|block|break|case|cast|concept|const|continue|converter|defer|discard|distinct|do|elif|else|end|enum|except|export|finally|for|from|func|generic|if|import|include|interface|iterator|let|macro|method|mixin|nil|object|out|proc|ptr|raise|ref|return|static|template|try|tuple|type|using|var|when|while|with|without|yield)\b/,
+	'function': {
+		pattern: /(?:(?!\d)(?:\w|\\x[8-9a-fA-F][0-9a-fA-F])+|`[^`\r\n]+`)\*?(?:\[[^\]]+\])?(?=\s*\()/,
+		inside: {
+			'operator': /\*$/
+		}
+	},
+	// We don't want to highlight operators inside backticks
+	'ignore': {
+		pattern: /`[^`\r\n]+`/,
+		inside: {
+			'punctuation': /`/
+		}
+	},
+	'operator': {
+		// The lookbehind and lookahead prevent the punctuation pairs [. .] {. .} (. .) from being
+		// highlighted as operators, while still letting the slice operator .. take precedence over them.
+		// Nim allows user-defined operators, so any combination of operator characters may be an operator.
+		pattern: /(^|[({\[](?=\.\.)|(?![({\[]\.).)(?:(?:[=+\-*\/<>@$~&%|!?^:\\]|\.\.|\.(?![)}\]]))+|\b(?:and|div|of|or|in|is|isnot|mod|not|notin|shl|shr|xor)\b)/m,
+		lookbehind: true
+	},
+	'punctuation': /[({\[]\.|\.[)}\]]|[`(){}\[\],:]/
+};
 Prism.languages.perl = {
 	'comment': [
 		{

+ 0 - 1
assets/stylesheets/application-dark.css.scss

@@ -67,7 +67,6 @@
         'pages/meteor',
         'pages/modernizr',
         'pages/moment',
-        'pages/nim',
         'pages/nginx',
         'pages/node',
         'pages/npm',

+ 0 - 1
assets/stylesheets/application.css.scss

@@ -67,7 +67,6 @@
         'pages/meteor',
         'pages/modernizr',
         'pages/moment',
-        'pages/nim',
         'pages/nginx',
         'pages/node',
         'pages/npm',

+ 6 - 1
assets/stylesheets/components/_prism.scss

@@ -77,6 +77,11 @@
   }
 }
 
-.token.important {
+.token.important,
+.token.bold {
   font-weight: $boldFontWeight;
 }
+
+.token.italic {
+  font-style: italic;
+}

+ 1 - 0
assets/stylesheets/global/_icons.scss

@@ -173,3 +173,4 @@
 ._icon-electron:before      { background-position: -2rem -2rem; @extend %doc-icon-2; }
 ._icon-falcon:before        { background-position: -3rem -2rem; @extend %doc-icon-2; }
 ._icon-godot:before         { background-position: -4rem -2rem; @extend %doc-icon-2; }
+._icon-nim:before           { background-position: -5rem -2rem; @extend %doc-icon-2; @extend %darkIconFix !optional; }

+ 0 - 81
assets/stylesheets/pages/_nim.scss

@@ -1,81 +0,0 @@
-._nim {
-  @extend %simple;
-
-  @if $style == 'dark' {
-    span.DecNumber { color: #AE81FF; }
-    span.BinNumber { color: #AE81FF; }
-    span.HexNumber { color: #AE81FF; }
-    span.OctNumber { color: #AE81FF; }
-    span.FloatNumber { color: #AE81FF; }
-    span.Identifier { color: #F8F8F2; }
-    span.Keyword { font-weight: 600; color: #F92672; }
-    span.StringLit { color: #E6DB74; }
-    span.LongStringLit { color: #E6DB74; }
-    span.CharLit { color: #E6DB74; }
-    span.EscapeSequence { color: white; }
-    span.Operator { color: white; }
-    span.Punctuation {color: white; }
-    span.Comment, span.LongComment {
-      font-style: italic;
-      font-weight: 400;
-      color: #75715E; }
-
-    span.RegularExpression { color: darkviolet; }
-    span.TagStart { color: #F92672; }
-    span.TagEnd { color: #F92672; }
-    span.Key { color: #AE81FF; }
-    span.Value { color: #AE81FF; }
-    span.RawData { color: #a4255b; }
-    span.Assembler { color: #AE81FF; }
-    span.Preprocessor { color: #AE81FF; }
-    span.Directive { color: #AE81FF; }
-
-    span.Command, span.Rule, span.Hyperlink, span.Label, span.Reference,
-    span.Other { color: white; }
-
-    /* Pop type, const, proc, and iterator defs in nim def blocks */
-    dt pre > span.Identifier, dt pre > span.Operator { color: #529B2F; font-weight: 700; }
-  } @else {
-    span.DecNumber { color: #252dbe; }
-    span.BinNumber { color: #252dbe; }
-    span.HexNumber { color: #252dbe; }
-    span.OctNumber { color: #252dbe; }
-    span.FloatNumber { color: #252dbe; }
-    span.Identifier { color: #3b3b3b; }
-    span.Keyword { font-weight: 600; color: #5e8f60; }
-    span.StringLit { color: #a4255b; }
-    span.LongStringLit { color: #a4255b; }
-    span.CharLit { color: #a4255b; }
-    span.EscapeSequence { color: black; }
-    span.Operator { color: black; }
-    span.Punctuation {color: black; }
-    span.Comment, span.LongComment {
-      font-style: italic;
-      font-weight: 400;
-      color: #484a86; }
-
-    span.RegularExpression { color: darkviolet; }
-    span.TagStart { color: darkviolet; }
-    span.TagEnd { color: darkviolet; }
-    span.Key { color: #252dbe; }
-    span.Value { color: #252dbe; }
-    span.RawData { color: #a4255b; }
-    span.Assembler { color: #252dbe; }
-    span.Preprocessor { color: #252dbe; }
-    span.Directive { color: #252dbe; }
-
-    span.Command, span.Rule, span.Hyperlink, span.Label, span.Reference,
-    span.Other { color: black; }
-
-    /* Pop type, const, proc, and iterator defs in nim def blocks */
-    dt pre > span.Identifier, dt pre > span.Operator { color: #155da4; font-weight: 700; }
-  }
-  dt pre > span.Identifier ~ span.Identifier, dt pre > span.Operator ~ span.Identifier {
-    color: inherit;
-    font-weight: inherit; }
-
-  dt pre > span.Operator ~ span.Identifier, dt pre > span.Operator ~ span.Operator {
-    color: inherit;
-    font-weight: inherit; }
-
-}

+ 1 - 0
assets/stylesheets/pages/_simple.scss

@@ -38,6 +38,7 @@
 ._markdown,
 ._mocha,
 ._mongoose,
+._nim,
 ._redux,
 ._requirejs,
 ._typescript,

+ 46 - 12
lib/docs/filters/nim/clean_html.rb

@@ -4,26 +4,60 @@ module Docs
       def call
         @doc = at_css('#documentId .container')
 
-        css('.docinfo').remove
+        css('.docinfo', '.footer', 'blockquote > p:empty', '.link-seesrc').remove
 
-        content = at_css('#content')
-        if content != nil
-          at_css('#content').remove_attribute('class')
-          @doc.add_child(at_css('#content').inner_html) 
+        css('h1:not(.title), h2, h3, h4').each do |node|
+          node.name = node.name.sub(/\d/) { |i| i.to_i + 1 }
         end
 
-        css('> div.row').remove
+        if content = at_css('#content')
+          content.prepend_child at_css('h1.title')
+          @doc = content
+        end
 
-        css('pre').each do |node|
-          node['data-language'] = 'nim'
+        if root_page?
+          at_css('h1').content = 'Nim Documentation'
         end
 
-        # remove link from headers
         css('h1 > a', 'h2 > a', 'h3 > a', 'h4 > a').each do |node|
-          node.parent['id'] = node['id']
-          node.parent.content = node.content
+          node.parent['id'] = node['id'] if node['id']
+          node.before(node.children).remove
+        end
+
+        css('a[name]').each do |node|
+          node.next_element['id'] = node['name']
+          node.remove
+        end
+
+        css('pre').each do |node|
+          node.content = node.content.strip
+          node['data-language'] = 'nim' unless node.content =~ /\A[\w\-\_\:\=\ ]+\z/
         end
-        
+
+        css('tt').each do |node|
+          node.name = 'code'
+        end
+
+        css('cite').each do |node|
+          node.name = 'em'
+        end
+
+        css('.section').each do |node|
+          node.first_element_child['id'] = node['id'] if node['id']
+          node.before(node.children).remove
+        end
+
+        css('span.pre').each do |node|
+          node.before(node.children).remove
+        end
+
+        css('blockquote > pre:only-child', 'blockquote > dl:only-child', 'blockquote > table').each do |node|
+          node.parent.before(node.parent.children).remove
+        end
+
+        css('a', 'dl', 'table', 'code').remove_attr('class')
+        css('table').remove_attr('border')
+
         doc
       end
     end

+ 44 - 31
lib/docs/filters/nim/entries.rb

@@ -1,49 +1,62 @@
 module Docs
   class Nim
     class EntriesFilter < Docs::EntriesFilter
-      def get_type
-        at_css('h1').content
+      def get_name
+        name = at_css('h1').content
+        name.remove! 'Module '
+        name.remove! ' User Guide'
+        name.remove! ' User\'s manual'
+        name.remove! %r{ \-.*}
+        name.strip!
+        name
       end
 
-      def get_name
-        at_css('h1').content
+      def get_type
+        if name.include?('Tutorial')
+          'Tutorial'
+        elsif slug == 'manual'
+          'Manual'
+        elsif at_css('h1').content.include?('Module ')
+          name
+        else
+          'Reference'
+        end
       end
 
       def additional_entries
         entries = []
-        if get_name.start_with? 'Module '
-          module_name = get_name[7..-1]
-          css('div .section').map do |node|
-            section_node = node.at_css('h1 a')
-            if section_node != nil
-              section_name = section_node.content.strip
-              items_node = node.at_css('dl.item')
-              if items_node != nil
-                items_node.css('dt a').map do |item_node|
-                  item_name = item_node['name']
-                  if item_name.include? ','
-                    item_name = item_name.sub(',', '(') + ')'
-                  end
-                  entries << [module_name + '.' + item_name, item_node.parent['id']]
-                end
-              end
+
+        if at_css('h1').content.include?('Module ')
+          css('#toc-list > li > .simple-toc-section').each do |node|
+            type = node.previous_element.content.strip
+
+            node.css('a.reference:not(.reference-toplevel)').each do |n|
+              n.css('span').remove
+              name = n.content.strip
+              name << '()' if (type == 'Procs' || type == 'Templates') && !name.include?('`')
+              name.remove! '`'
+              name.prepend "#{self.name}."
+              id = n['href'].remove('#')
+              entries << [name, id] unless entries.any? { |e| e[0] == name }
             end
           end
-        else
-          css('h1', 'h2', 'h3').map do |node|
-            id = node['id']
+        elsif slug == 'manual'
+          css('#toc-list > li > a').each do |node|
             name = node.content.strip
-            if id != nil
-              entries << [name, id]
-            else
-              a = node.at_css('a')
-              if a != nil
-                id = a['id']
-                entries << [name, id]
-              end
+            next if name.start_with?('About')
+            id = node['href'].remove('#')
+            entries << [name, id]
+          end
+
+          css('#toc-list > ul').each do |node|
+            type = node.previous_element.content.strip
+
+            node.css('> li > a').each do |n|
+              entries << [n.content.strip, n['href'].remove('#'), "Manual: #{type}"]
             end
           end
         end
+
         entries
       end
     end

+ 8 - 8
lib/docs/scrapers/nim.rb

@@ -2,20 +2,20 @@ module Docs
   class Nim < UrlScraper
     self.type = 'nim'
     self.release = '0.17.0'
+    self.base_url = 'https://nim-lang.org/docs/'
+    self.root_path = 'overview.html'
     self.links = {
       home: 'https://nim-lang.org/',
       code: 'https://github.com/nim-lang/Nim'
     }
-    self.base_url = 'https://nim-lang.org/'
-    self.root_path = 'docs/overview.html'
 
-    html_filters.push 'nim/entries', 'nim/clean_html' 
-    
-    options[:skip] = %w(cdn-cgi/l/email-protection docs/theindex.html docs/docgen.txt)
+    html_filters.push 'nim/entries', 'nim/clean_html'
+
+    options[:skip] = %w(theindex.html docgen.txt)
+
     options[:attribution] = <<-HTML
       &copy; 2006&ndash;2017 Andreas Rumpf<br>
-      All rights reserved. Licensed under the MIT License.
+      Licensed under the MIT License.
     HTML
-
   end
-end
+end

BIN
public/icons/docs/nim/16.png


BIN
public/icons/docs/nim/16@2x.png


+ 1 - 1
public/icons/docs/nim/SOURCE

@@ -1 +1 @@
-https://nim-lang.org/assets/img/logo.svg
+https://github.com/nim-lang/website/tree/master/jekyll/assets/img