Bläddra i källkod

Finish Crystal scraper

Thibaut Courouble 9 år sedan
förälder
incheckning
3036c712e9

BIN
assets/images/docs.png


BIN
assets/images/docs@2x.png


+ 1 - 1
assets/javascripts/collections/types.coffee

@@ -7,7 +7,7 @@ class app.collections.Types extends app.Collection
       (result[@_groupFor(type)] ||= []).push(type)
     result.filter (e) -> e.length > 0
 
-  GUIDES_RGX = /(^|[\s\(])(guides?|tutorials?|reference|playbooks|getting\ started|manual)($|[\s\):])/i
+  GUIDES_RGX = /(^|[\s\(])(guides?|tutorials?|reference|book|getting\ started|manual)($|[\s\):])/i
 
   _groupFor: (type) ->
     if GUIDES_RGX.test(type.name)

+ 3 - 0
assets/javascripts/news.json

@@ -1,5 +1,8 @@
 [
   [
+    "2016-07-24",
+    "New documentation: <a href=\"/crystal/\">Crystal</a>"
+  ], [
     "2016-07-03",
     "New documentations: <a href=\"/cmake/\">CMake</a> and <a href=\"/matplotlib/\">Matplotlib</a>"
   ], [

+ 5 - 0
assets/javascripts/templates/pages/about_tmpl.coffee

@@ -160,6 +160,11 @@ credits = [
     '2005-2016 Mozilla Developer Network and individual contributors',
     'CC BY-SA',
     'https://creativecommons.org/licenses/by-sa/2.5/'
+  ], [
+    'Crystal',
+    '2012-2016 Manas Technology Solutions',
+    'Apache',
+    'https://raw.githubusercontent.com/crystal-lang/crystal/master/LICENSE'
   ], [
     'D3.js',
     '2010-2016 Michael Bostock',

+ 0 - 6
assets/javascripts/views/pages/crystal.coffee

@@ -1,6 +0,0 @@
-#= require views/pages/base
-
-class app.views.CrystalPage extends app.views.BasePage
-  prepare: ->
-    @highlightCode @findAllByTag('pre'), 'ruby'
-    return

+ 1 - 0
assets/javascripts/views/pages/simple.coffee

@@ -10,6 +10,7 @@ app.views.AngularPage =
 app.views.AngularjsPage =
 app.views.CakephpPage =
 app.views.ChaiPage =
+app.views.CrystalPage =
 app.views.DrupalPage =
 app.views.ElixirPage =
 app.views.EmberPage =

+ 1 - 0
assets/stylesheets/application-dark.css.scss

@@ -39,6 +39,7 @@
         'pages/cakephp',
         'pages/clojure',
         'pages/coffeescript',
+        'pages/crystal',
         'pages/d3',
         'pages/dojo',
         'pages/drupal',

+ 1 - 0
assets/stylesheets/application.css.scss

@@ -39,6 +39,7 @@
         'pages/cakephp',
         'pages/clojure',
         'pages/coffeescript',
+        'pages/crystal',
         'pages/d3',
         'pages/dojo',
         'pages/drupal',

+ 1 - 0
assets/stylesheets/global/_icons.scss

@@ -130,6 +130,7 @@
 ._icon-vue:before           { background-position: -3rem -8rem; }
 ._icon-opentsdb:before      { background-position: -4rem -8rem; }
 ._icon-q:before             { background-position: -5rem -8rem; }
+._icon-crystal:before       { background-position: -6rem -8rem; @extend %darkIconFix !optional; }
 ._icon-react_native:before  { background-position: 0 -9rem; }
 ._icon-phalcon:before       { background-position: -1rem -9rem; }
 ._icon-matplotlib:before    { background-position: -2rem -9rem; }

+ 21 - 2
assets/stylesheets/pages/_crystal.scss

@@ -1,7 +1,26 @@
 ._crystal {
   @extend %simple;
 
-  blockquote {
-    @extend %note;
+  .signature { @extend %code; }
+  a.signature, .superclass > a { @extend %label; }
+
+  .entry-detail { margin-top: 1em; }
+  .view-source { float: right; }
+
+  .superclass-hierarchy {
+    list-style: none;
+    padding: 0;
+    overflow: hidden;
+  }
+
+  li.superclass {
+    float: left;
+    margin: 0 .5em 0 0;
+    padding: 0;
+  }
+
+  li.superclass + li.superclass:before {
+    content: '<';
+    margin-right: .5em;
   }
 }

+ 36 - 7
lib/docs/filters/crystal/clean_html.rb

@@ -2,18 +2,47 @@ module Docs
   class Crystal
     class CleanHtmlFilter < Filter
       def call
+        slug.start_with?('docs') ? book : api
+        doc
+      end
 
-        # Remove class attr from div and child nodes
-        css("div").each do |node|
-          node.xpath("//@class").remove
+      def book
+        @doc = at_css('.page-inner > section')
+
+        css('pre > code').each do |node|
+          node.parent['data-language'] = node['class'][/lang-(\w+)/, 1] if node['class']
+          node.parent.content = node.parent.content
         end
+      end
+
+      def api
+        @doc = at_css('#main-content')
+
+        at_css('h1 + p').remove if root_page?
+
+        css('.method-permalink', '.doc + br', 'hr', 'a > br', 'div + br').remove
 
-        # Set id attributes on <h1> instead of an empty <a>
-        css("h1").each do |node|
-          node["id"] = node.at_css("a")["id"]
+        css('pre > code').each do |node|
+          node.parent['data-language'] = 'crystal'
+          node.parent.content = node.parent.content
         end
 
-        doc
+        css('span').each do |node|
+          node.before(node.children).remove
+        end
+
+        css('div.signature').each do |node|
+          node.name = 'h3'
+          node.inner_html = node.inner_html.strip
+        end
+
+        css('.entry-detail a:contains("View source")').each do |node|
+          node['class'] = 'view-source'
+          node.content = 'Source'
+          parent = node.parent
+          node.ancestors('.entry-detail').first.at_css('h3') << node
+          parent.remove
+        end
       end
     end
   end

+ 62 - 9
lib/docs/filters/crystal/entries.rb

@@ -1,21 +1,74 @@
 module Docs
   class Crystal
     class EntriesFilter < Docs::EntriesFilter
-
-      # Set the name to h1 content
       def get_name
-        node = at_css("h1")
-        node.content.strip
+        if slug.start_with?('docs/')
+          name = at_css('.page-inner h1').content.strip
+
+          if slug.start_with?('docs/syntax_and_semantics')
+            name.prepend "#{slug.split('/')[2].titleize}: " if slug.split('/').length > 3
+          elsif slug.split('/').length > 1
+            chapter = slug.split('/')[1].titleize.capitalize
+            name.prepend "#{chapter}: " unless name == chapter
+          end
+
+          name
+        else
+          name = at_css('h1').children.last.content.strip
+          name.remove! %r{\(.*\)}
+          name
+        end
       end
 
-      # Crystal types from url slug
       def get_type
-        slug["blob/master/"] = ""
-        object, method = *slug.split("/")
-        object = object.capitalize
-        method ? object : "Index"
+        return if root_page?
+
+        if slug.start_with?('docs/syntax_and_semantics')
+          'Book: Language'
+        elsif slug.start_with?('docs/')
+          'Book'
+        else
+          hierarchy = at_css('.superclass-hierarchy')
+          if hierarchy && hierarchy.content.include?('Exception')
+            'Exceptions'
+          else
+            type = at_css('#types-list > ul > .current > a').content
+            type = 'Float' if type.start_with?('Float')
+            type = 'Int' if type.start_with?('Int')
+            type = 'UInt' if type.start_with?('UInt')
+            type = 'TCP' if type.start_with?('TCP')
+            type
+          end
+        end
       end
 
+      def additional_entries
+        return [] unless slug.start_with?('api')
+        entries = []
+
+        css('.entry-detail[id$="class-method"]').each do |node|
+          name = node.at_css('.signature > strong').content.strip
+          name.prepend "#{self.name}." unless slug.end_with?('toplevel')
+          id = node['id'] = node['id'].remove(/<.+?>/)
+          entries << [name, id] unless entries.last && entries.last[0] == name
+        end
+
+        css('.entry-detail[id$="instance-method"]').each do |node|
+          name = node.at_css('.signature > strong').content.strip
+          name.prepend "#{self.name}#" unless slug.end_with?('toplevel')
+          id = node['id'] = node['id'].remove(/<.+?>/)
+          entries << [name, id] unless entries.last && entries.last[0] == name
+        end
+
+        css('.entry-detail[id$="macro"]').each do |node|
+          name = node.at_css('.signature > strong').content.strip
+          name.prepend "#{self.name} " unless slug.end_with?('toplevel')
+          id = node['id'] = node['id'].remove(/<.+?>/)
+          entries << [name, id] unless entries.last && entries.last[0] == name
+        end
+
+        entries
+      end
     end
   end
 end

+ 28 - 13
lib/docs/scrapers/crystal.rb

@@ -1,22 +1,37 @@
 module Docs
   class Crystal < UrlScraper
-    self.name = "Crystal"
-    self.type = "crystal"
-    self.base_url = "https://github.com/crystal-lang/crystal-book"
-    self.initial_paths = %w(/blob/master/SUMMARY.md)
+    self.type = 'crystal'
+    self.release = '0.18.7'
+    self.base_url = 'https://crystal-lang.org/'
+    self.root_path = 'api/0.18.7/index.html'
+    self.initial_paths = %w(docs/index.html)
     self.links = {
-      home: "https://crystal-lang.org/",
-      code: "https://github.com/crystal-lang/crystal"
+      home: 'https://crystal-lang.org/',
+      code: 'https://github.com/crystal-lang/crystal'
     }
 
-    html_filters.push "crystal/clean_html", "crystal/entries"
+    html_filters.push 'crystal/entries', 'crystal/clean_html'
 
-    options[:container] = ".entry-content"
-    options[:only_patterns] = [/\/blob\/master\/.*\.md/]
-    options[:skip] = %w(/blob/master/README.md)
+    options[:only_patterns] = [/\Adocs\//, /\Aapi\/#{release}\//]
 
-    options[:attribution] = <<-HTML
-      <a href="http://creativecommons.org/publicdomain/zero/1.0/">CC0</a>
-    HTML
+    options[:replace_paths] = {
+      "api/#{release}/" => "api/#{release}/index.html",
+      'docs/' => 'docs/index.html'
+    }
+
+    options[:attribution] = ->(filter) {
+      if filter.slug.start_with?('docs')
+        <<-HTML
+          To the extent possible under law, the persons who contributed to this work
+          have waived<br>all copyright and related or neighboring rights to this work
+          by associating CC0 with it.
+        HTML
+      else
+        <<-HTML
+          &copy; 2012&ndash;2016 Manas Technology Solutions.<br>
+          Licensed under the Apache License, Version 2.0.
+        HTML
+      end
+    }
   end
 end

BIN
public/icons/docs/crystal/16.png


BIN
public/icons/docs/crystal/16@2x.png