Przeglądaj źródła

Finish Kotlin scraper

Thibaut Courouble 9 lat temu
rodzic
commit
78cf26a6a7

BIN
assets/images/icons.png


BIN
assets/images/icons@2x.png


+ 1 - 1
assets/javascripts/collections/types.coffee

@@ -7,7 +7,7 @@ class app.collections.Types extends app.Collection
       (result[@_groupFor(type)] ||= []).push(type)
     result.filter (e) -> e.length > 0
 
-  GUIDES_RGX = /(^|[\s\(])(guide|guides|tutorial|reference|playbooks|getting\ started|manual)($|[\s\):])/i
+  GUIDES_RGX = /(^|[\s\(])(guides?|tutorials?|reference|playbooks|getting\ started|manual)($|[\s\):])/i
 
   _groupFor: (type) ->
     if GUIDES_RGX.test(type.name)

+ 3 - 0
assets/javascripts/news.json

@@ -1,5 +1,8 @@
 [
   [
+    "2016-06-05",
+    "New documentation: <a href=\"/kotlin/\">Kotlin</a>"
+  ], [
     "2016-04-24",
     "New documentations: <a href=\"/numpy/\">NumPy</a> and <a href=\"/apache_pig/\">Apache Pig</a>"
   ], [

+ 5 - 0
assets/javascripts/templates/pages/about_tmpl.coffee

@@ -250,6 +250,11 @@ credits = [
     'Steven Sanderson, the Knockout.js team, and other contributors',
     'MIT',
     'https://raw.githubusercontent.com/knockout/knockout/master/LICENSE'
+  ], [
+    'Kotlin',
+    '2010-2016 JetBrains s.r.o.',
+    'Apache',
+    'https://raw.githubusercontent.com/JetBrains/kotlin-web-site/master/LICENSE'
   ], [
     'Laravel',
     'Taylor Otwell',

+ 1 - 0
assets/javascripts/views/pages/simple.coffee

@@ -11,6 +11,7 @@ app.views.CakephpPage =
 app.views.EmberPage =
 app.views.ExpressPage =
 app.views.GoPage =
+app.views.KotlinPage =
 app.views.LaravelPage =
 app.views.LodashPage =
 app.views.MarionettePage =

+ 1 - 0
assets/stylesheets/application-dark.css.scss

@@ -50,6 +50,7 @@
         'pages/haskell',
         'pages/jquery',
         'pages/knockout',
+        'pages/kotlin',
         'pages/laravel',
         'pages/lua',
         'pages/mdn',

+ 1 - 0
assets/stylesheets/application.css.scss

@@ -50,6 +50,7 @@
         'pages/haskell',
         'pages/jquery',
         'pages/knockout',
+        'pages/kotlin',
         'pages/laravel',
         'pages/lua',
         'pages/mdn',

+ 1 - 0
assets/stylesheets/global/_base.scss

@@ -155,6 +155,7 @@ td {
     margin-top: 0;
     margin-bottom: 0;
   }
+  > pre:first-child, > p:first-child, > ul:first-child, > ol:first-child { margin-top: 0; }
   > pre:last-child, > p:last-child, > ul:last-child, > ol:last-child { margin-bottom: 0; }
 }
 

+ 1 - 0
assets/stylesheets/global/_icons.scss

@@ -139,3 +139,4 @@
 ._icon-perl:before          { background-position: -3rem -11rem; }
 ._icon-apache_pig:before    { background-position: -4rem -11rem; }
 ._icon-numpy:before         { background-position: -5rem -11rem; }
+._icon-kotlin:before        { background-position: -6rem -11rem; }

+ 9 - 0
assets/stylesheets/pages/_kotlin.scss

@@ -0,0 +1,9 @@
+// Styles applied inside the ._kotlin container.
+._kotlin {
+  h2 { @extend %block-heading; }
+  h3 { @extend %block-label, %label-blue; }
+  code { @extend %label; }
+
+  // Vertical spacing for code blocks placed directly inside table cells.
+  td > pre { margin: .5em 0; }
+
+  // Breadcrumbs reuse the shared note styling.
+  .api-docs-breadcrumbs { @extend %note; }
+}

+ 40 - 0
lib/docs/filters/kotlin/clean_html.rb

@@ -2,8 +2,48 @@ module Docs
   class Kotlin
     class CleanHtmlFilter < Filter
       def call
+        @doc = at_css('.page-content')
+        subpath.start_with?('api') ? api_page : doc_page
         doc
       end
+
+      def doc_page
+        css('.page-link-to-github').remove
+
+        css('a > img').each do |node|
+          node.parent.before(node.parent.content).remove
+        end
+
+        css('pre').each do |node|
+          node['data-language'] = node.at_css('code')['class'][/lang_(\w+)/, 1] if node.at_css('code')
+          node.content = node.content
+        end
+      end
+
+      # Cleans up an API reference page: normalises headings, repositions the
+      # breadcrumbs, unwraps placeholder links and converts signatures to <pre>.
+      def api_page
+        # Promote the first heading found (h1, h2 or h3) to h1.
+        # NOTE(review): raises NoMethodError if the page has no h1-h3 at all;
+        # confirm every API page carries a heading.
+        at_css('h1, h2, h3').name = 'h1'
+
+        # Move the breadcrumbs, when present, to directly after the h1.
+        if breadcrumbs = at_css('.api-docs-breadcrumbs')
+          at_css('h1').after(breadcrumbs)
+        end
+
+        # With no h2 on the page, promote every h3 to h2.
+        unless at_css('h2')
+          css('h3').each do |node|
+            node.name = 'h2'
+          end
+        end
+
+        # Replace placeholder links (href="#") with their content.
+        css('a[href="#"]').each do |node|
+          node.before(node.content).remove
+        end
+
+        # Turn the parent of each '.signature > code' into a <pre> holding that
+        # code's text, with <br> tags converted to newlines.
+        css('.signature > code').each do |node|
+          parent = node.parent
+          parent.name = 'pre'
+          parent.inner_html = node.inner_html.gsub('<br>', "\n").strip
+          parent.content = parent.content # keep the text, drop remaining markup
+        end
+      end
     end
   end
 end

+ 10 - 32
lib/docs/filters/kotlin/entries.rb

@@ -2,49 +2,27 @@ module Docs
   class Kotlin
     class EntriesFilter < Docs::EntriesFilter
       def get_name
-        if at_css('h1')
-          name = at_css('h1').text
-          module_name = breadcrumbs[1]
-
-          "#{module_name}.#{name}"
-        elsif at_css('h2')
-          at_css('h2').text.gsub 'Package ', ''
-        elsif at_css('h3')
-          at_css('h3').text
+        # API pages: join every breadcrumb after the first with dots.
+        if subpath.start_with?('api')
+          breadcrumbs[1..-1].join('.')
+        else
+          # Other pages: use the content of the h1.
+          at_css('h1').content
         end
       end
 
       def get_type
-        if package? || top_level? && !extensions?
+        # API pages are typed by their second breadcrumb; tutorial and reference
+        # pages get a fixed type. Any other subpath yields nil.
+        if subpath.start_with?('api')
           breadcrumbs[1]
-        else
-          "miscellaneous"
+        elsif subpath.start_with?('docs/tutorials')
+          'Tutorials'
+        elsif subpath.start_with?('docs/reference')
+          'Reference'
         end
       end
 
       private
 
       def breadcrumbs
-        container = at_css('.api-docs-breadcrumbs')
-
-        if container
-          links = container.children.select.with_index { |_, i| i.even? }
-          links.map &:text
-        else
-          []
-        end
-      end
-
-      def top_level?
-        breadcrumbs.size == 3
-      end
-
-      def extensions?
-        get_name.start_with? 'Extensions'
-      end
-
-      def package?
-        breadcrumbs.size == 2
+        # Memoised list of the stripped content of each link inside
+        # '.api-docs-breadcrumbs' (empty when there are none).
+        @breadcrumbs ||= css('.api-docs-breadcrumbs a').map(&:content).map(&:strip)
       end
     end
   end

+ 14 - 4
lib/docs/scrapers/kotlin.rb

@@ -2,8 +2,8 @@ module Docs
   class Kotlin < UrlScraper
     self.type = 'kotlin'
     self.release = '1.0.2'
-    self.base_url = 'https://kotlinlang.org/api/latest/jvm/stdlib/'
-    self.root_path = 'index.html'
+    self.base_url = 'https://kotlinlang.org/'
+    self.root_path = 'api/latest/jvm/stdlib/index.html'
     self.links = {
       home: 'https://kotlinlang.org/',
       code: 'https://github.com/JetBrains/kotlin'
@@ -11,10 +11,20 @@ module Docs
 
     html_filters.push 'kotlin/entries', 'kotlin/clean_html'
 
-    options[:container] = '.page-content'
+    options[:container] = '.global-content'
+
+    options[:only_patterns] = [/\Adocs\/tutorials\//, /\Adocs\/reference\//, /\Aapi\/latest\/jvm\/stdlib\//]
+    options[:skip] = %w(
+      api/latest/jvm/stdlib/alltypes/index.html
+      docs/
+      docs/videos.html
+      docs/events.html
+      docs/resources.html
+      docs/reference/grammar.html)
+    options[:replace_paths] = { 'api/latest/jvm/stdlib/' => 'api/latest/jvm/stdlib/index.html' }
 
     options[:attribution] = <<-HTML
-      &copy; 2016 JetBrains<br>
+      &copy; 2010&ndash;2016 JetBrains s.r.o.<br>
       Licensed under the Apache License, Version 2.0.
     HTML
   end

BIN
public/icons/docs/kotlin/16.png


BIN
public/icons/docs/kotlin/16@2x.png


+ 1 - 1
public/icons/docs/kotlin/SOURCE

@@ -1 +1 @@
-https://upload.wikimedia.org/wikipedia/commons/b/b5/Kotlin-logo.png
+https://github.com/JetBrains/kotlin-web-site/blob/master/assets/images/favicon.ico