Quellcode durchsuchen

Finish Vulkan scraper

Thibaut Courouble vor 8 Jahren
Ursprung
Commit
0df3a77558

BIN
assets/images/docs-2.png


BIN
assets/images/docs-2@2x.png


+ 1 - 1
assets/javascripts/news.json

@@ -1,7 +1,7 @@
 [
   [
     "2017-09-03",
-    "New documentation: <a href=\"/nim/\">Nim</a>"
+    "New documentations: <a href=\"/nim/\">Nim</a> and <a href=\"/vulkan/\">Vulkan</a>"
   ], [
     "2017-07-23",
     "New documentation: <a href=\"/godot/\">Godot</a>"

+ 5 - 0
assets/javascripts/templates/pages/about_tmpl.coffee

@@ -633,6 +633,11 @@ credits = [
     '2013-2017 Evan You, Vue.js contributors',
     'MIT',
     'https://raw.githubusercontent.com/vuejs/vue/master/LICENSE'
+  ], [
+    'Vulkan',
+    '2014-2017 Khronos Group Inc.<br>Vulkan and the Vulkan logo are registered trademarks of the Khronos Group Inc.',
+    'CC BY',
+    'https://creativecommons.org/licenses/by/4.0/'
   ], [
     'webpack',
     'JS Foundation and other contributors',

+ 1 - 0
assets/stylesheets/global/_icons.scss

@@ -174,3 +174,4 @@
 ._icon-falcon:before        { background-position: -3rem -2rem; @extend %doc-icon-2; }
 ._icon-godot:before         { background-position: -4rem -2rem; @extend %doc-icon-2; }
 ._icon-nim:before           { background-position: -5rem -2rem; @extend %doc-icon-2; @extend %darkIconFix !optional; }
+._icon-vulkan:before        { background-position: -6rem -2rem; @extend %doc-icon-2; @extend %darkIconFix !optional; }

+ 1 - 0
assets/stylesheets/pages/_simple.scss

@@ -43,6 +43,7 @@
 ._requirejs,
 ._typescript,
 ._vagrant,
+._vulkan,
 ._yarn {
   @extend %simple;
 }

+ 43 - 3
lib/docs/filters/vulkan/clean_html.rb

@@ -2,11 +2,51 @@ module Docs
   class Vulkan
     class CleanHtmlFilter < Filter
       def call
-        # Copyright is already added via attribution option
-        css('#_copyright').map do |node|
-          node.parent.remove
+        at_css('#_copyright').parent.remove
+
+        css('.sect1', '.sectionbody', '.sect2', '.sect3', 'div.paragraph', 'li > p:only-child', 'dd > p:only-child', 'span', '.ulist').each do |node|
+          node.before(node.children).remove
+        end
+
+        css('a[id]:empty').each do |node|
+          node.parent['id'] ||= node['id']
+          node.remove
+        end
+
+        css('.listingblock').each do |node|
+          node['data-language'] = node.at_css('[data-lang]')['data-lang']
+          node.content = node.content.strip
+          node.name = 'pre'
+          node.remove_attribute('class')
+        end
+
+        css('.sidebarblock').each do |node|
+          node.name = 'blockquote'
+          node.at_css('.title').name = 'h5'
+          node.css('div').each { |n| n.before(n.children).remove }
+          node.remove_attribute('class')
         end
 
+        css('.admonitionblock').each do |node|
+          node.name = 'blockquote'
+          node.children = node.at_css('.content').children
+          node.at_css('.title').name = 'h5'
+          node.remove_attribute('class')
+        end
+
+        css('table').each do |node|
+          node.before %(<div class="_table"></div>)
+          node.previous_element << node
+        end
+
+        css('strong', 'dt', 'a').remove_attr('class')
+
+        css('h4 + h4').each do |node|
+          node.previous_element.remove
+        end
+
+        css('p:contains("This page is extracted from the Vulkan Specification. Fixes and changes should be made to the Specification, not directly.")').remove
+
         doc
       end
     end

+ 5 - 32
lib/docs/filters/vulkan/entries.rb

@@ -1,39 +1,12 @@
 module Docs
   class Vulkan
     class EntriesFilter < Docs::EntriesFilter
-      def get_name
-        name = at_css('h1').content.strip
-        name
-      end
-
-      def get_type
-        # As only documentation is single-paged, hardcode type
-        initial_page? ? 'Vulkan' : 'Specifications'
-      end
-
-      def include_default_entry?
-        # additional_entries is responsible to extract relevant entries
-        false
-      end
-
       def additional_entries
-        if initial_page?
-          # We pack each subsections into their corresponding category for apispec.html
-          subsections = css('.sect2').map do |node|
-            # Parse '.sect1' parent, to know what is the entry's type
-            parent_node = node.parent.parent
-            # Type is the parent's h2 header
-            type = parent_node.at_css('h2').content.strip
-            # Entry node is the one under h3
-            header_node = node.at_css('h3')
-            [header_node.content, header_node['id'], type]
-          end
-        else
-          # We create a new category for vkspec.html page
-          main_sections = css('.sect1').map do |node|
-            # Entry node is the one under h2
-            header_node = node.at_css('h2')
-            [header_node.content, header_node['id'], 'Specifications']
+        css('.sect1').each_with_object [] do |node, entries|
+          type = node.at_css('h2').content
+
+          node.css('h3').each do |n|
+            entries << [n.content, n['id'], type]
           end
         end
       end

+ 10 - 20
lib/docs/scrapers/vulkan.rb

@@ -1,34 +1,24 @@
 module Docs
-  # class Vulkan < FileScraper
   class Vulkan < UrlScraper
     self.name = 'Vulkan'
-
-    self.slug = 'vk'
     self.type = 'vulkan'
+    self.release = '1.0.59'
+    self.base_url = 'https://www.khronos.org/registry/vulkan/specs/1.0/'
+    self.root_path = 'apispec.html'
     self.links = {
-      home: 'https://www.khronos.org/registry/vulkan/specs/',
-      code: 'https://github.com/KhronosGroup/Vulkan-Docs'
+      home: 'https://www.khronos.org/vulkan/'
     }
 
-    self.root_path = 'apispec.html'
-
-    self.release = '1.0.56'
-    # self.dir = '/mnt/d/theblackunknown/Documents/GitHub/Vulkan-Docs/out/1.0/'
-    self.base_url = 'https://www.khronos.org/registry/vulkan/specs/1.0/'
-
-    html_filters.push 'vulkan/entries', 'vulkan/clean_html'
+    html_filters.push 'vulkan/entries', 'vulkan/clean_html', 'title'
 
-    # in apispec.html, skip #header and #footer
+    options[:skip_links] = true
     options[:container] = '#content'
-
-    # If we only want API, we should skip this one
-    options[:skip] = %w(
-      html/vkspec.html
-    )
+    options[:root_title] = 'Vulkan API Reference'
 
     options[:attribution] = <<-HTML
-      Copyright &copy; 2014-2017 Khronos Group. <br>
-      This work is licensed under a Creative Commons Attribution 4.0 International License
+      &copy; 2014&ndash;2017 Khronos Group Inc.<br>
+      Licensed under the Creative Commons Attribution 4.0 International License.<br>
+      Vulkan and the Vulkan logo are registered trademarks of the Khronos Group Inc.
     HTML
   end
 end

BIN
public/icons/docs/vulkan/16.png


BIN
public/icons/docs/vulkan/16@2x.png