Browse Source

Merge pull request #2201 from jessevanherk/godot_4.2

Update Godot docs to include v4.2 and fix older version scraping
Simon Legner 1 year ago
parent
commit
0dd0ad813f

+ 20 - 3
lib/docs/filters/godot/clean_html.rb

@@ -6,12 +6,13 @@ module Docs
           at_css('h1').content = 'Godot Engine'
           at_css('.admonition.note').remove
         end
+        css('.admonition-grid').remove
 
-        css('ul[id].simple li:first-child:last-child').each do |node|
+        css('p[id]').each do |node|
           heading = Nokogiri::XML::Node.new 'h3', doc.document
-          heading['id'] = node.parent['id']
+          heading['id'] = node['id']
           heading.children = node.children
-          node.parent.before(heading).remove
+          node.before(heading).remove
         end
 
         css('h3 strong').each do |node|
@@ -20,6 +21,22 @@ module Docs
 
         css('a.reference').remove_attr('class')
 
+        # flatten gdscript+C# example blocks and add language name.
+        css('div[role="tabpanel"]').each do |node|
+          language_label = Nokogiri::XML::Node.new 'strong', doc.document
+          language_name = 'GDScript' if node.at_css('div.highlight-gdscript')
+          language_name = 'C#' if node.at_css('div.highlight-csharp')
+          language_label.content = language_name.to_s
+
+          node.before(language_label)
+          node.before(node.children).remove
+        end
+
+        css('div.sphinx-tabs [role="tablist"]').remove
+
+        # remove the remotely hosted "percent-translated" badge
+        css('a[href^="https://hosted.weblate"]').remove if root_page?
+
         doc
       end
     end

+ 1 - 1
lib/docs/filters/godot/clean_html_v2.rb

@@ -4,7 +4,7 @@ module Docs
       def call
         if root_page?
           at_css('h1').content = 'Godot Engine'
-          at_css('.admonition.tip').remove
+          at_css('.admonition.caution').remove
         end
 
         css('ul[id].simple li:first-child:last-child').each do |node|

+ 27 - 0
lib/docs/filters/godot/clean_html_v3.rb

@@ -0,0 +1,27 @@
+module Docs
+  class Godot
+    class CleanHtmlV3Filter < Filter # HTML clean-up filter; presumably the class behind 'godot/clean_html_v3' — confirm against the filter lookup
+      def call # Rewrites the parsed page in place and returns it (`doc`).
+        if root_page? # presumably true only for the docs landing page — TODO confirm
+          at_css('h1').content = 'Godot Engine' # replace the text of the first <h1>
+          at_css('.admonition.caution').remove # NOTE(review): raises if nothing matches, assuming at_css returns nil like Nokogiri's
+        end
+
+        css('ul[id].simple li:first-child:last-child').each do |node| # <li> elements that are the only child of a ul.simple carrying an id
+          heading = Nokogiri::XML::Node.new 'h3', doc.document # fresh <h3> owned by the same document
+          heading['id'] = node.parent['id'] # the anchor id lives on the enclosing <ul>, not on the <li>
+          heading.children = node.children # move the <li> contents into the heading
+          node.parent.before(heading).remove # insert the heading ahead of the <ul>, then drop the <ul> (relies on `before` returning its receiver)
+        end
+
+        css('h3 strong').each do |node| # unwrap <strong> inside every <h3>
+          node.before(node.children).remove # hoist the children out, then remove the now-empty <strong>
+        end
+
+        css('a.reference').remove_attr('class') # strip the class attribute from a.reference links
+
+        doc # the filter's result is the mutated document
+      end
+    end
+  end
+end

+ 4 - 2
lib/docs/filters/godot/entries.rb

@@ -11,7 +11,7 @@ module Docs
         if slug.start_with?('getting_started')
           # Getting started sections are different even between different minor
           # versions from v3 so we're programmatically generating them instead.
-          "Getting started: " + slug.split('/')[1].tr_s('_', ' ').capitalize
+          'Getting started: ' + slug.split('/')[1].tr_s('_', ' ').capitalize
         else
           name
         end
@@ -20,9 +20,10 @@ module Docs
       def additional_entries
         return [] unless slug.start_with?('classes')
 
-        css('.simple[id]').each_with_object [] do |node, entries|
+        css('p[id]').each_with_object [] do |node, entries|
           name = node.at_css('strong').content
           next if name == self.name
+
           name.prepend "#{self.name}."
           name << '()'
           entries << [name, node['id']] unless entries.any? { |entry| entry[0] == name }
@@ -32,6 +33,7 @@ module Docs
       def include_default_entry?
         return false if subpath.start_with?('getting_started') && subpath.end_with?('index.html')
         return false if subpath == 'classes/index.html'
+
         true
       end
     end

+ 39 - 0
lib/docs/filters/godot/entries_v3.rb

@@ -0,0 +1,39 @@
+module Docs
+  class Godot
+    class EntriesV3Filter < Docs::EntriesFilter # Index-entry extraction; presumably the class behind 'godot/entries_v3' — confirm against the filter lookup
+      def get_name # Page name: text of the first <h1> without the pilcrow.
+        name = at_css('h1').content
+        name.remove! "\u{00B6}" # Remove the pilcrow
+        name
+      end
+
+      def get_type # Entry type: a generated "Getting started: ..." label for getting_started pages, otherwise the page name.
+        if slug.start_with?('getting_started')
+          # Getting started sections are different even between different minor
+          # versions from v3 so we're programmatically generating them instead.
+          "Getting started: " + slug.split('/')[1].tr_s('_', ' ').capitalize # second path segment, underscore runs squeezed to one space, first letter upper-cased
+        else
+          name
+        end
+      end
+
+      def additional_entries # Extra [name, id] pairs for class pages; empty for every other page.
+        return [] unless slug.start_with?('classes')
+
+        css('.simple[id]').each_with_object [] do |node, entries| # every element with class "simple" that carries an id
+          name = node.at_css('strong').content # NOTE(review): raises if the node has no <strong>, assuming at_css returns nil
+          next if name == self.name # skip the entry that would duplicate the page itself
+          name.prepend "#{self.name}." # qualify with the page name, mutating the string in place
+          name << '()' # every entry is rendered with call parentheses
+          entries << [name, node['id']] unless entries.any? { |entry| entry[0] == name } # keep only the first entry per name
+        end
+      end
+
+      def include_default_entry? # False for getting_started index pages and for classes/index.html, true otherwise.
+        return false if subpath.start_with?('getting_started') && subpath.end_with?('index.html')
+        return false if subpath == 'classes/index.html'
+        true
+      end
+    end
+  end
+end

+ 35 - 22
lib/docs/scrapers/godot.rb

@@ -5,59 +5,72 @@ module Docs
       home: 'https://godotengine.org/',
       code: 'https://github.com/godotengine/godot'
     }
+    # Godot docs since 3.5 don't link every page from the index, so these paths are listed explicitly.
+    self.initial_paths = %w[
+      getting_started/introduction/index.html
+      getting_started/step_by_step/index.html
+      classes/index.html
+    ]
 
-    options[:container] = '.document .section'
-
+    options[:container] = '.document > [itemprop="articleBody"]'
     options[:download_images] = false
-    options[:only_patterns] = [/\Agetting_started\//, /\Aclasses\//]
+    options[:only_patterns] = [%r{\Agetting_started/}, %r{\Aclasses/}]
+
+    options[:attribution] = <<-HTML
+      &copy; 2014&ndash;present Juan Linietsky, Ariel Manzur and the Godot community<br>
+      Licensed under the Creative Commons Attribution Unported License v3.0.
+    HTML
 
-    options[:attribution] = ->(filter) do
-      if filter.subpath.start_with?('classes')
-         <<-HTML
-          &copy; 2014&ndash;2022 Juan Linietsky, Ariel Manzur, Godot Engine contributors<br>
-          Licensed under the MIT License.
-        HTML
-      else
-        <<-HTML
-          &copy; 2014&ndash;2022 Juan Linietsky, Ariel Manzur and the Godot community<br>
-          Licensed under the Creative Commons Attribution Unported License v3.0.
-        HTML
-      end
+    version '4.2' do
+      self.release = '4.2.2'
+      self.base_url = "https://docs.godotengine.org/en/#{self.version}/"
+      html_filters.push 'godot/entries', 'godot/clean_html', 'sphinx/clean_html'
     end
 
     version '3.5' do
-      self.release = '3.5.1'
+      self.release = '3.5.3'
       self.base_url = "https://docs.godotengine.org/en/#{self.version}/"
-      options[:container] = '.document > [itemprop="articleBody"] > section[id]'
+
+      # Godot 3.5 upstream docs are formatted like Godot 4, so the same filters are used.
       html_filters.push 'godot/entries', 'godot/clean_html', 'sphinx/clean_html'
     end
 
     version '3.4' do
       self.release = '3.4.5'
       self.base_url = "https://docs.godotengine.org/en/#{self.version}/"
+
       options[:container] = '.document > [itemprop="articleBody"] > section[id]'
-      html_filters.push 'godot/entries', 'godot/clean_html', 'sphinx/clean_html'
+      html_filters.push 'godot/entries_v3', 'godot/clean_html_v3', 'sphinx/clean_html'
     end
 
     version '3.3' do
       self.release = '3.3.0'
       self.base_url = "https://docs.godotengine.org/en/#{self.version}/"
-      html_filters.push 'godot/entries', 'godot/clean_html', 'sphinx/clean_html'
+      self.initial_paths = %w[/index.html]
+
+      options[:only_patterns] = [%r{\Aclasses/}]
+      options[:container] = '.document .section'
+      html_filters.push 'godot/entries_v3', 'godot/clean_html_v3', 'sphinx/clean_html'
     end
 
     version '3.2' do
       self.release = '3.2.3'
       self.base_url = "https://docs.godotengine.org/en/#{self.version}/"
-      html_filters.push 'godot/entries', 'godot/clean_html', 'sphinx/clean_html'
+      self.initial_paths = %w[/index.html]
+
+      options[:only_patterns] = [%r{\Aclasses/}]
+      options[:container] = '.document .section'
+      html_filters.push 'godot/entries_v3', 'godot/clean_html_v3', 'sphinx/clean_html'
     end
 
     version '2.1' do
       self.release = '2.1.6'
       self.base_url = "https://docs.godotengine.org/en/#{self.version}/"
+      self.initial_paths = %w[/index.html]
 
       options[:skip] = %w(classes/class_@global\ scope.html)
-      options[:only_patterns] = [/\Alearning\//, /\Aclasses\//]
-
+      options[:only_patterns] = [%r{\Alearning/}, %r{\Aclasses/}]
+      options[:container] = '.document .section'
       html_filters.push 'godot/entries_v2', 'godot/clean_html_v2', 'sphinx/clean_html'
     end