瀏覽代碼

Update DuckDB documentation (1.1.3)

Simon Legner 1 年之前
父節點
當前提交
deedda3161

+ 4 - 0
assets/javascripts/news.json

@@ -1,4 +1,8 @@
 [
+  [
+    "2024-11-23",
+    "New documentation: <a href=\"/duckdb/\">DuckDB</a>"
+  ],
   [
     "2024-08-20",
     "New documentation: <a href=\"/man/\">Linux man pages</a>"

+ 12 - 0
lib/docs/filters/duckdb/attribution.rb

@@ -0,0 +1,12 @@
+# frozen_string_literal: true
+
+module Docs
+  class Duckdb
+    class AttributionFilter < Docs::AttributionFilter
+      def attribution_link
+        url = current_url.to_s.sub! 'http://localhost:8000', 'https://duckdb.org'
+        %(<a href="#{url}" class="_attribution-link">#{url}</a>)
+      end
+    end
+  end
+end

+ 10 - 11
lib/docs/filters/duckdb/clean_html.rb

@@ -3,27 +3,26 @@ module Docs
     class CleanHtmlFilter < Filter
       def call
         # First extract the main content
-        @doc = at_css('main')
+        @doc = at_css('#main_content_wrap', 'main')
         return doc if @doc.nil?
 
+        doc.prepend_child at_css('.title').remove
+        at_css('.title').name = 'h1'
+
         # Remove navigation and header elements
-        css('.headerline', '.landingmenu', '.search_icon', '#sidebar', '.pagemeta', '.toc_menu', '.section-nav').remove
+        css('.headerline', '.headlinebar', '.landingmenu', '.search_icon', '#sidebar', '.pagemeta', '.toc_menu', '.section-nav').remove
 
         # Clean up code blocks
-        css('pre').each do |node|
-          # Detect language from class or parent div
-          if node['class']&.include?('sql') || node.at_css('code.sql')
-            node['data-language'] = 'sql'
-          elsif node['class']&.include?('language-sql')
-            node['data-language'] = 'sql'
-          end
+        css('div.highlighter-rouge').each do |node|
+          node['data-language'] = node['class'][/language-(\w+)/, 1] if node['class']
           node.content = node.content.strip
+          node.name = 'pre'
         end
 
-        # Remove unnecessary attributes but keep essential ones
+        # Remove unnecessary attributes
         css('div, span, p').each do |node|
           node.remove_attribute('style')
-          node.remove_attribute('class') unless node['class'] =~ /highlight/
+          node.remove_attribute('class')
         end
 
         # Remove empty elements

+ 1 - 1
lib/docs/filters/duckdb/entries.rb

@@ -2,7 +2,7 @@ module Docs
   class Duckdb
     class EntriesFilter < Docs::EntriesFilter
       def get_name
-        at_css('h1')&.content || 'DuckDB'
+        at_css('h1', '.title').content
       end
 
       def get_type

+ 7 - 34
lib/docs/scrapers/duckdb.rb

@@ -8,7 +8,13 @@ module Docs
       code: 'https://github.com/duckdb/duckdb'
     }
 
+    # https://duckdb.org/docs/guides/offline-copy.html
+    # curl -O https://duckdb.org/duckdb-docs.zip; bsdtar xf duckdb-docs.zip; cd duckdb-docs; python -m http.server
+    self.release = '1.1.3'
+    self.base_url = 'http://localhost:8000/docs/'
+
     html_filters.push 'duckdb/entries', 'duckdb/clean_html'
+    text_filters.replace 'attribution', 'duckdb/attribution'
 
     options[:container] = '.documentation'
     
@@ -29,41 +35,8 @@ module Docs
       Licensed under the MIT License.
     HTML
 
-    version '1.1' do
-      self.release = '1.1.x'
-      self.base_url = 'http://localhost:8000/docs/'
-    end
-
-    # version '1.0' do
-    #     self.release = '1.0.x'
-    #     self.base_url = "https://duckdb.org/docs/archive/#{self.version}/"
-
-    #     html_filters.push 'duckdb/clean_html'
-    # end
-
-    # version '0.9' do
-    #     self.release = '0.9.x'
-    #     self.base_url = "https://duckdb.org/docs/archive/#{self.version}/"
-
-    #     html_filters.push 'duckdb/clean_html'
-    # end
-
-    # version '0.8' do
-    #     self.release = '0.8.x'
-    #     self.base_url = "https://duckdb.org/docs/archive/#{self.version}/"
-
-    #     html_filters.push 'duckdb/clean_html'
-    # end
-
-    # version '0.7' do
-    #     self.release = '0.7.x'
-    #     self.base_url = "https://duckdb.org/docs/archive/#{self.version}/"
-
-    #     html_filters.push 'duckdb/clean_html'
-    # end
-
     def get_latest_version(opts)
-      get_github_tags('duckdb', 'duckdb', opts)
+      get_github_tags('duckdb', 'duckdb', opts)[0]['name']
     end
   end
 end