Browse Source

updated yarn version (3.1.1 > 4.5.1)

Rui Jiang 1 year ago
parent
commit
2216cb46cf

+ 7 - 34
lib/docs/filters/yarn/clean_html_berry.rb

@@ -2,45 +2,18 @@ module Docs
   class Yarn
     class CleanHtmlBerryFilter < Filter
       def call
-        if slug.empty?
-          @doc = at_css('main')
-          css(
-            (['div:first-child'] * 3).join('>'), # Tagline
-            'img',
-            'hr', # Footer
-            'hr + div', # Footer
-          ).remove
-
-          css('a').each do |link|
-            link.name = 'div'
-            link.css('h3').each do |node|
-              node.replace("<h2><a href='#{link['href']}'>#{node.content}</a></h2>")
-            end
-          end
-
-          return doc
-        end
-
-        @doc = at_css('article')
-        # Heading & edit link
-        css('h1', 'h1 + a').remove unless slug.start_with?('configuration')
-
-        if slug.start_with?('cli')
-          css('.header-code').each do |node|
-            node.name = 'span'
-          end
-        end
-
-        if slug.start_with?('configuration')
-          css('h1', 'h2').each do |node|
-            node.name = node.name.sub(/\d/) { |i| i.to_i + 1 }
-          end
-        end
+        @doc = at_css('main .container div.theme-doc-markdown.markdown')
 
         css('*').each do |node|
           node.remove_attribute('style')
         end
 
+        css('pre').each do |node|
+          lang = node['class'][/language-(\w+)/, 1]
+          node['data-language'] = lang if lang
+          node.content = node.css('.token-line').map(&:content).join("\n")
+        end
+
         doc
       end
     end

+ 2 - 17
lib/docs/filters/yarn/entries_berry.rb

@@ -2,26 +2,11 @@ module Docs
   class Yarn
     class EntriesBerryFilter < Docs::EntriesFilter
       def get_name
-        if slug.start_with?('configuration')
-          filename = at_css('main .active code')
-          content = filename.content
-          return filename.parent.content.sub content, " (#{content})"
-        end
-
-        name = at_css('h1').content
-
-        if slug.start_with?('getting-started')
-          active_link = at_css('main .active')
-          links = active_link.parent.children.to_a
-          name.prepend "#{links.index(active_link) + 1}. "
-        end
-
-        name
+        at_css('main header h1').content
       end
 
       def get_type
-         return 'CLI' if slug.start_with?('sdks', 'pnpify')
-         at_css('header .active').content
+        at_css('nav.navbar a.navbar__item.navbar__link.navbar__link--active').content
       end
     end
   end

+ 13 - 4
lib/docs/scrapers/yarn.rb

@@ -13,15 +13,16 @@ module Docs
     HTML
 
     version 'Berry' do
-      self.release = '3.1.1'
+      self.release = '4.5.1'
       self.base_url = 'https://yarnpkg.com/'
       self.links = {
         home: 'https://yarnpkg.com/',
         code: 'https://github.com/yarnpkg/berry'
       }
-      html_filters.push 'yarn/entries_berry', 'yarn/clean_html_berry', 'title'
-      options[:skip] = ['features', 'cli', 'configuration', 'advanced']
-      options[:skip_patterns] = [/\Aapi/, /\Apackage/]
+      self.root_path = 'getting-started'
+      html_filters.push 'yarn/entries_berry', 'yarn/clean_html_berry'
+      options[:skip] = ['cli', 'cli/builder', 'cli/pnpify', 'cli/sdks', 'protocols']
+      options[:skip_patterns] = [/\Aapi/, /\Ablog/, /\Apackage/, /\Aassets/]
     end
 
     version 'Classic' do
@@ -38,5 +39,13 @@ module Docs
     def get_latest_version(opts)
       get_latest_github_release('yarnpkg', 'berry', opts)[/[\d.]+/]
     end
+
+    private
+
+    # Some pages contain null bytes and cause the parser to fail
+    def parse(response)
+      response.body.gsub!(/[\x00\u0000\0]/, '')
+      super
+    end
   end
 end