1
0
Эх сурвалжийг харах

PyTorch 1.6+ scraper code cleanup

Phil Scherer 5 жил өмнө
parent
commit
5afcd785d7

+ 10 - 14
lib/docs/filters/pytorch/entries.rb

@@ -1,27 +1,23 @@
 module Docs
   class Pytorch
     class EntriesFilter < Docs::EntriesFilter
-      def get_name
-        breadcrumbs = at_css('.pytorch-breadcrumbs')
-        name_in_breadcrumb = breadcrumbs.css('li')[1].content
-
-        article = at_css('.pytorch-article')
+      NAME_REPLACEMENTS = {
+        "Distributed communication package - torch.distributed" => "torch.distributed"
+      }
 
-        # hard-coded name replacements, for better presentation.
-        name_replacements = {
-          "Distributed communication package - torch.distributed" => "torch.distributed"
-        }
+      def get_breadcrumbs()
+        css('.pytorch-breadcrumbs > li').map { |node| node.content.delete_suffix(' >') }
+      end
 
+      def get_name
         # The id of the container `div.section` indicates the page type.
         # If the id starts with `module-`, then it's an API reference,
         # otherwise it is a note or design doc.
-        article_id = article.at_css('div.section')['id']
-        if article_id.starts_with? 'module-'
+        if at_css('.section')['id'].starts_with? 'module-'
           /\Amodule-(.*)/.match(article_id)[1]
         else
-          name_in_breadcrumb = name_in_breadcrumb.delete_suffix(' >')
-          name_in_breadcrumb = name_replacements.fetch(name_in_breadcrumb, name_in_breadcrumb)
-          name_in_breadcrumb
+          name = get_breadcrumbs()[1]
+          NAME_REPLACEMENTS.fetch(name, name)
         end
       end