Bläddra i källkod

get the thing working for haskell

Romeo Van Snick 11 år sedan
förälder
incheckning
073dbf1ab7

+ 32 - 0
lib/docs/filters/haskell/clean_html.rb

@@ -0,0 +1,32 @@
+module Docs
+  class Haskell
+    class CleanHtmlFilter < Filter
+      def call
+
+        # remove unwanted elements
+        css('#footer', '#package-header', '#module-header', '#synopsis', '.link', '#table-of-contents', '.empty', '.package').remove
+
+        # turn captions into real headers
+        css('.caption').each do |node|
+          node.name = 'h2'
+        end
+
+        css('table .caption').each do |node|
+          node.name = 'h3'
+        end
+
+        # # turn source listing in to pre
+        css('.src').each do |node|
+          node.name = 'pre'
+        end
+
+
+        if at_css('h1') && at_css('h1').content == 'Haskell Hierarchical Libraries'
+          css('h1').remove
+        end
+
+        doc
+      end
+    end
+  end
+end

+ 56 - 0
lib/docs/filters/haskell/entries.rb

@@ -0,0 +1,56 @@
+module Docs
+  class Haskell
+    class EntriesFilter < Docs::EntriesFilter
+
+      # gets name and type in one fell swoop
+      # 
+      # eg.
+      #  Control.Monad > [Monad, Control]
+      #  Control.Concurrent.Mvar > [Concurrent.MVar, Control]
+      #  Array > [Array, nil]
+      def get_name_and_type
+        if at_css('h1') && at_css('h1').content == 'Haskell Hierarchical Libraries'
+          name = 'Haskell'
+          type = nil
+        else
+          # find full module identifier
+          caption = at_css('#module-header .caption')
+
+          if caption
+            # split the module path
+            parts   = caption.content.split('.')
+
+            if parts.length > 1
+              # if more than one part then the 
+              # first is the type and the rest is the name
+              type = parts[0]
+              name = parts.drop(1).join('.')
+            else
+              # if only one part, this is the name
+              name = parts[0]
+              type = nil
+            end
+          else
+            # no caption found -> no type / no name
+            name = 'no-name'
+            type = 'no-type'
+          end
+        end
+        [name, type]
+      end
+
+      # get the name
+      def get_name
+        n, t = get_name_and_type()
+        n
+      end
+
+      # get the type
+      def get_type
+        n, t = get_name_and_type()
+        t
+      end
+
+    end
+  end
+end

+ 15 - 0
lib/docs/scrapers/haskell.rb

@@ -2,8 +2,23 @@ module Docs
   class Haskell < UrlScraper # scraper configuration for the GHC 7.8.2 library documentation
     self.name = 'Haskell'
     self.slug = 'haskell'
+    self.type = 'haskell'
     self.version = '7.8.2'
     self.base_url = 'http://www.haskell.org/ghc/docs/7.8.2/html/libraries'
+    self.initial_paths = ['/index.html'] # NOTE(review): presumably resolved against base_url; confirm in UrlScraper
+
+    html_filters.push 'haskell/entries' # NOTE(review): presumably lib/docs/filters/haskell/entries.rb; confirm lookup
+    html_filters.push 'haskell/clean_html' # NOTE(review): presumably lib/docs/filters/haskell/clean_html.rb
+    html_filters.push 'title'
+
+
+    options[:container]     = '#content' # NOTE(review): presumably limits processing to this element; confirm
+    options[:skip_patterns] = [/src/, /index/, /haskell2010/]   # skip source listings, index files and anything matching "haskell2010"
+
+    options[:attribution] = <<-HTML # HTML snippet carrying the copyright / license notice
+      &copy; The University Court of the University of Glasgow.<br>
+      All rights reserved. <a href="http://www.haskell.org/ghc/license">See here for more info</a>
+    HTML
 
     end
 end