4 年之前 · fdfcf3d917
--- a/assets/stylesheets/application.css.scss
+++ b/assets/stylesheets/application.css.scss
@@ -39,7 +39,7 @@
 
				         'pages/async',
			
 
				         'pages/bash',
			
 
				         'pages/bootstrap',
			
 
				-        'pages/c',
			
 
				+        'pages/cppref',
			
 
				         'pages/cakephp',
			
 
				         'pages/clojure',
			
 
				         'pages/codeception',
			
--- a/assets/stylesheets/pages/_cppref.scss
+++ b/assets/stylesheets/pages/_cppref.scss
--- a/lib/docs/filters/c/clean_html.rb
+++ b/lib/docs/filters/c/clean_html.rb
@@ -1,116 +0,0 @@
 
				-module Docs
			
 
				-  class C
			
 
				-    class CleanHtmlFilter < Filter
			
 
				-      def call
			
 
				-        css('h1').remove if root_page?
			
 
				-
			
 
				-        css('.t-dcl-rev-aux td[rowspan]').each do |node|
			
 
				-          rowspan = node['rowspan'].to_i
			
 
				-          node['rowspan'] = node.ancestors('tbody').css('tr').length if rowspan > 3
			
 
				-        end
			
 
				-
			
 
				-        css('#siteSub', '#contentSub', '.printfooter', '.t-navbar', '.editsection', '#toc',
			
 
				-            '.t-dsc-sep', '.t-dcl-sep', '#catlinks', '.ambox-notice', '.mw-cite-backlink',
			
 
				-            '.t-sdsc-sep:first-child:last-child', '.t-example-live-link',
			
 
				-            '.t-dcl-rev-num > .t-dcl-rev-aux ~ tr:not(.t-dcl-rev-aux) > td:nth-child(2)').remove
			
 
				-
			
 
				-        css('#bodyContent', '.mw-content-ltr', 'span[style]', 'div[class^="t-ref"]', '.t-image',
			
 
				-            'th > div', 'td > div', '.t-dsc-see', '.mainpagediv', 'code > b', 'tbody').each do |node|
			
 
				-          node.before(node.children).remove
			
 
				-        end
			
 
				-
			
 
				-        css('div > ul').each do |node|
			
 
				-          node.parent.before(node.parent.children).remove
			
 
				-        end
			
 
				-
			
 
				-        css('dl > dd:first-child:last-child > ul:first-child:last-child').each do |node|
			
 
				-          dl = node.parent.parent
			
 
				-          if dl.previous_element && dl.previous_element.name == 'ul'
			
 
				-            dl.previous_element << node
			
 
				-            dl.remove
			
 
				-          else
			
 
				-            dl.before(node).remove
			
 
				-          end
			
 
				-        end
			
 
				-
			
 
				-        css('dl > dd:first-child:last-child').each do |node|
			
 
				-          node.parent.before(node.children).remove
			
 
				-        end
			
 
				-
			
 
				-        css('ul').each do |node|
			
 
				-          while node.next_element && node.next_element.name == 'ul'
			
 
				-            node << node.next_element.children
			
 
				-            node.next_element.remove
			
 
				-          end
			
 
				-        end
			
 
				-
			
 
				-        css('h2 > span[id]', 'h3 > span[id]', 'h4 > span[id]', 'h5 > span[id]', 'h6 > span[id]').each do |node|
			
 
				-          node.parent['id'] = node['id']
			
 
				-          node.before(node.children).remove
			
 
				-        end
			
 
				-
			
 
				-        css('table[style]', 'th[style]', 'td[style]').remove_attr('style')
			
 
				-        css('table[cellpadding]').remove_attr('cellpadding')
			
 
				-
			
 
				-        css('.t-dsc-hitem > td', '.t-dsc-header > td').each do |node|
			
 
				-          node.name = 'th'
			
 
				-          node.content = ' ' if node.content.empty?
			
 
				-        end
			
 
				-
			
 
				-        css('tt', 'span > span.source-cpp', 'span.t-c', 'span.t-lc', 'span.t-dsc-see-tt').each do |node|
			
 
				-          node.name = 'code'
			
 
				-          node.remove_attribute('class')
			
 
				-          node.content = node.content unless node.at_css('a')
			
 
				-        end
			
 
				-
			
 
				-        css('div > span.source-cpp').each do |node|
			
 
				-          node.name = 'pre'
			
 
				-          node.inner_html = node.inner_html.gsub('<br>', "\n")
			
 
				-          node.content = node.content
			
 
				-        end
			
 
				-
			
 
				-        css('div > a > img[alt="About this image"]').each do |node|
			
 
				-          node.parent.parent.remove
			
 
				-        end
			
 
				-
			
 
				-        css('area[href]').each do |node|
			
 
				-          node['href'] = node['href'].remove('.html')
			
 
				-        end
			
 
				-
			
 
				-        css('p').each do |node|
			
 
				-          while node.next && (node.next.text? || node.next.name == 'a' || node.next.name == 'code')
			
 
				-            node << node.next
			
 
				-          end
			
 
				-          node.inner_html = node.inner_html.strip
			
 
				-          node << '.' if node.content =~ /[a-zA-Z0-9\)]\z/
			
 
				-          node.remove if node.content.blank? && !node.at_css('img')
			
 
				-        end
			
 
				-
			
 
				-        css('pre').each do |node|
			
 
				-          node['data-language'] = if node['class'].try(:include?, 'cpp') || node.parent['class'].try(:include?, 'cpp')
			
 
				-            'cpp'
			
 
				-          else
			
 
				-            'c'
			
 
				-          end
			
 
				-          node.remove_attribute('class')
			
 
				-          node.content = node.content.gsub("\t", ' ' * 8)
			
 
				-        end
			
 
				-
			
 
				-        css('code code', '.mw-geshi').each do |node|
			
 
				-          node.before(node.children).remove
			
 
				-        end
			
 
				-
			
 
				-        css('h1 ~ .fmbox').each do |node|
			
 
				-          node.name = 'div'
			
 
				-          node.content = node.content
			
 
				-        end
			
 
				-
			
 
				-        css('img').each do |node|
			
 
				-          node['src'] = node['src'].sub! %r{http://en.cppreference.com/common/([^"']+?)\.svg}, 'http://upload.cppreference.com/mwiki/\1.svg'
			
 
				-        end
			
 
				-
			
 
				-        doc
			
 
				-      end
			
 
				-    end
			
 
				-  end
			
 
				-end
			
--- a/lib/docs/filters/c/entries.rb
+++ b/lib/docs/filters/c/entries.rb
@@ -22,6 +22,9 @@ module Docs
 
				       end
			
 
				 
			
 
				       def get_type
			
 
				+
			
 
				+        return "C keywords" if slug =~ /keyword/
			
 
				+
			
 
				         type = at_css('.t-navbar > div:nth-child(4) > :first-child').try(:content)
			
 
				         type.strip!
			
 
				         type.remove! ' library'
			
--- a/lib/docs/filters/c/fix_code.rb
+++ b/lib/docs/filters/c/fix_code.rb
@@ -1,21 +0,0 @@
 
				-module Docs
			
 
				-  class C
			
 
				-    class FixCodeFilter < Filter
			
 
				-      def call
			
 
				-        css('div > span.source-c', 'div > span.source-cpp').each do |node|
			
 
				-          node.inner_html = node.inner_html.gsub(/<br>\n?/, "\n").gsub("\n</p>\n", "</p>\n")
			
 
				-          node.parent.name = 'pre'
			
 
				-          node.parent['class'] = node['class']
			
 
				-          node.parent.content = node.content
			
 
				-        end
			
 
				-
			
 
				-        nbsp = Nokogiri::HTML('&nbsp;').text
			
 
				-        css('pre').each do |node|
			
 
				-          node.content = node.content.gsub(nbsp, ' ')
			
 
				-        end
			
 
				-
			
 
				-        doc
			
 
				-      end
			
 
				-    end
			
 
				-  end
			
 
				-end
			
--- a/lib/docs/filters/c/fix_urls.rb
+++ b/lib/docs/filters/c/fix_urls.rb
@@ -1,11 +0,0 @@
 
				-module Docs
			
 
				-  class C
			
 
				-    class FixUrlsFilter < Filter
			
 
				-      def call
			
 
				-        html.gsub! File.join(C.base_url, C.root_path), C.base_url[0..-2]
			
 
				-        html.gsub! %r{#{C.base_url}([^"']+?)\.html}, "#{C.base_url}\\1"
			
 
				-        html
			
 
				-      end
			
 
				-    end
			
 
				-  end
			
 
				-end
			
--- a/lib/docs/scrapers/c.rb
+++ b/lib/docs/scrapers/c.rb
@@ -1,42 +0,0 @@
 
				-module Docs
			
 
				-  class C < FileScraper
			
 
				-    self.type = 'c'
			
 
				-    self.base_url = 'http://en.cppreference.com/w/c/'
			
 
				-    self.root_path = 'header.html'
			
 
				-
			
 
				-    html_filters.insert_before 'clean_html', 'c/fix_code'
			
 
				-    html_filters.push 'c/entries', 'c/clean_html', 'title'
			
 
				-    text_filters.push 'c/fix_urls'
			
 
				-
			
 
				-    options[:decode_and_clean_paths] = true
			
 
				-    options[:container] = '#content'
			
 
				-    options[:title] = false
			
 
				-    options[:root_title] = 'C Programming Language'
			
 
				-    options[:skip] = %w(language/history.html)
			
 
				-    options[:skip_patterns] = [/experimental/]
			
 
				-
			
 
				-    options[:fix_urls] = ->(url) do
			
 
				-      url.sub! %r{\A.+/http%3A/}, 'http://'
			
 
				-      url.sub! 'http://en.cppreference.com/upload.cppreference.com', 'http://upload.cppreference.com'
			
 
				-      url
			
 
				-    end
			
 
				-
			
 
				-    options[:attribution] = <<-HTML
			
 
				-      &copy; cppreference.com<br>
			
 
				-      Licensed under the Creative Commons Attribution-ShareAlike Unported License v3.0.
			
 
				-    HTML
			
 
				-
			
 
				-    def get_latest_version(opts)
			
 
				-      doc = fetch_doc('https://en.cppreference.com/w/Cppreference:Archives', opts)
			
 
				-      link = doc.at_css('a[title^="File:"]')
			
 
				-      date = link.content.scan(/(\d+)\./)[0][0]
			
 
				-      DateTime.strptime(date, '%Y%m%d').to_time.to_i
			
 
				-    end
			
 
				-
			
 
				-    private
			
 
				-
			
 
				-    def file_path_for(*)
			
 
				-      URI.unescape(super)
			
 
				-    end
			
 
				-  end
			
 
				-end
			
--- a/lib/docs/scrapers/cppref/c.rb
+++ b/lib/docs/scrapers/cppref/c.rb
@@ -0,0 +1,12 @@
 
				+module Docs
			
 
				+  class C < Cppref
			
 
				+    self.name = 'c'
			
 
				+    self.slug = 'c'
			
 
				+    self.base_url = 'https://en.cppreference.com/w/c/'
			
 
				+
			
 
				+    html_filters.insert_before 'cppref/clean_html', 'c/entries'
			
 
				+
			
 
				+    options[:root_title] = 'C Programming Language'
			
 
				+
			
 
				+  end
			
 
				+end
			
--- a/lib/docs/scrapers/cppref/cpp.rb
+++ b/lib/docs/scrapers/cppref/cpp.rb
@@ -2,7 +2,6 @@ module Docs
 
				   class Cpp < Cppref
			
 
				     self.name = 'C++'
			
 
				     self.slug = 'cpp'
			
 
				-    self.type = 'c'
			
 
				     self.base_url = 'https://en.cppreference.com/w/cpp/'
			
 
				 
			
 
				     html_filters.insert_before 'cppref/clean_html', 'cpp/entries'
			
@@ -16,13 +15,5 @@ module Docs
 
				       regex/regex_token_iterator/operator_cmp.html
			
 
				     )
			
 
				 
			
 
				-    # Same as get_latest_version in lib/docs/scrapers/c.rb
			
 
				-    def get_latest_version(opts)
			
 
				-      doc = fetch_doc('https://en.cppreference.com/w/Cppreference:Archives', opts)
			
 
				-      link = doc.at_css('a[title^="File:"]')
			
 
				-      date = link.content.scan(/(\d+)\./)[0][0]
			
 
				-      DateTime.strptime(date, '%Y%m%d').to_time.to_i
			
 
				-    end
			
 
				-
			
 
				   end
			
 
				 end
			
--- a/lib/docs/scrapers/cppref/cppref.rb
+++ b/lib/docs/scrapers/cppref/cppref.rb
@@ -6,7 +6,7 @@ module Docs
 
				 
			
 
				     html_filters.insert_before 'clean_html', 'cppref/fix_code'
			
 
				     html_filters.push  'cppref/clean_html', 'title'
			
 
				-      # 'cpp20/entries',
			
 
				+
			
 
				     options[:decode_and_clean_paths] = true
			
 
				     options[:container] = '#content'
			
 
				     options[:title] = false
			
@@ -21,9 +21,13 @@ module Docs
 
				       Licensed under the Creative Commons Attribution-ShareAlike Unported License v3.0.
			
 
				     HTML
			
 
				 
			
 
				-    # def get_latest_version
			
 
				-
			
 
				-    # end
			
 
				+    # Check if the 'headers' page has changed
			
 
				+    def get_latest_version(opts)
			
 
				+      doc = fetch_doc(self.base_url + self.root_path, opts)
			
 
				+      date = doc.at_css('#footer-info-lastmod').content
			
 
				+      date = date.match(/[[:digit:]]{1,2} .* [[:digit:]]{4}/).to_s
			
 
				+      date = DateTime.strptime(date, '%e %B %Y').to_time.to_i
			
 
				+    end
			
 
				 
			
 
				   end
			
 
				 end