# c.rb
  module Docs
    # Scraper definition for the C reference pages of cppreference.com.
    #
    # Everything here is configuration layered on FileScraper (defined
    # elsewhere in the project): class-level settings, the HTML/text filter
    # pipelines, scraper options, a version probe and a file-path override.
    class C < FileScraper
      self.type = 'c'
      self.base_url = 'http://en.cppreference.com/w/c/'
      self.root_path = 'header.html'

      # 'c/fix_code' is inserted ahead of the existing 'clean_html' filter;
      # the remaining filters are appended to the end of their pipelines.
      html_filters.insert_before 'clean_html', 'c/fix_code'
      html_filters.push 'c/entries', 'c/clean_html', 'title'
      text_filters.push 'c/fix_urls'

      options[:decode_and_clean_paths] = true
      options[:container] = '#content'
      options[:title] = false
      options[:root_title] = 'C Programming Language'
      options[:skip] = %w(language/history.html)
      options[:skip_patterns] = [/experimental/]

      # Rewrites +url+ in place and returns it:
      # 1. everything up to and including a "/http%3A/" segment is replaced
      #    by "http://";
      # 2. the "http://en.cppreference.com/upload.cppreference.com" prefix is
      #    replaced by "http://upload.cppreference.com".
      # `sub!` returns nil when nothing matched, hence the explicit final
      # `url` as the lambda's return value.
      options[:fix_urls] = ->(url) do
        url.sub! %r{\A.+/http%3A/}, 'http://'
        url.sub! 'http://en.cppreference.com/upload.cppreference.com', 'http://upload.cppreference.com'
        url
      end

      # NOTE: with `<<-` only the terminator may be indented; the leading
      # whitespace of the content lines is part of the resulting string.
      options[:attribution] = <<-HTML
        &copy; cppreference.com<br>
        Licensed under the Creative Commons Attribution-ShareAlike Unported License v3.0.
      HTML

      # Determines the latest published version from the cppreference
      # archives page and passes it to +block+ as a Unix timestamp.
      #
      # The first link whose title starts with "File:" is used; the first run
      # of digits followed by a dot in its text is parsed as a %Y%m%d date.
      #
      # @param options passed through unchanged to fetch_doc (defined elsewhere)
      # @param block   called with the Integer timestamp
      # @raise [NoMethodError] if no such link exists or its text contains no
      #   "<digits>." sequence
      def get_latest_version(options, &block)
        fetch_doc('https://en.cppreference.com/w/Cppreference:Archives', options) do |doc|
          link = doc.at_css('a[title^="File:"]')
          date = link.content.scan(/(\d+)\./)[0][0]
          block.call DateTime.strptime(date, '%Y%m%d').to_time.to_i
        end
      end

      private

      # Returns the superclass's file path with percent-escapes decoded.
      #
      # `URI.unescape` was removed in Ruby 3.0; `URI::DEFAULT_PARSER.unescape`
      # is the method it delegated to, so the decoding is unchanged (in
      # particular "+" is NOT turned into a space, unlike CGI.unescape).
      def file_path_for(*)
        URI::DEFAULT_PARSER.unescape(super)
      end
    end
  end