1
0

cpp.rb 1.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152
  module Docs
    # Scraper configuration for the C++ reference hosted on cppreference.com.
    #
    # Built on FileScraper; the class body only declares metadata, the filter
    # pipeline and scraper options, plus two small method overrides.
    class Cpp < FileScraper
      self.name = 'C++'
      self.slug = 'cpp'
      self.type = 'c'
      self.base_url = 'http://en.cppreference.com/w/cpp/'
      self.root_path = 'header.html'

      # Filter pipeline: the C `fix_code` filter runs before `clean_html`;
      # the C++ entries filter, the C `clean_html` filter and `title` are appended.
      html_filters.insert_before 'clean_html', 'c/fix_code'
      html_filters.push 'cpp/entries', 'c/clean_html', 'title'
      text_filters.push 'cpp/fix_urls'

      options[:decode_and_clean_paths] = true
      options[:container] = '#content'
      options[:title] = false
      options[:root_title] = 'C++ Programming Language'

      # Pages listed explicitly to be skipped.
      options[:skip] = %w(
        language/extending_std.html
        language/history.html
        regex/ecmascript.html
        regex/regex_token_iterator/operator_cmp.html
      )

      # Skip anything whose path mentions "experimental"; keep only paths
      # ending in ".html".
      options[:skip_patterns] = [/experimental/]
      options[:only_patterns] = [/\.html\z/]

      # Normalizes a URL: unescapes it, collapses a leading ".../http%3A/"
      # prefix to "http://", and rewrites upload links that were nested under
      # the main host back to the upload host. Returns the resulting string.
      #
      # NOTE(review): the string is already unescaped when the `%3A` pattern
      # runs, so that substitution can only match input that was
      # double-escaped (e.g. `%253A`) -- confirm against real input.
      options[:fix_urls] = ->(url) do
        url = CGI.unescape(url)
        url.sub! %r{\A.+/http%3A/}, 'http://'
        url.sub! 'http://en.cppreference.com/upload.cppreference.com', 'http://upload.cppreference.com'
        url
      end

      options[:attribution] = <<-HTML
        &copy; cppreference.com<br>
        Licensed under the Creative Commons Attribution-ShareAlike Unported License v3.0.
      HTML

      # Same as get_latest_version in lib/docs/scrapers/c.rb
      #
      # Fetches the cppreference archives page, takes the first link whose
      # title starts with "File:", reads the first run of digits that is
      # followed by a dot in the link text, parses it as YYYYMMDD and returns
      # the corresponding Unix timestamp (Integer seconds).
      #
      # Raises if the page has no such link or the link text has no
      # "<digits>." sequence.
      def get_latest_version(opts)
        doc = fetch_doc('https://en.cppreference.com/w/Cppreference:Archives', opts)
        link = doc.at_css('a[title^="File:"]')
        date = link.content.scan(/(\d+)\./)[0][0]
        DateTime.strptime(date, '%Y%m%d').to_time.to_i
      end

      private

      # Returns the parent implementation's file path with percent-escapes
      # decoded.
      #
      # URI.unescape was removed in Ruby 3.0; URI::DEFAULT_PARSER.unescape
      # performs the same percent-decoding and, unlike CGI.unescape, leaves
      # "+" untouched.
      def file_path_for(*)
        URI::DEFAULT_PARSER.unescape(super)
      end
    end
  end