rust.rb 1.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546
  module Docs
    # Scraper definition for the Rust documentation served from
    # https://doc.rust-lang.org/ (release 1.8.0).
    #
    # Crawling starts at the book index plus the entry points listed in
    # +initial_paths+. Responses are screened by #process_response? before
    # they are handed on for processing.
    class Rust < UrlScraper
      self.type = 'rust'
      self.release = '1.8.0'
      self.base_url = 'https://doc.rust-lang.org/'
      self.root_path = 'book/index.html'
      self.initial_paths = %w(
        reference.html
        collections/index.html
        std/index.html)
      self.links = {
        home: 'https://www.rust-lang.org/',
        code: 'https://github.com/rust-lang/rust'
      }

      html_filters.push 'rust/entries', 'rust/clean_html'

      # Path prefixes (relative to base_url) covered by this scraper:
      # the book, the collections crate docs and the std crate docs.
      options[:only_patterns] = [
        /\Abook\//,
        /\Acollections\//,
        /\Astd\// ]

      options[:skip] = %w(book/README.html)

      # Matches every path that does NOT end in ".html"
      # (negative look-behind anchored at end of string).
      options[:skip_patterns] = [/(?<!\.html)\z/]

      # Normalizes a URL in place and returns it:
      #   * a URL under base_url that ends in "/" gets "index.html" appended
      #     (".+" requires at least one character after base_url, so the bare
      #     base_url itself is left alone);
      #   * "/unicode/u_str" is rewritten to "/unicode/str/".
      #
      # base_url is passed through Regexp.escape so that the dots in the host
      # name are matched literally instead of acting as regex wildcards.
      options[:fix_urls] = ->(url) do
        url.sub! %r{(#{Regexp.escape(Rust.base_url)}.+/)\z}, '\1index.html'
        url.sub! '/unicode/u_str', '/unicode/str/'
        url
      end

      options[:attribution] = <<-HTML
        &copy; 2016 The Rust Project Developers<br>
        Licensed under the Apache License, Version 2.0 or the MIT license, at your option.
      HTML

      private

      # NOTE: `private` only affects methods; these constants stay publicly
      # reachable as Docs::Rust::REDIRECT_RGX / Docs::Rust::NOT_FOUND_RGX.

      # Matches an HTML meta-refresh marker (case-insensitive).
      REDIRECT_RGX = /http-equiv="refresh"/i
      # Matches the <title> of a "Not Found" page.
      NOT_FOUND_RGX = /<title>Not Found<\/title>/

      # Decides whether a fetched response should be processed.
      #
      # response - object responding to #body.
      #
      # Returns false when the body contains a meta-refresh marker, has a
      # "Not Found" title, or is blank; true otherwise.
      def process_response?(response)
        body = response.body
        !(body =~ REDIRECT_RGX || body =~ NOT_FOUND_RGX || body.blank?)
      end
    end
  end