# file_scraper.rb (1.4 KB)

  module Docs
    # Scraper variant that reads pages from files on the local disk rather
    # than requesting them from a remote host. URLs are mapped to file paths
    # by stripping the scraper's base_url and joining the remainder onto
    # #source_directory.
    class FileScraper < Scraper
      # Root directory holding the original documentation files; each scraper
      # reads from the sub-directory named by its class-level `path`.
      SOURCE_DIRECTORY = File.expand_path '../../../../../docs', __FILE__

      # Value object wrapping the file contents (`body`) and the parsed `url`
      # of a single request.
      Response = Struct.new :body, :url

      class << self
        # Propagates this class's base_url to every new subclass.
        #
        # @param subclass [Class] the class being defined
        def inherited(subclass)
          super
          subclass.base_url = base_url
        end
      end

      self.base_url = 'http://localhost/'

      html_filters.push 'clean_local_urls'

      # Directory this scraper reads its files from (memoized).
      #
      # @return [String] SOURCE_DIRECTORY joined with the class-level `path`
      def source_directory
        @source_directory ||= File.join(SOURCE_DIRECTORY, self.class.path)
      end

      private

      # Ensures #source_directory is present on disk.
      #
      # Uses Dir.exist?; the older Dir.exists? alias was removed in Ruby 3.2.
      #
      # @raise [SetupError] when the directory does not exist
      def assert_source_directory_exists
        return if Dir.exist?(source_directory)

        raise SetupError, "The #{self.class.name} scraper requires the original documentation files to be stored in the \"#{source_directory}\" directory."
      end

      # Reads the file corresponding to +url+ and wraps it in a Response.
      #
      # @param url [String] URL to resolve against #source_directory
      # @return [Response] body is nil when the file could not be read
      # @raise [SetupError] when the source directory does not exist
      def request_one(url)
        assert_source_directory_exists
        Response.new read_file(File.join(source_directory, url_to_path(url))), URL.parse(url)
      end

      # Requests each URL in turn, yielding every Response to the given block.
      # When the block returns an Array, its elements are appended to the
      # queue and requested as well.
      #
      # @param urls [String, Array<String>] one URL or a (possibly nested) list
      # @yieldparam response [Response]
      # @raise [SetupError] when the source directory does not exist
      def request_all(urls)
        assert_source_directory_exists
        queue = [urls].flatten
        until queue.empty?
          result = yield request_one(queue.shift)
          queue.concat(result) if result.is_a? Array
        end
      end

      # A response is processed only when its body is present, so files that
      # failed to load (nil body) are skipped.
      #
      # @param response [Response]
      def process_response?(response)
        response.body.present?
      end

      # Converts a URL into a path relative to #source_directory by removing
      # the base_url string from it.
      #
      # @param url [String]
      # @return [String]
      def url_to_path(url)
        url.remove(base_url.to_s)
      end

      # Best-effort file read: any StandardError is reported through the
      # 'warn.doc' instrumentation event and nil is returned instead of
      # propagating the failure.
      #
      # @param path [String]
      # @return [String, nil] file contents, or nil when reading failed
      def read_file(path)
        File.read(path)
      rescue
        instrument 'warn.doc', msg: "Failed to open file: #{path}"
        nil
      end
    end
  end