1
0

file_scraper.rb 963 B

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748
  module Docs
    # Scraper whose page bodies are read from files on disk.
    #
    # A requested URL is turned into a file path by stripping +base_url+ from
    # the front of the URL and joining the remainder onto the class-level +dir+.
    class FileScraper < Scraper
      # Value object handed to the caller's block: +body+ is the file content
      # (nil when the file could not be read) and +url+ is the parsed URL.
      Response = Struct.new :body, :url

      class << self
        # Directory that URL paths are resolved against (see #file_path_for).
        attr_accessor :dir

        # Copies this class's +base_url+ and +dir+ onto every new subclass.
        def inherited(subclass)
          super
          subclass.base_url = base_url
          subclass.dir = dir
        end
      end

      self.base_url = 'http://localhost/'

      html_filters.push 'clean_local_urls'

      private

      # Builds a Response for a single URL by reading its mapped file.
      #
      # @param url [String] URL to resolve
      # @return [Response] body is nil when the file could not be read
      def request_one(url)
        Response.new read_file(file_path_for(url)), URL.parse(url)
      end

      # Requests every URL in turn, yielding each Response to the block.
      #
      # When the block returns an Array, its elements are appended to the
      # queue and requested as well; any other return value is ignored.
      #
      # @param urls [String, Array] a single URL or a (possibly nested) list
      # @yieldparam response [Response]
      def request_all(urls)
        queue = [urls].flatten
        until queue.empty?
          result = yield request_one(queue.shift)
          queue.concat(result) if result.is_a? Array
        end
      end

      # A response is processed only when its body is present; a failed read
      # leaves the body nil (see #read_file).
      def process_response?(response)
        response.body.present?
      end

      # Maps a URL to a path under the class-level +dir+.
      #
      # Only a leading +base_url+ is stripped. Removing every occurrence would
      # corrupt URLs that contain the base URL again further along (for
      # example inside a query string), so later occurrences are kept.
      #
      # @param url [String]
      # @return [String] file path
      def file_path_for(url)
        prefix = base_url.to_s
        relative = url.start_with?(prefix) ? url[prefix.length..-1] : url
        File.join self.class.dir, relative
      end

      # Reads the file at +path+.
      #
      # Deliberately best-effort: any StandardError is reported through
      # +instrument+ as a warning and the method returns nil instead of raising.
      #
      # @param path [String]
      # @return [String, nil] file content, or nil on failure
      def read_file(path)
        File.read(path)
      rescue StandardError
        instrument 'warn.doc', msg: "Failed to open file: #{path}"
        nil
      end
    end
  end