# file_scraper.rb
  module Docs
    # Scraper whose requests are answered by reading files from a local
    # directory: each URL under +base_url+ is mapped onto a path beneath the
    # class-level +dir+ and the file's contents become the response body.
    class FileScraper < Scraper
      # Lightweight response: +body+ is the file's contents (nil when the file
      # could not be read) and +url+ is the result of URL.parse on the request.
      Response = Struct.new :body, :url

      class << self
        # Directory that request URLs are resolved against (see #file_path_for).
        attr_accessor :dir

        # Runs the parent hook, then copies this class's +base_url+ onto the
        # newly created subclass.
        def inherited(subclass)
          super
          subclass.base_url = base_url
        end
      end

      self.base_url = 'http://localhost/'

      # NOTE(review): the filter is registered by name only; what
      # 'clean_local_urls' does is defined elsewhere.
      html_filters.push 'clean_local_urls'

      private

      # Builds a Response for +url+ from the corresponding local file.
      #
      # @param url [String] URL to resolve against the local directory
      # @return [Response] body is nil when the file could not be read
      def request_one(url)
        Response.new read_file(file_path_for(url)), URL.parse(url)
      end

      # Processes +urls+ as a work queue: every response is yielded to the
      # caller's block, and when the block returns an Array its elements are
      # appended to the queue and requested in turn.
      #
      # @param urls [String, Array] one URL or a (possibly nested) list of URLs
      # @yieldparam response [Response]
      # @return [nil]
      def request_all(urls)
        queue = [urls].flatten
        until queue.empty?
          result = yield request_one(queue.shift)
          queue.concat(result) if result.is_a? Array
        end
      end

      # A response is worth processing only when its body is present, which
      # excludes the nil body produced for a file that could not be read.
      def process_response?(response)
        response.body.present?
      end

      # Maps +url+ onto a path beneath the class-level +dir+.
      #
      # @param url [String]
      # @return [String]
      def file_path_for(url)
        prefix = base_url.to_s
        # Strip base_url only where it leads the URL; removing every occurrence
        # would also mangle a URL that merely contains base_url further along.
        relative = url.start_with?(prefix) ? url[prefix.length..-1] : url
        File.join self.class.dir, relative
      end

      # Reads the file at +path+.
      #
      # @param path [String]
      # @return [String, nil] the contents, or nil when the file cannot be read
      def read_file(path)
        File.read(path)
      rescue SystemCallError, IOError
        # Only file-system failures (missing file, directory, permissions, ...)
        # are treated as "no content"; any other error is left to propagate
        # instead of being silently discarded.
        nil
      end
    end
  end