# requester.rb (1.6 KB)

# (line-number gutter 1-65 left over from the original web listing)
  module Docs
    # A Typhoeus::Hydra subclass that passes every completed response to a list
    # of registered callbacks and queues any URLs (an Array) a callback returns.
    class Requester < Typhoeus::Hydra
      include Instrumentable

      # @return [Hash] default options merged into every request built by
      #   this requester (see #build_and_queue_request)
      attr_reader :request_options

      # Builds a requester, queues +urls+, runs it and returns it.
      #
      # Only the first URL is queued up front; the remaining ones are queued
      # from a response callback the first time any response arrives.
      #
      # @param urls [String, Array<String>] one URL or a list of URLs; nil
      #   entries are dropped and an empty list queues nothing
      # @param options [Hash] forwarded to #initialize (not modified)
      # @yield [response] registered as an on_response callback when given
      # @return [Requester] the requester, after #run has returned
      def self.run(urls, options = {}, &block)
        # Normalise to a private array so the caller's list is never touched
        # and a single URL string is accepted, as #request already does.
        root_url, *pending = [urls].flatten.compact

        requester = new(options)
        requester.on_response(&block) if block

        # Cheap hack to ensure the root page is processed first: the other
        # URLs are only queued once the first response has come back.
        requester.on_response do
          next unless pending

          requester.request(pending)
          # The assignment is also the block's value (nil), so
          # #handle_response does not treat the return value as URLs to queue.
          pending = nil
        end

        requester.request(root_url) if root_url
        requester.run
        requester
      end

      # @param options [Hash] hydra options. +:request_options+ is removed and
      #   kept (as a copy) as the per-request defaults, +:max_concurrency+
      #   defaults to 20 and +:pipelining+ is always forced to 0. The hash
      #   passed in is left untouched.
      def initialize(options = {})
        options = options.dup # never mutate the caller's hash
        @request_options = (options.delete(:request_options) || {}).dup
        options[:max_concurrency] ||= 20
        options[:pipelining] = 0
        super(options)
      end

      # Builds and queues one request per URL.
      #
      # @param urls [String, Array<String>] a URL or (possibly nested) list
      # @param options [Hash] per-request options merged over #request_options
      # @yield optional block registered as on_complete on each request
      # @return [Request, Array<Request>] the single request when exactly one
      #   URL was given, otherwise the array of requests (empty for no URLs)
      def request(urls, options = {}, &block)
        requests = [urls].flatten.map do |url|
          build_and_queue_request(url, options, &block)
        end
        requests.length == 1 ? requests.first : requests
      end

      # Registers #handle_response as an on_complete callback on +request+,
      # then hands the request to the parent class's #queue.
      def queue(request)
        request.on_complete(&method(:handle_response))
        super
      end

      # Registers +block+ (when given) as a response callback.
      #
      # @return [Array<Proc>] all callbacks registered so far
      def on_response(&block)
        @on_response ||= []
        @on_response << block if block
        @on_response
      end

      private

      # Builds a Request from the default and per-call options, attaches the
      # optional on_complete block, and queues it.
      #
      # @return [Request] the queued request
      def build_and_queue_request(url, options = {}, &block)
        request = Request.new(url, **request_options.merge(options))
        request.on_complete(&block) if block
        queue(request)
        request
      end

      # Invokes every on_response callback with +response+, inside an
      # instrumentation block. When a callback returns an Array, each element
      # is queued as a new request.
      def handle_response(response)
        instrument 'handle_response.requester', url: response.url do
          on_response.each do |callback|
            result = callback.call(response)
            result.each { |url| request(url) } if result.is_a?(Array)
          end
        end
      end
    end
  end