# scraper_test.rb

require 'test_helper'
require 'docs'
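
# These specs use MiniTest::Spec with an RR-style mocking API
# (stub/mock/dont_allow/mock.proxy). That library, like FakeInstrumentation
# below, is assumed to be set up by test_helper.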

class DocsScraperTest < MiniTest::Spec
  class Scraper < Docs::Scraper
    self.type = 'scraper'
    self.base_url = 'http://example.com/'
    self.root_path = '/root'
    self.html_filters = Docs::FilterStack.new
    self.text_filters = Docs::FilterStack.new
  end

  let :scraper do
    Scraper.new.tap do |scraper|
      scraper.extend FakeInstrumentation
    end
  end
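
  # FakeInstrumentation isn't defined in this file. A minimal sketch of the
  # interface these tests rely on (an assumption, not the actual helper):
  #
  #   module FakeInstrumentation
  #     def instrument(event, payload = {})
  #       @last_instrumentation = { event: event, payload: payload }
  #       yield if block_given?
  #     end
  #
  #     def last_instrumentation
  #       @last_instrumentation
  #     end
  #   end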

  let :response do
    Struct.new(:body, :url).new
  end

  let :processed_response do
    Hash.new
  end
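
  # Subclassing must copy .type and .root_path and duplicate .options and the
  # filter stacks, so a subclass can mutate its own copies without touching
  # the parent's.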
  describe ".inherited" do
    let :subclass do
      Class.new Scraper
    end

    it "sets .type" do
      assert_equal Scraper.type, subclass.type
    end

    it "sets .root_path" do
      assert_equal Scraper.root_path, subclass.root_path
    end

    it "duplicates .options" do
      stub(Scraper).options { { test: [] } }
      assert_equal Scraper.options, subclass.options
      refute_same Scraper.options, subclass.options
      refute_same Scraper.options[:test], subclass.options[:test]
    end

    it "duplicates .html_filters" do
      assert_equal Scraper.html_filters, subclass.html_filters
      refute_same Scraper.html_filters, subclass.html_filters
    end

    it "duplicates .text_filters" do
      assert_equal Scraper.text_filters, subclass.text_filters
      refute_same Scraper.text_filters, subclass.text_filters
    end
  end
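
  # .filters is the two filter stacks concatenated, HTML filters first.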
  describe ".filters" do
    it "returns the union of .html_filters and .text_filters" do
      stub(Scraper.html_filters).to_a { [1] }
      stub(Scraper.text_filters).to_a { [2] }
      assert_equal [1, 2], Scraper.filters
    end
  end
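
  # Only a non-trivial path counts as a root path: blank and '/' both mean
  # "scrape from the base url".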
  describe "#root_path?" do
    it "returns false when .root_path is blank" do
      stub(Scraper).root_path { '' }
      refute scraper.root_path?
    end

    it "returns false when .root_path is '/'" do
      stub(Scraper).root_path { '/' }
      refute scraper.root_path?
    end

    it "returns true when .root_path is '/path'" do
      stub(Scraper).root_path { '/path' }
      assert scraper.root_path?
    end
  end
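
  # #root_url is the normalized base url, with .root_path appended when one
  # is set.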
  describe "#root_url" do
    context "when #root_path? is false" do
      before do
        stub(scraper).root_path? { false }
      end

      it "returns a Docs::URL" do
        assert_instance_of Docs::URL, scraper.root_url
      end

      it "returns the normalized base url" do
        stub(Scraper).base_url { 'http://example.com' }
        assert_equal 'http://example.com/', scraper.root_url.to_s
      end
    end

    context "when .root_path isn't blank" do
      before do
        stub(scraper).root_path? { true }
      end

      it "returns a Docs::URL" do
        assert_instance_of Docs::URL, scraper.root_url
      end

      it "returns base url + root path" do
        stub(Scraper).base_url { 'http://example.com/path/' }
        assert_equal 'http://example.com/path/root', scraper.root_url.to_s
      end
    end
  end
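
  # #build_page fetches a single path, runs the response through
  # #handle_response, and both returns and yields the processed result.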
  describe "#build_page" do
    before do
      stub(scraper).handle_response
    end

    it "requires a path" do
      assert_raises ArgumentError do
        scraper.build_page
      end
    end

    context "with a blank path" do
      it "requests the root url" do
        mock(scraper).request_one(scraper.root_url.to_s)
        scraper.build_page ''
      end
    end

    context "with '/'" do
      it "requests the root url" do
        mock(scraper).request_one(scraper.root_url.to_s)
        scraper.build_page '/'
      end
    end

    context "with '/file'" do
      it "requests 'example.com/file' when the base url is 'example.com'" do
        stub(Scraper).base_url { 'http://example.com' }
        mock(scraper).request_one 'http://example.com/file'
        scraper.build_page '/file'
      end

      it "requests 'example.com/file' when the base url is 'example.com/'" do
        stub(Scraper).base_url { 'http://example.com/' }
        mock(scraper).request_one 'http://example.com/file'
        scraper.build_page '/file'
      end
    end

    it "returns the processed response" do
      stub(scraper).request_one { response }
      mock(scraper).handle_response(response) { 'test' }
      assert_equal 'test', scraper.build_page('')
    end

    it "yields the processed response" do
      stub(scraper).request_one { response }
      stub(scraper).handle_response(response) { 'test' }
      scraper.build_page('') { |arg| @arg = arg }
      assert @arg
      assert_equal 'test', @arg
    end
  end
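
  # #build_pages crawls from the root url: each processable response is
  # yielded, and its :internal_urls are queued for requesting, with each url
  # requested at most once regardless of case.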
  describe "#build_pages" do
    let :block do
      Proc.new {}
    end

    it "requests the root url" do
      mock(scraper).request_all(scraper.root_url.to_s)
      scraper.build_pages(&block)
    end

    it "instruments 'running'" do
      stub(scraper).request_all
      scraper.build_pages(&block)
      assert scraper.last_instrumentation
      assert_equal 'running.scraper', scraper.last_instrumentation[:event]
      assert_includes scraper.last_instrumentation[:payload][:urls], scraper.root_url.to_s
    end

    context "when the response is processable" do
      before do
        stub(scraper).request_all.with_any_args { |*args| @returned = args.last.call(response) }
        stub(scraper).handle_response(response) { processed_response }
      end

      it "yields the processed response" do
        scraper.build_pages { |arg| @arg = arg }
        assert @arg
        assert_equal processed_response, @arg
      end

      context "when response[:internal_urls] is empty" do
        before do
          processed_response[:internal_urls] = []
        end

        it "requests nothing more" do
          scraper.build_pages(&block)
          assert_nil @returned
        end

        it "doesn't instrument 'queued'" do
          scraper.build_pages(&block)
          refute_equal 'queued.scraper', scraper.last_instrumentation.try(:[], :event)
        end
      end

      context "when response[:internal_urls] isn't empty" do
        let :urls do
          ['Url']
        end

        before do
          processed_response[:internal_urls] = urls
        end

        it "requests the urls" do
          scraper.build_pages(&block)
          assert_equal urls, @returned
        end

        it "doesn't request the same url twice irrespective of case" do
          stub(Scraper).root_path { 'PATH' }
          processed_response[:internal_urls] = [scraper.root_url.to_s.swapcase]
          scraper.build_pages(&block)
          assert_empty @returned
        end

        it "instruments 'queued'" do
          scraper.build_pages(&block)
          assert scraper.last_instrumentation
          assert_equal 'queued.scraper', scraper.last_instrumentation[:event]
          assert_equal urls, scraper.last_instrumentation[:payload][:urls]
        end
      end
    end

    context "when the response isn't processable" do
      it "doesn't yield" do
        stub(scraper).request_all.yields(response)
        stub(scraper).handle_response(response) { nil }
        scraper.build_pages { @yield = true }
        refute @yield
      end
    end
  end
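
  # #options merges the class-level .options with the instance's urls and
  # paths, then adjusts :skip/:only so the root page ('', '/' or .root_path)
  # is matched consistently.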
  describe "#options" do
    let :options do
      scraper.options
    end

    it "returns a frozen, memoized Hash" do
      assert_instance_of Hash, options
      assert options.frozen?
      assert_same options, scraper.options
    end

    it "includes .options" do
      stub(Scraper).options { { test: true } }
      assert options[:test]
    end

    it "includes #base_url" do
      assert_equal scraper.base_url, options[:base_url]
    end

    it "includes #root_url" do
      assert_equal scraper.root_url, options[:root_url]
    end

    it "includes .root_path" do
      assert_equal '/root', options[:root_path]
    end

    context "when #root_path? is false" do
      before do
        stub(scraper).root_path? { false }
      end

      it "doesn't modify :skip" do
        assert_nil options[:skip]
      end

      context "and :only is an array" do
        before do
          stub(Scraper).options { { only: ['/path'] } }
        end

        it "adds ['', '/']" do
          assert_includes options[:only], '/path'
          assert_includes options[:only], ''
          assert_includes options[:only], '/'
        end

        it "doesn't modify the array in place" do
          assert_equal ['/path'], Scraper.options[:only]
        end
      end

      context "and :only_patterns is an array" do
        it "assigns ['', '/'] to :only" do
          stub(Scraper).options { { only_patterns: [] } }
          assert_equal ['', '/'], options[:only]
        end
      end
    end

    context "when #root_path? is true" do
      before do
        stub(scraper).root_path? { true }
      end

      context "and :skip is nil" do
        it "assigns it ['', '/']" do
          assert_equal ['', '/'], options[:skip]
        end
      end

      context "and :skip is an array" do
        before do
          stub(Scraper).options { { skip: ['/path'] } }
        end

        it "adds ['', '/']" do
          assert_includes options[:skip], '/path'
          assert_includes options[:skip], ''
          assert_includes options[:skip], '/'
        end

        it "doesn't modify the array in place" do
          assert_equal ['/path'], Scraper.options[:skip]
        end
      end

      context "and :only is an array" do
        it "adds .root_path" do
          stub(Scraper).options { { only: [] } }
          assert_includes options[:only], '/root'
        end
      end

      context "and :only_patterns is an array" do
        it "assigns [.root_path] to :only" do
          stub(Scraper).options { { only_patterns: [] } }
          assert_equal ['/root'], options[:only]
        end
      end
    end
  end
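
  # #handle_response parses the body and runs it through #pipeline when the
  # response is processable; otherwise it returns nil. Both outcomes are
  # instrumented.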
  describe "#handle_response" do
    let :result do
      scraper.send :handle_response, response
    end

    context "when the response is processable" do
      before do
        stub(scraper).process_response?(response) { true }
      end

      it "runs the pipeline" do
        mock(scraper.pipeline).call.with_any_args
        result
      end

      it "returns the result" do
        stub(scraper.pipeline).call { |_, _, result| result[:test] = true }
        assert result[:test]
      end

      it "instruments 'process_response'" do
        result
        assert scraper.last_instrumentation
        assert_equal 'process_response.scraper', scraper.last_instrumentation[:event]
        assert_equal response, scraper.last_instrumentation[:payload][:response]
      end

      context "the pipeline document" do
        it "is the parsed response body" do
          response.body = 'body'
          stub(scraper.pipeline).call { |arg| @arg = arg }
          mock.proxy(Docs::Parser).new('body') { |parser| stub(parser).html { 'html' } }
          result
          assert_equal 'html', @arg
        end
      end

      context "the pipeline context" do
        let :context do
          stub(scraper.pipeline).call { |_, arg| @arg = arg }
          result
          @arg
        end

        it "includes #options" do
          stub(scraper).options { { test: true } }
          assert context[:test]
        end

        it "includes the response url" do
          response.url = 'url'
          assert_equal 'url', context[:url]
        end
      end
    end

    context "when the response isn't processable" do
      before do
        stub(scraper).process_response?(response) { false }
      end

      it "doesn't run the pipeline" do
        dont_allow(scraper.pipeline).call
        result
      end

      it "returns nil" do
        assert_nil result
      end

      it "instruments 'ignore_response'" do
        result
        assert scraper.last_instrumentation
        assert_equal 'ignore_response.scraper', scraper.last_instrumentation[:event]
        assert_equal response, scraper.last_instrumentation[:payload][:response]
      end
    end
  end
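
  # #pipeline memoizes an HTML::Pipeline built from .filters, with Docs as
  # its instrumentation service.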
  describe "#pipeline" do
    it "returns an HTML::Pipeline with .filters" do
      stub(Scraper).filters { [1] }
      assert_instance_of ::HTML::Pipeline, scraper.pipeline
      assert_equal Scraper.filters, scraper.pipeline.filters
    end

    it "is memoized" do
      assert_same scraper.pipeline, scraper.pipeline
    end

    it "assigns Docs as the pipeline's instrumentation service" do
      assert_equal Docs, scraper.pipeline.instrumentation_service
    end
  end
end