Browse Source

Image scraping: evaluate Content-Length header

Simon Legner 3 years ago
parent
commit
965be77c03
3 changed files with 23 additions and 0 deletions
  1. 5 0
      lib/docs/core/response.rb
  2. 7 0
      lib/docs/filters/core/images.rb
  3. 11 0
      test/lib/docs/core/response_test.rb

+ 5 - 0
lib/docs/core/response.rb

@@ -12,6 +12,11 @@ module Docs
       body.blank?
     end
 
+    def content_length
+      value = headers['Content-Length'] || '0'
+      value.to_i
+    end
+
     def mime_type
       headers['Content-Type'] || 'text/plain'
     end

+ 7 - 0
lib/docs/filters/core/images.rb

@@ -46,6 +46,13 @@ module Docs
               next
             end
 
+            size = response.content_length
+
+            if size > (context[:max_image_size] || DEFAULT_MAX_SIZE)
+              instrument 'too_big.image', url: url, size: size
+              next
+            end
+
             image = response.body
 
             unless context[:optimize_images] == false

+ 11 - 0
test/lib/docs/core/response_test.rb

@@ -63,6 +63,17 @@ class DocsResponseTest < MiniTest::Spec
     end
   end
 
+  describe "#content_length" do
+    it "returns the content length" do
+      options.headers['Content-Length'] = '188420'
+      assert_equal 188420, response.content_length
+    end
+
+    it "defaults to 0" do
+      assert_equal 0, response.content_length
+    end
+  end
+
   describe "#mime_type" do
     it "returns the content type" do
       options.headers['Content-Type'] = 'type'