Browse Source

Add <base> support

Thibaut Courouble 9 years ago
parent
commit
4e41ed9f25

+ 3 - 1
lib/docs/core/filter.rb

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 module Docs
   class Filter < ::HTML::Pipeline::Filter
     def css(*args)
@@ -73,7 +75,7 @@ module Docs
     end
 
     def relative_url_string?(str)
-      !fragment_url_string?(str) && str !~ SCHEME_RGX
+      str !~ SCHEME_RGX && !fragment_url_string?(str) && !data_url_string?(str)
     end
 
     def absolute_url_string?(str)

+ 1 - 1
lib/docs/core/scraper.rb

@@ -41,7 +41,7 @@ module Docs
     self.html_filters = FilterStack.new
     self.text_filters = FilterStack.new
 
-    html_filters.push 'container', 'clean_html', 'normalize_urls', 'internal_urls', 'normalize_paths'
+    html_filters.push 'apply_base_url', 'container', 'clean_html', 'normalize_urls', 'internal_urls', 'normalize_paths'
     text_filters.push 'inner_html', 'clean_text', 'attribution'
 
     def initialize

+ 1 - 1
lib/docs/core/scrapers/url_scraper.rb

@@ -129,7 +129,7 @@ module Docs
 
       def fetch_redirections
         result = {}
-        with_filters 'container', 'normalize_urls', 'internal_urls' do
+        with_filters 'apply_base_url', 'container', 'normalize_urls', 'internal_urls' do
           build_pages do |page|
             next if page[:response_effective_path] == page[:response_path]
             result[page[:response_path].downcase] = page[:response_effective_path]

+ 41 - 0
lib/docs/filters/core/apply_base_url.rb

@@ -0,0 +1,41 @@
+# frozen_string_literal: true
+
+module Docs
+  # HTML filter that prefixes document-relative URLs with the href of the
+  # page's <base> element. Only the tag/attribute pairs listed in
+  # URL_ATTRIBUTES are rewritten.
+  class ApplyBaseUrlFilter < Filter
+    # Maps a tag name (Symbol) to the attribute that holds its URL.
+    URL_ATTRIBUTES = { a: 'href', img: 'src', iframe: 'src' }.freeze
+    # Matches a leading URL scheme such as "http:" or "mailto:".
+    # NOTE(review): not referenced in this class body; kept so the constant
+    # stays available to anything that resolves it through this class.
+    SCHEME_RGX = /\A[^:\/?#]+:/
+
+    # Rewrites the document in place and returns it. The document is returned
+    # untouched when there is no <base> element or it has no href.
+    #
+    # NOTE(review): the base href is prepended verbatim, with no path
+    # resolution, so "../" segments are kept as-is and a base href that does
+    # not end in "/" is glued directly onto the relative URL.
+    def call
+      base_url = at_css('base').try(:[], 'href')
+      return doc unless base_url
+
+      URL_ATTRIBUTES.each_pair do |tag, attribute|
+        css(tag).each do |node|
+          value = node[attribute]
+          next unless value
+          # Skip whatever relative_url_string? rejects, plus root-relative
+          # ("/path") and protocol-less ("//host") URLs.
+          next unless relative_url_string?(value)
+          next if value.start_with?('/')
+
+          node[attribute] = "#{base_url}#{value}"
+        end
+      end
+
+      doc
+    end
+  end
+end

+ 4 - 0
test/lib/docs/core/filter_test.rb

@@ -149,6 +149,10 @@ class DocsFilterTest < MiniTest::Spec
     it "returns false with 'mailto:test@example.com'" do
       refute filter.relative_url_string?('mailto:test@example.com')
     end
+
+    it "returns false with 'data:image/gif;base64,foo'" do
+      refute filter.relative_url_string?('data:image/gif;base64,foo')
+    end
   end
 
   describe "#absolute_url_string?" do

+ 68 - 0
test/lib/docs/filters/core/apply_base_url_test.rb

@@ -0,0 +1,68 @@
+require 'test_helper'
+require 'docs'
+
+# Specs for Docs::ApplyBaseUrlFilter, run as an 'html' filter.
+class ApplyBaseUrlFilterTest < MiniTest::Spec
+  include FilterTestHelper
+  self.filter_class = Docs::ApplyBaseUrlFilter
+  self.filter_type = 'html'
+
+  context "when there is no <base>" do
+    it "does nothing" do
+      output = filtered_body(nil, link_to('test'))
+      assert_equal link_to('test'), output
+    end
+  end
+
+  context "when <base> is '/base/'" do
+    it "rewrites relative urls" do
+      output = filtered_body('/base/', link_to('path#frag'))
+      assert_equal link_to('/base/path#frag'), output
+    end
+
+    it "rewrites relative image urls" do
+      output = filtered_body('/base/', '<img src="../img.png">')
+      assert_equal '<img src="/base/../img.png">', output
+    end
+
+    it "rewrites relative iframe urls" do
+      output = filtered_body('/base/', '<iframe src="./test"></iframe>')
+      assert_equal '<iframe src="/base/./test"></iframe>', output
+    end
+
+    # Link targets that must come out of the filter exactly as written.
+    {
+      "absolute urls" => 'http://example.com',
+      "protocol-less urls" => '//example.com',
+      "root-relative urls" => '/path',
+      "fragment-only urls" => '#test',
+      "email urls" => 'mailto:test@example.com'
+    }.each do |kind, url|
+      it "doesn't rewrite #{kind}" do
+        output = filtered_body('/base/', link_to(url))
+        assert_equal link_to(url), output
+      end
+    end
+
+    it "doesn't rewrite data urls" do
+      data_img = '<img src="data:image/gif;base64,aaaa">'
+      assert_equal data_img, filtered_body('/base/', data_img)
+    end
+  end
+
+  private
+
+  # Sets @body to a page built from +base+ and +html+, then returns the inner
+  # HTML of the <body> element found in filter_output.
+  def filtered_body(base, html)
+    @body = make_body(base, html)
+    filter_output.at_css('body').inner_html
+  end
+
+  # Builds a minimal page around +body+; a <base href> tag is emitted only
+  # when +base+ is truthy.
+  def make_body(base, body)
+    base_tag = base && %(<base href="#{base}">)
+    "<html><meta charset=utf-8><title></title>#{base_tag}#{body}</html>"
+  end
+end