Skip to content

Commit 4e41ed9

Browse files
committed
Add <base> support
1 parent 6f0214e commit 4e41ed9

6 files changed

Lines changed: 99 additions & 3 deletions

File tree

lib/docs/core/filter.rb

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
# frozen_string_literal: true
2+
13
module Docs
24
class Filter < ::HTML::Pipeline::Filter
35
def css(*args)
@@ -73,7 +75,7 @@ def data_url_string?(str)
7375
end
7476

7577
# Predicate used to decide whether +str+ should be treated as a relative URL.
# In this diff hunk the line following "76-" is the removed body and the line
# following "78+" is its replacement: the new body still requires that +str+
# does not match SCHEME_RGX and is not a fragment URL string, and additionally
# requires that it is not a data URL string (data_url_string?).
def relative_url_string?(str)
76-
!fragment_url_string?(str) && str !~ SCHEME_RGX
78+
str !~ SCHEME_RGX && !fragment_url_string?(str) && !data_url_string?(str)
7779
end
7880

7981
def absolute_url_string?(str)

lib/docs/core/scraper.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def stub(path, &block)
4141
self.html_filters = FilterStack.new
4242
self.text_filters = FilterStack.new
4343

44-
html_filters.push 'container', 'clean_html', 'normalize_urls', 'internal_urls', 'normalize_paths'
44+
html_filters.push 'apply_base_url', 'container', 'clean_html', 'normalize_urls', 'internal_urls', 'normalize_paths'
4545
text_filters.push 'inner_html', 'clean_text', 'attribution'
4646

4747
def initialize

lib/docs/core/scrapers/url_scraper.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ def with_redirections
129129

130130
def fetch_redirections
131131
result = {}
132-
with_filters 'container', 'normalize_urls', 'internal_urls' do
132+
with_filters 'apply_base_url', 'container', 'normalize_urls', 'internal_urls' do
133133
build_pages do |page|
134134
next if page[:response_effective_path] == page[:response_path]
135135
result[page[:response_path].downcase] = page[:response_effective_path]
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
module Docs
  # HTML filter that applies the document's <base href="..."> to relative URLs.
  #
  # When the document has a <base> element with an href, that href is prepended
  # to the URL attribute of every <a>, <img> and <iframe> whose URL is relative
  # (as decided by Filter#relative_url_string?) and does not begin with "/".
  # URLs that are not relative, and URLs starting with "/" (root-relative or
  # protocol-less), are left untouched. Without a <base href> the document is
  # returned unchanged.
  #
  # NOTE(review): the base is prepended verbatim; no path resolution or
  # normalization happens here ("../img.png" with base "/base/" becomes
  # "/base/../img.png"). A base href that does not end in "/" is therefore
  # glued directly onto the relative path. Presumably later filters normalize
  # the result; confirm before relying on this for such bases.
  class ApplyBaseUrlFilter < Filter
    # Tag name => name of the attribute that carries the tag's URL.
    URL_ATTRIBUTES = { 'a': 'href', 'img': 'src', 'iframe': 'src' }.freeze

    # NOTE(review): not referenced by any method in this class; kept so that
    # ApplyBaseUrlFilter::SCHEME_RGX keeps resolving to the same pattern.
    SCHEME_RGX = /\A[^:\/?#]+:/

    # Rewrites the URL attributes in place.
    #
    # @return the filtered document (+doc+)
    def call
      base_url = at_css('base').try(:[], 'href')
      return doc unless base_url

      URL_ATTRIBUTES.each_pair do |tag, attribute|
        css(tag).each do |node|
          url = node[attribute]
          next unless url && rebase?(url)

          node[attribute] = "#{base_url}#{url}"
        end
      end

      doc
    end

    private

    # True when +url+ must be prefixed with the base: it is a relative URL
    # string and does not start with "/".
    def rebase?(url)
      relative_url_string?(url) && !url.start_with?('/')
    end
  end
end

test/lib/docs/core/filter_test.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,10 @@ def slug(subpath)
149149
it "returns false with 'mailto:test@example.com'" do
150150
refute filter.relative_url_string?('mailto:test@example.com')
151151
end
152+
153+
it "returns false with 'data:image/gif;base64,foo'" do
154+
refute filter.relative_url_string?('data:image/gif;base64,foo')
155+
end
152156
end
153157

154158
describe "#absolute_url_string?" do
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
require 'test_helper'
require 'docs'

# Specs for Docs::ApplyBaseUrlFilter: relative <a>, <img> and <iframe> URLs are
# prefixed with the document's <base href>, while every other kind of URL is
# left as written.
class ApplyBaseUrlFilterTest < MiniTest::Spec
  include FilterTestHelper
  self.filter_class = Docs::ApplyBaseUrlFilter
  self.filter_type = 'html'

  context "when there is no <base>" do
    it "does nothing" do
      @body = make_body nil, link_to('test')
      assert_equal link_to('test'), body_html
    end
  end

  context "when <base> is '/base/'" do
    it "rewrites relative urls" do
      @body = make_body '/base/', link_to('path#frag')
      assert_equal link_to('/base/path#frag'), body_html
    end

    it "rewrites relative image urls" do
      @body = make_body '/base/', '<img src="../img.png">'
      assert_equal '<img src="/base/../img.png">', body_html
    end

    it "rewrites relative iframe urls" do
      @body = make_body '/base/', '<iframe src="./test"></iframe>'
      assert_equal '<iframe src="/base/./test"></iframe>', body_html
    end

    it "doesn't rewrite absolute urls" do
      @body = make_body '/base/', link_to('http://example.com')
      assert_equal link_to('http://example.com'), body_html
    end

    it "doesn't rewrite protocol-less urls" do
      @body = make_body '/base/', link_to('//example.com')
      assert_equal link_to('//example.com'), body_html
    end

    it "doesn't rewrite root-relative urls" do
      @body = make_body '/base/', link_to('/path')
      assert_equal link_to('/path'), body_html
    end

    it "doesn't rewrite fragment-only urls" do
      @body = make_body '/base/', link_to('#test')
      assert_equal link_to('#test'), body_html
    end

    it "doesn't rewrite email urls" do
      @body = make_body '/base/', link_to('mailto:test@example.com')
      assert_equal link_to('mailto:test@example.com'), body_html
    end

    it "doesn't rewrite data urls" do
      @body = make_body '/base/', '<img src="data:image/gif;base64,aaaa">'
      assert_equal '<img src="data:image/gif;base64,aaaa">', body_html
    end
  end

  private

  # Builds a minimal HTML document around +body+. When +base+ is given, a
  # <base href="..."> element pointing at it is inserted before the body markup.
  def make_body(base, body)
    base = %(<base href="#{base}">) if base
    "<html><meta charset=utf-8><title></title>#{base}#{body}</html>"
  end

  # Inner HTML of the filtered document's <body>, the value every spec compares.
  def body_html
    filter_output.at_css('body').inner_html
  end
end

0 commit comments

Comments
 (0)