Skip to content

Commit 3eb5ccb

Browse files
committed
Raise error and stop scraping on 4xx/5xx status code
1 parent b4dcb63 commit 3eb5ccb

4 files changed

Lines changed: 36 additions & 1 deletion

File tree

lib/docs/core/response.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@ def success?
44
code == 200
55
end
66

7+
# Reports whether the response carries an HTTP error status code.
#
# A code counts as an error when it lies in the 400..599 range, except for
# 404, which is explicitly excluded.
# NOTE(review): presumably 404 is excluded so that a missing page does not
# abort the whole scrape — confirm against the caller.
#
# Returns true for 4xx/5xx codes other than 404, false otherwise.
def error?
  code != 404 && code.between?(400, 599)
end
10+
711
# Reports whether the response body is empty.
#
# Returns whatever `body.empty?` returns.
def empty?
  body.empty?
end

lib/docs/core/scrapers/url_scraper.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@ def request_options
2929
end
3030

3131
# Decides whether a fetched response should be processed.
#
# response - an object responding to error?, code, url, success?, html? and
#            effective_url.
#
# Returns the result of: response is successful, is HTML, and its effective
# URL is contained in base_url.
# Raises RuntimeError (with the status code and URL in the message) when
# response.error? is true; the check runs before any other condition.
def process_response?(response)
  raise "Error status code (#{response.code}): #{response.url}" if response.error?

  response.success? && response.html? && base_url.contains?(response.effective_url)
end
3438

test/lib/docs/core/response_test.rb

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,28 @@ class DocsResponseTest < MiniTest::Spec
2929
end
3030
end
3131

32+
# Specs for Response#error?: true for 4xx/5xx codes other than 404.
describe "#error?" do
  it "returns false when the code is 200" do
    options.code = 200
    refute response.error?
  end

  it "returns false when the code is 404" do
    options.code = 404
    refute response.error?
  end

  it "returns true when the code is 400" do
    options.code = 400
    assert response.error?
  end

  it "returns true when the code is 500" do
    options.code = 500
    assert response.error?
  end

  # Boundary cases around the 400..599 range.
  it "returns false when the code is 399" do
    options.code = 399
    refute response.error?
  end

  it "returns true when the code is 599" do
    options.code = 599
    assert response.error?
  end

  it "returns false when the code is 600" do
    options.code = 600
    refute response.error?
  end
end
53+
3254
describe "#empty?" do
3355
it "returns true when the body is empty" do
3456
options.body = ''

test/lib/docs/core/scrapers/url_scraper_test.rb

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,13 +89,18 @@ class Scraper < Docs::UrlScraper
8989

9090
describe "#process_response?" do
9191
let :response do
92-
OpenStruct.new success?: true, html?: true, effective_url: scraper.root_url
92+
OpenStruct.new success?: true, html?: true, effective_url: scraper.root_url, error?: false
9393
end
9494

9595
let :result do
9696
scraper.send :process_response?, response
9797
end
9898

99+
it "raises when the response is an error" do
100+
response.send 'error?=', true
101+
assert_raises(RuntimeError) { result }
102+
end
103+
99104
it "returns false when the response isn't successful" do
100105
response.send 'success?=', false
101106
refute result

0 commit comments

Comments
 (0)