Skip to content

Commit ebfe3a1

Browse files
ShaneQfulThibaut
authored andcommitted
Changed scraper to get list of urls programmatically & improved the get type method in the entries filter
1 parent 3465933 commit ebfe3a1

2 files changed

Lines changed: 26 additions & 173 deletions

File tree

lib/docs/filters/dojo/entries.rb

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@ def get_name
66
end
77

88
# Derives the entry's type from its name by dropping the last segment.
#
# The name is split on "/" and "."; every segment except the final one is
# re-joined with "/". For example "dojo/_base/Color.named" gives
# "dojo/_base/Color". A name with a single segment (or an empty name) has no
# parent segments and therefore yields "".
#
# Returns a String.
def get_type
  segments = name.split(%r{[/.]})
  # Non-mutating equivalent of pop-then-join: keep everything but the last segment.
  segments[0...-1].join('/')
end
1113
end
1214
end

lib/docs/scrapers/dojo.rb

Lines changed: 23 additions & 172 deletions
Original file line numberDiff line numberDiff line change
@@ -1,179 +1,13 @@
11
module Docs
22
class Dojo < UrlScraper
3+
include StubRootPage
34
self.name = 'Dojo'
45
self.slug = 'dojo'
56
self.type = 'dojo'
67
self.version = '1.10'
78
self.base_url = 'http://dojotoolkit.org/api/1.10/'
89

9-
# This is a cut-down list of the actual paths taken from the tree.json API on the Dojo site.
10-
# Dojo uses JavaScript and XHR requests to let users browse its documentation, so it can't
11-
# be scraped by just following links from the base page. This list was generated with a little
12-
# bash and then cut down in order to remove a lot of the less-used documentation, e.g. kernel,
13-
# main, dnd and some others
14-
self.initial_paths = %w(
15-
dojo/AdapterRegistry
16-
dojo/aspect
17-
dojo/back
18-
dojo/_base/array
19-
dojo/_base/browser
20-
dojo/_base/Color
21-
dojo/_base/Color.named
22-
dojo/_base/config
23-
dojo/_base/config.modulePaths
24-
dojo/_base/connect
25-
dojo/_base/declare
26-
dojo/_base/Deferred
27-
dojo/_base/event
28-
dojo/_base/fx
29-
dojo/_base/html
30-
dojo/_base/json
31-
dojo/_base/kernel
32-
dojo/_base/lang
33-
dojo/_base/loader
34-
dojo/_base/NodeList
35-
dojo/_base/query
36-
dojo/_base/sniff
37-
dojo/_base/unload
38-
dojo/_base/window
39-
dojo/_base/window.doc
40-
dojo/_base/window.global
41-
dojo/_base/xhr
42-
dojo/_base/xhr.contentHandlers
43-
dojo/behavior
44-
dojo/cache
45-
dojo/cldr/monetary
46-
dojo/cldr/supplemental
47-
dojo/colors
48-
dojo/cookie
49-
dojo/currency
50-
dojo/data/api/Identity
51-
dojo/data/api/Item
52-
dojo/data/api/Notification
53-
dojo/data/api/Read
54-
dojo/data/api/Request
55-
dojo/data/api/Write
56-
dojo/data/ItemFileReadStore
57-
dojo/data/ItemFileWriteStore
58-
dojo/data/ObjectStore
59-
dojo/data/util/filter
60-
dojo/data/util/simpleFetch
61-
dojo/data/util/sorter
62-
dojo/date
63-
dojo/date/locale
64-
dojo/date/stamp
65-
dojo/debounce
66-
dojo/Deferred
67-
dojo/DeferredList
68-
dojo/dom
69-
dojo/dom-attr
70-
dojo/dom-class
71-
dojo/dom-construct
72-
dojo/dom-form
73-
dojo/dom-geometry
74-
dojo/dom-prop
75-
dojo/dom-prop.names
76-
dojo/domReady
77-
dojo/dom-style
78-
dojo/errors/CancelError
79-
dojo/errors/create
80-
dojo/errors/RequestError
81-
dojo/errors/RequestTimeoutError
82-
dojo/Evented
83-
dojo/fx
84-
dojo/fx/easing
85-
dojo/fx.easing
86-
dojo/fx/Toggler
87-
dojo/fx.Toggler
88-
dojo/gears
89-
dojo/gears.available
90-
dojo/has
91-
dojo/hash
92-
dojo/hccss
93-
dojo/html
94-
dojo/html._ContentSetter
95-
dojo/i18n
96-
dojo/i18n.cache
97-
dojo/io/iframe
98-
dojo/io-query
99-
dojo/io/script
100-
dojo/json
101-
dojo/keys
102-
dojo/loadInit
103-
dojo/main
104-
dojo/mouse
105-
dojo/node
106-
dojo/NodeList
107-
dojo/NodeList-data
108-
dojo/NodeList-dom
109-
dojo/NodeList-fx
110-
dojo/NodeList-html
111-
dojo/NodeList-manipulate
112-
dojo/NodeList._nodeDataCache
113-
dojo/NodeList-traverse
114-
dojo/number
115-
dojo/on
116-
dojo/on/asyncEventListener
117-
dojo/on/debounce
118-
dojo/on/throttle
119-
dojo/parser
120-
dojo/promise/all
121-
dojo/promise/first
122-
dojo/promise/instrumentation
123-
dojo/promise/Promise
124-
dojo/promise/tracer
125-
dojo/query
126-
dojo/ready
127-
dojo/regexp
128-
dojo/request
129-
dojo/request/default
130-
dojo/request/handlers
131-
dojo/request/iframe
132-
dojo/request/node
133-
dojo/request/notify
134-
dojo/request/registry
135-
dojo/request/script
136-
dojo/request/util
137-
dojo/request/watch
138-
dojo/request/xhr
139-
dojo/require
140-
dojo/robot
141-
dojo/robot._runsemaphore
142-
dojo/robotx
143-
dojo/robotx._runsemaphore
144-
dojo/router
145-
dojo/router/RouterBase
146-
dojo/rpc/JsonpService
147-
dojo/rpc/JsonService
148-
dojo/rpc/RpcService
149-
dojo/selector/acme
150-
dojo/selector/lite
151-
dojo/selector/_loader
152-
dojo/sniff
153-
dojo/Stateful
154-
dojo/store/api/Store
155-
dojo/store/api/Store.PutDirectives
156-
dojo/store/api/Store.QueryOptions
157-
dojo/store/api/Store.QueryResults
158-
dojo/store/api/Store.SortInformation
159-
dojo/store/api/Store.Transaction
160-
dojo/store/Cache
161-
dojo/store/DataStore
162-
dojo/store/JsonRest
163-
dojo/store/Memory
164-
dojo/store/Observable
165-
dojo/store/util/QueryResults
166-
dojo/store/util/SimpleQueryEngine
167-
dojo/string
168-
dojo/text
169-
dojo/throttle
170-
dojo/topic
171-
dojo/touch
172-
dojo/uacss
173-
dojo/when
174-
dojo/window)
175-
# Add the rest of the url to the path
176-
self.initial_paths = self.initial_paths.map { |l| l + ".html?xhr=true" }
10+
17711
# Dojo expects all the requests to be xhrs or it redirects you back to the docs home page
17812
# where it uses js to call the backend based on the URL so you get the appropriate documentation
17913
self.headers = { 'User-Agent' => 'devdocs.io' , 'X-Requested-With' => 'XMLHttpRequest' }
@@ -185,10 +19,27 @@ class Dojo < UrlScraper
18519
html_filters.push 'dojo/clean_html', 'dojo/entries'
18620

18721
# Don't use default selector on xhrs as no body or html document exists
188-
options[:container] = ->(filter) { filter.root_page? ? '#content' : false }
189-
options[:follow_links] = false
190-
options[:skip_links] = true
191-
options[:only] = self.initial_paths
22+
options[:container] = false
23+
24+
# Builds the HTML body of the stub root page: one link per Dojo documentation page.
#
# The list of pages is read from the tree.json index located under the
# scraper's base URL. The request carries the X-Requested-With header because,
# as noted elsewhere in this scraper, Dojo expects requests to be XHRs.
# Only the subtree whose top-level node is named "dojo" is used; every node in
# that subtree contributes the URL "<base_url><fullname>.html?xhr=true".
#
# Returns a String of <a> elements separated by "<br>", in depth-first
# (parent before children) order with duplicate URLs removed.
# Raises RuntimeError when the index contains no top-level "dojo" node.
def root_page_body
  require 'json' # loaded lazily; only needed when the root page is built

  # Derive the index URL from base_url so the scheme and version are not duplicated here.
  response = Typhoeus::Request.new(
    "#{self.class.base_url}tree.json",
    headers: { 'User-Agent' => 'devdocs.io', 'X-Requested-With' => 'XMLHttpRequest' }
  ).run
  tree = JSON.parse(response.response_body)

  # Linear search on purpose: a binary search with an equality test is only
  # valid on suitably ordered input and can otherwise miss the node.
  dojo_root = Array(tree['children']).find { |framework| framework['name'] == 'dojo' }
  raise 'tree.json has no top-level "dojo" node' unless dojo_root

  dojo_page_urls(dojo_root).uniq.map { |url| "<a href='#{url}'>#{url}</a>" }.join('<br>')
end

# Collects the page URL of +node+ followed by the URLs of all its descendants.
#
# node - a Hash parsed from tree.json with a "fullname" key and an optional
#        "children" Array of nodes of the same shape.
# urls - accumulator Array; callers normally omit it.
#
# Returns the accumulator Array of URL Strings (may contain duplicates).
def dojo_page_urls(node, urls = [])
  urls << "#{self.class.base_url}#{node['fullname']}.html?xhr=true"
  Array(node['children']).each { |child| dojo_page_urls(child, urls) }
  urls
end
19243

19344
options[:attribution] = <<-HTML
19445
The Dojo Toolkit is Copyright &copy; 2005&ndash;2013 <br>

0 commit comments

Comments
 (0)