@@ -4,7 +4,13 @@ class Dojo < UrlScraper
44 self . slug = 'dojo'
55 self . type = 'dojo'
66 self . version = '1.10'
7- self . base_url = 'http://dojotoolkit.org/api/1.10/' #tree.json
7+ self . base_url = 'http://dojotoolkit.org/api/1.10/'
8+
9+ # This is a cut-down list of the actual paths taken from the tree.json API on the Dojo site.
10+ # Dojo uses JavaScript and XHR requests to let users browse its documentation, so it can't
11+ # be scraped by just following links from the base page. This list was generated with a little
12+ # bash and then cut down in order to remove a lot of the less-used documentation, e.g. kernel,
13+ # main, dnd and some others
814 self . initial_paths = %w(
915 dojo/AdapterRegistry
1016 dojo/aspect
@@ -17,65 +23,18 @@ class Dojo < UrlScraper
1723 dojo/_base/config.modulePaths
1824 dojo/_base/connect
1925 dojo/_base/declare
20- dojo/_base/declare.__DeclareCreatedObject
2126 dojo/_base/Deferred
2227 dojo/_base/event
2328 dojo/_base/fx
2429 dojo/_base/html
2530 dojo/_base/json
2631 dojo/_base/kernel
27- dojo/_base/kernel.back
28- dojo/_base/kernel.cldr
29- dojo/_base/kernel.colors
30- dojo/_base/kernel.config
31- dojo/_base/kernel.contentHandlers
32- dojo/_base/kernel._contentHandlers
33- dojo/_base/kernel.currency
34- dojo/_base/kernel.data
35- dojo/_base/kernel.date
36- dojo/_base/kernel.dijit
37- dojo/_base/kernel.dnd
38- dojo/_base/kernel.doc
39- dojo/_base/kernel.dojox
40- dojo/_base/kernel.fx
41- dojo/_base/kernel.gears
42- dojo/_base/kernel.global
43- dojo/_base/kernel._hasResource
44- dojo/_base/kernel.html
45- dojo/_base/kernel.i18n
46- dojo/_base/kernel.io
47- dojo/_base/kernel.__IoArgs
48- dojo/_base/kernel.__IoCallbackArgs
49- dojo/_base/kernel.__IoPublish
50- dojo/_base/kernel.keys
51- dojo/_base/kernel.mouseButtons
52- dojo/_base/kernel._nodeDataCache
53- dojo/_base/kernel.number
54- dojo/_base/kernel.regexp
55- dojo/_base/kernel.rpc
56- dojo/_base/kernel.scopeMap
57- dojo/_base/kernel.Stateful
58- dojo/_base/kernel.store
59- dojo/_base/kernel.string
60- dojo/_base/kernel.tests
61- dojo/_base/kernel.touch
62- dojo/_base/kernel.version
63- dojo/_base/kernel.window
64- dojo/_base/kernel.__XhrArgs
6532 dojo/_base/lang
6633 dojo/_base/loader
6734 dojo/_base/NodeList
6835 dojo/_base/query
6936 dojo/_base/sniff
7037 dojo/_base/unload
71- dojo/_base/url
72- dojo/_base/url.authority
73- dojo/_base/url.fragment
74- dojo/_base/url.password
75- dojo/_base/url.port
76- dojo/_base/url.query
77- dojo/_base/url.scheme
78- dojo/_base/url.user
7938 dojo/_base/window
8039 dojo/_base/window.doc
8140 dojo/_base/window.global
@@ -88,8 +47,6 @@ class Dojo < UrlScraper
8847 dojo/colors
8948 dojo/cookie
9049 dojo/currency
91- dojo/currency.__FormatOptions
92- dojo/currency.__ParseOptions
9350 dojo/data/api/Identity
9451 dojo/data/api/Item
9552 dojo/data/api/Notification
@@ -104,33 +61,10 @@ class Dojo < UrlScraper
10461 dojo/data/util/sorter
10562 dojo/date
10663 dojo/date/locale
107- dojo/date/locale.__FormatOptions
10864 dojo/date/stamp
10965 dojo/debounce
11066 dojo/Deferred
11167 dojo/DeferredList
112- dojo/dnd/autoscroll
113- dojo/dnd/autoscroll._validNodes
114- dojo/dnd/autoscroll._validOverflow
115- dojo/dnd/AutoSource
116- dojo/dnd/Avatar
117- dojo/dnd/common
118- dojo/dnd/common._defaultCreatorNodes
119- dojo/dnd/common._empty
120- dojo/dnd/Container
121- dojo/dnd/Container.__ContainerArgs
122- dojo/dnd/Manager
123- dojo/dnd/move
124- dojo/dnd/Moveable
125- dojo/dnd/Moveable.__MoveableArgs
126- dojo/dnd/move.boxConstrainedMoveable
127- dojo/dnd/move.constrainedMoveable
128- dojo/dnd/move.parentConstrainedMoveable
129- dojo/dnd/Mover
130- dojo/dnd/Selector
131- dojo/dnd/Source
132- dojo/dnd/Target
133- dojo/dnd/TimedMoveable
13468 dojo/dom
13569 dojo/dom-attr
13670 dojo/dom-class
@@ -146,7 +80,6 @@ class Dojo < UrlScraper
14680 dojo/errors/RequestError
14781 dojo/errors/RequestTimeoutError
14882 dojo/Evented
149- dojo/_firebug/firebug
15083 dojo/fx
15184 dojo/fx/easing
15285 dojo/fx.easing
@@ -168,44 +101,6 @@ class Dojo < UrlScraper
168101 dojo/keys
169102 dojo/loadInit
170103 dojo/main
171- dojo/main.back
172- dojo/main.cldr
173- dojo/main.colors
174- dojo/main.config
175- dojo/main.contentHandlers
176- dojo/main._contentHandlers
177- dojo/main.currency
178- dojo/main.data
179- dojo/main.date
180- dojo/main.dijit
181- dojo/main.dnd
182- dojo/main.doc
183- dojo/main.dojox
184- dojo/main.fx
185- dojo/main.gears
186- dojo/main.global
187- dojo/main._hasResource
188- dojo/main.html
189- dojo/main.i18n
190- dojo/main.io
191- dojo/main.__IoArgs
192- dojo/main.__IoCallbackArgs
193- dojo/main.__IoPublish
194- dojo/main.keys
195- dojo/main.mouseButtons
196- dojo/main._nodeDataCache
197- dojo/main.number
198- dojo/main.regexp
199- dojo/main.rpc
200- dojo/main.scopeMap
201- dojo/main.Stateful
202- dojo/main.store
203- dojo/main.string
204- dojo/main.tests
205- dojo/main.touch
206- dojo/main.version
207- dojo/main.window
208- dojo/main.__XhrArgs
209104 dojo/mouse
210105 dojo/node
211106 dojo/NodeList
@@ -217,12 +112,6 @@ class Dojo < UrlScraper
217112 dojo/NodeList._nodeDataCache
218113 dojo/NodeList-traverse
219114 dojo/number
220- dojo/number.__FormatAbsoluteOptions
221- dojo/number.__FormatOptions
222- dojo/number.__IntegerRegexpFlags
223- dojo/number.__ParseOptions
224- dojo/number.__RealNumberRegexpFlags
225- dojo/number.__RegexpOptions
226115 dojo/on
227116 dojo/on/asyncEventListener
228117 dojo/on/debounce
@@ -237,32 +126,16 @@ class Dojo < UrlScraper
237126 dojo/ready
238127 dojo/regexp
239128 dojo/request
240- dojo/request.__BaseOptions
241129 dojo/request/default
242130 dojo/request/handlers
243131 dojo/request/iframe
244- dojo/request/iframe.__BaseOptions
245- dojo/request/iframe.__MethodOptions
246- dojo/request/iframe.__Options
247- dojo/request.__MethodOptions
248132 dojo/request/node
249- dojo/request/node.__BaseOptions
250- dojo/request/node.__MethodOptions
251- dojo/request/node.__Options
252133 dojo/request/notify
253- dojo/request.__Options
254- dojo/request.__Promise
255134 dojo/request/registry
256135 dojo/request/script
257- dojo/request/script.__BaseOptions
258- dojo/request/script.__MethodOptions
259- dojo/request/script.__Options
260136 dojo/request/util
261137 dojo/request/watch
262138 dojo/request/xhr
263- dojo/request/xhr.__BaseOptions
264- dojo/request/xhr.__MethodOptions
265- dojo/request/xhr.__Options
266139 dojo/require
267140 dojo/robot
268141 dojo/robot._runsemaphore
@@ -299,14 +172,23 @@ class Dojo < UrlScraper
299172 dojo/uacss
300173 dojo/when
301174 dojo/window )
175+ # Add the rest of the url to the path
176+ self . initial_paths = self . initial_paths . map { |l | l + ".html?xhr=true" }
177+ # Dojo expects every request to be an XHR; otherwise it redirects you back to the docs home page,
178+ # where it uses JS to call the backend based on the URL to load the appropriate documentation
179+ self . headers = { 'User-Agent' => 'devdocs.io' , 'X-Requested-With' => 'XMLHttpRequest' }
302180 self . links = {
303181 home : 'http://dojotoolkit.org' ,
304182 code : 'https://github.com/dojo/dojo'
305183 }
306184
307185 html_filters . push 'dojo/clean_html' , 'dojo/entries'
308186
187+ # Don't use the default selector for XHR responses, as they contain no body or html document
188+ options [ :container ] = -> ( filter ) { filter . root_page? ? '#content' : false }
189+ options [ :follow_links ] = false
309190 options [ :skip_links ] = true
191+ options [ :only ] = self . initial_paths
310192
311193 options [ :attribution ] = <<-HTML
312194 The Dojo Toolkit is Copyright © 2005–2013 < br >
0 commit comments