Skip to content

Commit c4a5439

Browse files
committed
Finish scikit-learn scraper
1 parent 1dbc03f commit c4a5439

13 files changed

Lines changed: 55 additions & 33 deletions

File tree

assets/images/docs.png

178 Bytes
Loading

assets/images/[email protected]

348 Bytes
Loading

assets/javascripts/collections/types.coffee

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ class app.collections.Types extends app.Collection
77
(result[@_groupFor(type)] ||= []).push(type)
88
result.filter (e) -> e.length > 0
99

10-
GUIDES_RGX = /(^|\()(guides?|tutorials?|reference|book|getting\ started|manual)($|[\):])/i
10+
GUIDES_RGX = /(^|\()(guides?|tutorials?|reference|book|getting\ started|manual|examples)($|[\):])/i
1111
APPENDIX_RGX = /appendix/i
1212

1313
_groupFor: (type) ->

assets/javascripts/news.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
[
22
[
3+
"2016-10-10",
4+
"New documentation: <a href=\"/scikit_learn/\">scikit-learn</a>"
5+
], [
36
"2016-09-18",
47
"New documentations: <a href=\"/pandas/\">pandas</a> and <a href=\"/twig/\">Twig</a>"
58
], [

assets/javascripts/templates/pages/about_tmpl.coffee

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -504,6 +504,11 @@ credits = [
504504
'2011 the scikit-image team',
505505
'BSD',
506506
'http://scikit-image.org/docs/dev/license.html'
507+
], [
508+
'scikit-learn',
509+
'2007-2016 The scikit-learn developers',
510+
'BSD',
511+
'https://raw.githubusercontent.com/scikit-learn/scikit-learn/master/COPYING'
507512
], [
508513
'Sinon',
509514
'2010-2016 Christian Johansen',

assets/stylesheets/global/_icons.scss

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@
120120
._icon-scikit_image:before { background-position: -6rem -6rem; }
121121
._icon-twig:before { background-position: -7rem -6rem; }
122122
._icon-pandas:before { background-position: -8rem -6rem; }
123+
._icon-scikit_learn:before { background-position: -9rem -6rem; }
123124
._icon-bottle:before { background-position: 0 -7rem; }
124125
._icon-docker:before { background-position: -1rem -7rem; }
125126
._icon-cakephp:before { background-position: -2rem -7rem; }

assets/stylesheets/pages/_sphinx.scss

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,12 @@
66
dd > dl:not(.docutils) > dt { @extend %block-label; }
77
dt + dt { margin-top: -.5em; }
88

9-
.note, .admonition, div.versionadded, div.versionchanged, .deprecated-removed, .deprecated { @extend %note; }
9+
.note, .admonition, div.versionadded, div.versionchanged, .deprecated-removed, .deprecated, .topic { @extend %note; }
1010

1111
.important { @extend %note-orange; }
1212
.warning, .deprecated-removed, .deprecated { @extend %note-red; }
1313

14-
.versionmodified, span.title {
14+
.versionmodified, span.title, .topic-title {
1515
display: block;
1616
font-weight: bold;
1717
}
@@ -37,16 +37,6 @@
3737
.admonition-title + dl { padding-top: .5em; }
3838

3939
td > div { margin: 0 !important; }
40-
41-
.row-fluid {
42-
h2 {
43-
background: none;
44-
border: none;
45-
> a {
46-
float: none;
47-
}
48-
}
49-
}
5040
}
5141

5242
._sphinx {
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
module Docs
  class ScikitLearn
    # Tidies scikit-learn pages; only the root page is modified.
    class CleanHtmlFilter < Filter
      # Filter entry point: rewrites the root page in place (when applicable)
      # and returns `doc`.
      def call
        rewrite_root_page if root_page?
        doc
      end

      private

      # Sets the root heading text and swaps every `.row-fluid` node for a
      # <dl> built from its `.span4` matches.
      def rewrite_root_page
        at_css('h1').content = 'scikit-learn'

        css('.row-fluid').each do |row|
          pairs = row.css('.span4').map do |cell|
            # First element child supplies the term, last one the description.
            "<dt>#{cell.first_element_child.inner_html}</dt>" \
              "<dd>#{cell.last_element_child.inner_html}</dd>"
          end

          row.replace("<dl>#{pairs.join}</dl>")
        end
      end
    end
  end
end
24+

lib/docs/filters/scikit_learn/entries.rb

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,14 @@ def get_name
77
name = at_css('dt').content.strip
88
name.sub! %r{\(.*}, '()' # Remove function arguments
99
name.remove! %r{[\=\[].*} # Remove [source] anchor
10-
# name.remove! %r{\s=.*} # Remove the occasional '=' in class names
1110
name.remove! %r{\A(class(method)?) (sklearn\.)?}
1211
else
1312
# User guide
1413
name = at_css('h1').content.strip
14+
name.remove! %r{\(.*?\)}
15+
name.remove! %r{(?<![A-Z]):.*}
16+
name.prepend 'Tutorial: ' if type == 'Tutorials'
17+
name.prepend 'Example: ' if type == 'Examples'
1518
end
1619

1720
name.remove! "\u{00B6}"
@@ -23,14 +26,19 @@ def get_type
2326
if subpath.start_with?('modules/generated')
2427
type = at_css('dt > .descclassname').content.strip
2528
type.remove! 'sklearn.'
26-
type.remove! '.'
29+
type.remove! %r{\.\z}
2730
type
31+
elsif subpath.start_with?('tutorial')
32+
'Tutorials'
33+
elsif subpath.start_with?('auto_examples')
34+
'Examples'
2835
else
2936
'Guide'
3037
end
3138
end
3239

3340
def additional_entries
41+
return [] unless subpath.start_with?('modules/generated')
3442
entries = []
3543

3644
css('.class > dt[id]', '.exception > dt[id]', '.attribute > dt[id]').each do |node|

lib/docs/filters/sphinx/clean_html.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ module Docs
22
class Sphinx
33
class CleanHtmlFilter < Filter
44
def call
5-
css('.headerlink', 'hr', '#contents .topic-title', '#topics .topic-title', 'colgroup').remove
5+
css('.headerlink', 'hr', '#contents .topic-title', '#topics .topic-title', 'colgroup', '.line-block').remove
66

77
css('.contents > ul:first-child:last-child.simple > li:first-child:last-child').each do |node|
88
node.parent.before(node.at_css('> ul')) if node.at_css('> ul')

0 commit comments

Comments
 (0)