Skip to content

Commit 659cf94

Browse files
committed
Finish pandas scraper
1 parent 1941687 commit 659cf94

12 files changed

Lines changed: 36 additions & 12 deletions

File tree

assets/images/docs.png

219 Bytes
Loading

assets/images/docs@2x.png

293 Bytes
Loading

assets/javascripts/news.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[
22
[
33
"2016-09-18",
4-
"New documentation: <a href=\"/twig/\">Twig</a>"
4+
"New documentations: <a href=\"/pandas/\">pandas</a> and <a href=\"/twig/\">Twig</a>"
55
], [
66
"2016-09-05",
77
"New documentations: <a href=\"/fish/\">Fish</a>, <a href=\"/bottle/\">Bottle</a> and <a href=\"/scikit_image/\">scikit-image</a>"

assets/javascripts/templates/pages/about_tmpl.coffee

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -399,6 +399,11 @@ credits = [
399399
'2010-2016 Padrino',
400400
'MIT',
401401
'https://raw.githubusercontent.com/padrino/padrino-framework/master/padrino/LICENSE.txt'
402+
], [
403+
'pandas',
404+
'2011-2012 Lambda Foundry, Inc. and PyData Development Team<br>&copy; 2008-2011 AQR Capital Management, LLC<br>&copy; 2008-2014 the pandas development team',
405+
'BSD',
406+
'https://raw.githubusercontent.com/pydata/pandas/master/LICENSE'
402407
], [
403408
'Perl',
404409
'1993-2016 Larry Wall and others',

assets/stylesheets/global/_icons.scss

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@
119119
._icon-fish:before { background-position: -5rem -6rem; @extend %darkIconFix !optional; }
120120
._icon-scikit_image:before { background-position: -6rem -6rem; }
121121
._icon-twig:before { background-position: -7rem -6rem; }
122+
._icon-pandas:before { background-position: -8rem -6rem; }
122123
._icon-bottle:before { background-position: 0 -7rem; }
123124
._icon-docker:before { background-position: -1rem -7rem; }
124125
._icon-cakephp:before { background-position: -2rem -7rem; }

assets/stylesheets/pages/_sphinx.scss

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
%sphinx {
2-
h2, h3 { @extend %block-heading; }
2+
h2 { @extend %block-heading; }
3+
h3 { @extend %block-label; }
34
h4 { font-size: 1em; }
45
> dl:not(.docutils) > dt { @extend %block-label, %label-blue; }
56
dd > dl:not(.docutils) > dt { @extend %block-label; }

lib/docs/filters/pandas/clean_html.rb

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,17 @@ class CleanHtmlFilter < Filter
44
def call
55
@doc = at_css('.body')
66

7+
if root_page?
8+
css('a[href$=".zip"]', 'a[href$=".pdf"]', '.toctree-wrapper').remove
9+
at_css('h1').content = 'pandas'
10+
end
11+
12+
css('h2 > a.reference', 'h3 > a.reference').each do |node|
13+
node.before(node.children).remove
14+
end
15+
16+
css('.anchor-link').remove
17+
718
doc
819
end
920
end

lib/docs/filters/pandas/entries.rb

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,25 @@ module Docs
22
class Pandas
33
class EntriesFilter < Docs::EntriesFilter
44
def get_name
5-
if dt = at_css('dt')
6-
name = dt.content.strip
5+
if subpath.start_with?('generated')
6+
name = at_css('dt').content.strip
77
name.sub! %r{\(.*}, '()'
88
name.remove! %r{\s=.*}
9-
name.remove! %r{\A(class(method)?) }
9+
name.remove! %r{\A(class(method)?) (pandas\.)?}
1010
else
1111
name = at_css('h1').content.strip
12+
name.prepend "#{css('.toctree-l1 > a:not([href^="http"])').to_a.index(at_css('.toctree-l1.current > a')) + 1}. "
1213
end
1314
name.remove! "\u{00B6}"
1415
name
1516
end
1617

1718
def get_type
18-
css(".toctree-l2.current > a").last.content
19+
if subpath.start_with?('generated')
20+
css('.toctree-l2.current > a').last.content
21+
else
22+
'Manual'
23+
end
1924
end
2025
end
2126
end

lib/docs/filters/sphinx/clean_html.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ def call
4949
end
5050

5151
css('dt').each do |node|
52-
next unless node['id'] || node.at_css('code')
52+
next unless node['id'] || node.at_css('code, .classifier')
5353
links = []
5454
links << node.children.last.remove while node.children.last.try(:name) == 'a'
5555
node.inner_html = "<code>#{node.content.strip}</code> "

lib/docs/scrapers/pandas.rb

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ module Docs
22
class Pandas < UrlScraper
33
self.name = 'pandas'
44
self.type = 'sphinx'
5-
self.root_path = 'api.html'
5+
self.root_path = 'index.html'
66
self.links = {
77
home: 'http://pandas.pydata.org/',
88
code: 'https://github.com/pydata/pandas'
@@ -13,12 +13,13 @@ class Pandas < UrlScraper
1313
# Cannot take only the body, as the sidebar gives info about the type.
1414
options[:container] = '.document'
1515

16-
# Using the above container, leads to tons of anchors. Only keep the generated/ pages.
17-
options[:only_patterns] = [/\Agenerated\//]
16+
options[:skip] = %w(internals.html release.html contributing.html whatsnew.html)
1817

1918
options[:attribution] = <<-HTML
20-
&copy; 2008&ndash;2014, the pandas development team.<br>
21-
Licensed under the BSD license.
19+
&copy; 2011&ndash;2012 Lambda Foundry, Inc. and PyData Development Team<br>
20+
&copy; 2008&ndash;2011 AQR Capital Management, LLC<br>
21+
&copy; 2008&ndash;2014 the pandas development team<br>
22+
Licensed under the 3-clause BSD License.
2223
HTML
2324

2425
version '0.18' do

0 commit comments

Comments
 (0)