Skip to content

Commit 8902d53

Browse files
committed
Finish D scraper
1 parent bc8d943 commit 8902d53

13 files changed

Lines changed: 196 additions & 61 deletions

File tree

assets/images/docs-2.png

701 Bytes
Loading

assets/images/docs-2@2x.png

2.34 KB
Loading

assets/javascripts/news.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[
22
[
33
"2017-09-03",
4-
"New documentations: <a href=\"/nim/\">Nim</a> and <a href=\"/vulkan/\">Vulkan</a>"
4+
"New documentations: <a href=\"/d/\">D</a>, <a href=\"/nim/\">Nim</a> and <a href=\"/vulkan/\">Vulkan</a>"
55
], [
66
"2017-07-23",
77
"New documentation: <a href=\"/godot/\">Godot</a>"

assets/javascripts/templates/pages/about_tmpl.coffee

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,11 @@ credits = [
193193
'2012-2017 Manas Technology Solutions',
194194
'Apache',
195195
'https://raw.githubusercontent.com/crystal-lang/crystal/master/LICENSE'
196+
], [
197+
'D',
198+
'1999-2017 The D Language Foundation',
199+
'Boost',
200+
'https://raw.githubusercontent.com/dlang/phobos/master/LICENSE_1_0.txt'
196201
], [
197202
'D3.js',
198203
'2010-2017 Michael Bostock',

assets/javascripts/vendor/prism.js

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* http://prismjs.com/download.html?themes=prism&languages=markup+css+clike+javascript+c+cpp+coffeescript+ruby+elixir+erlang+go+java+json+kotlin+lua+nginx+nim+perl+php+python+crystal+rust+scss+sql+typescript */
1+
/* http://prismjs.com/download.html?themes=prism&languages=markup+css+clike+javascript+c+cpp+coffeescript+ruby+d+elixir+erlang+go+java+json+kotlin+lua+nginx+nim+perl+php+python+crystal+rust+scss+sql+typescript */
22
var _self = (typeof window !== 'undefined')
33
? window // if in browser
44
: (
@@ -976,6 +976,70 @@ delete Prism.languages.coffeescript['template-string'];
976976
}
977977
];
978978
}(Prism));
979+
Prism.languages.d = Prism.languages.extend('clike', {
980+
'string': [
981+
// r"", x""
982+
/\b[rx]"(\\.|[^\\"])*"[cwd]?/,
983+
// q"[]", q"()", q"<>", q"{}"
984+
/\bq"(?:\[[\s\S]*?\]|\([\s\S]*?\)|<[\s\S]*?>|\{[\s\S]*?\})"/,
985+
// q"IDENT
986+
// ...
987+
// IDENT"
988+
/\bq"([_a-zA-Z][_a-zA-Z\d]*)(?:\r?\n|\r)[\s\S]*?(?:\r?\n|\r)\1"/,
989+
// q"//", q"||", etc.
990+
/\bq"(.)[\s\S]*?\1"/,
991+
// Characters
992+
/'(?:\\'|\\?[^']+)'/,
993+
994+
/(["`])(\\.|(?!\1)[^\\])*\1[cwd]?/
995+
],
996+
997+
'number': [
998+
// The lookbehind and the negative look-ahead try to prevent bad highlighting of the .. operator
999+
// Hexadecimal numbers must be handled separately to avoid problems with exponent "e"
1000+
/\b0x\.?[a-f\d_]+(?:(?!\.\.)\.[a-f\d_]*)?(?:p[+-]?[a-f\d_]+)?[ulfi]*/i,
1001+
{
1002+
pattern: /((?:\.\.)?)(?:\b0b\.?|\b|\.)\d[\d_]*(?:(?!\.\.)\.[\d_]*)?(?:e[+-]?\d[\d_]*)?[ulfi]*/i,
1003+
lookbehind: true
1004+
}
1005+
],
1006+
1007+
// In order: $, keywords and special tokens, globally defined symbols
1008+
'keyword': /\$|\b(?:abstract|alias|align|asm|assert|auto|body|bool|break|byte|case|cast|catch|cdouble|cent|cfloat|char|class|const|continue|creal|dchar|debug|default|delegate|delete|deprecated|do|double|else|enum|export|extern|false|final|finally|float|for|foreach|foreach_reverse|function|goto|idouble|if|ifloat|immutable|import|inout|int|interface|invariant|ireal|lazy|long|macro|mixin|module|new|nothrow|null|out|override|package|pragma|private|protected|public|pure|real|ref|return|scope|shared|short|static|struct|super|switch|synchronized|template|this|throw|true|try|typedef|typeid|typeof|ubyte|ucent|uint|ulong|union|unittest|ushort|version|void|volatile|wchar|while|with|__(?:(?:FILE|MODULE|LINE|FUNCTION|PRETTY_FUNCTION|DATE|EOF|TIME|TIMESTAMP|VENDOR|VERSION)__|gshared|traits|vector|parameters)|string|wstring|dstring|size_t|ptrdiff_t)\b/,
1009+
'operator': /\|[|=]?|&[&=]?|\+[+=]?|-[-=]?|\.?\.\.|=[>=]?|!(?:i[ns]\b|<>?=?|>=?|=)?|\bi[ns]\b|(?:<[<>]?|>>?>?|\^\^|[*\/%^~])=?/
1010+
});
1011+
1012+
1013+
Prism.languages.d.comment = [
1014+
// Shebang
1015+
/^\s*#!.+/,
1016+
// /+ +/
1017+
{
1018+
// Allow one level of nesting
1019+
pattern: /(^|[^\\])\/\+(?:\/\+[\s\S]*?\+\/|[\s\S])*?\+\//,
1020+
lookbehind: true
1021+
}
1022+
].concat(Prism.languages.d.comment);
1023+
1024+
Prism.languages.insertBefore('d', 'comment', {
1025+
'token-string': {
1026+
// Allow one level of nesting
1027+
pattern: /\bq\{(?:|\{[^}]*\}|[^}])*\}/,
1028+
alias: 'string'
1029+
}
1030+
});
1031+
1032+
Prism.languages.insertBefore('d', 'keyword', {
1033+
'property': /\B@\w*/
1034+
});
1035+
1036+
Prism.languages.insertBefore('d', 'function', {
1037+
'register': {
1038+
// Iasm registers
1039+
pattern: /\b(?:[ABCD][LHX]|E[ABCD]X|E?(?:BP|SP|DI|SI)|[ECSDGF]S|CR[0234]|DR[012367]|TR[3-7]|X?MM[0-7]|R[ABCD]X|[BS]PL|R[BS]P|[DS]IL|R[DS]I|R(?:[89]|1[0-5])[BWD]?|XMM(?:[89]|1[0-5])|YMM(?:1[0-5]|\d))\b|\bST(?:\([0-7]\)|\b)/,
1040+
alias: 'variable'
1041+
}
1042+
});
9791043
Prism.languages.elixir = {
9801044
// Negative look-ahead is needed for string interpolation
9811045
// Negative look-behind is needed to avoid highlighting markdown headers in

assets/stylesheets/global/_icons.scss

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,3 +175,4 @@
175175
._icon-godot:before { background-position: -4rem -2rem; @extend %doc-icon-2; }
176176
._icon-nim:before { background-position: -5rem -2rem; @extend %doc-icon-2; @extend %darkIconFix !optional; }
177177
._icon-vulkan:before { background-position: -6rem -2rem; @extend %doc-icon-2; @extend %darkIconFix !optional; }
178+
._icon-d:before { background-position: -7rem -2rem; @extend %doc-icon-2; }

assets/stylesheets/pages/_d.scss

Lines changed: 5 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,44 +1,9 @@
11
._d {
2-
> .description, > .documentation-section { padding-left: 1rem; }
3-
> .description > h2, header > h3, > h2 { @extend %block-heading; }
4-
.description > h1 { font-size: 1rem; }
5-
.method-description > h2, h3, h4, h5, h6 { font-size: 1em; }
2+
h2 { @extend %block-heading; }
3+
h3, .d_decl { @extend %block-label, %label-blue; }
4+
.d_decl { @extend %code; }
65

7-
.d_decl {
8-
font-weight: $boldFontWeight;
9-
@extend %block-label, %label-blue;
6+
p > code, li > code, td > code, dd > code { @extend %label; }
107

11-
+ .d_decl { margin-top: -.5em; }
12-
}
13-
14-
> .meta {
15-
@extend %note, %note-blue;
16-
17-
> dd { margin: 0; }
18-
> dd + dt { margin-top: .5em; }
19-
}
20-
21-
a.method-click-advice {
22-
float: right;
23-
font-size: .75rem;
24-
color: $linkColor;
25-
cursor: pointer;
26-
@extend %user-select-none;
27-
28-
&:hover { text-decoration: underline; }
29-
}
30-
31-
.method-description { position: relative; }
32-
33-
.method-source-code {
34-
display: none;
35-
position: absolute;
36-
z-index: 1;
37-
top: 0;
38-
right: 0;
39-
background: rgba($contentBackground, .95);
40-
box-shadow: 0 1em 1em 1em $contentBackground;
41-
42-
> pre { margin: 0; }
43-
}
8+
span.red { color: $textColorRed; }
449
}

lib/docs/filters/d/clean_html.rb

Lines changed: 68 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,75 @@ module Docs
22
class D
33
class CleanHtmlFilter < Filter
44
def call
5-
css('.d_decl > div > span.def-anchor').each do |node|
6-
node.parent.parent['id'] = node['id']
5+
@doc = at_css("#content")
6+
7+
css('#tools', '#copyright').remove
8+
9+
css('td > b', 'h1 > span').each do |node|
10+
node.before(node.children).remove
711
end
12+
13+
css('span.d_inlinecode').each do |node|
14+
node.name = 'code'
15+
node.remove_attribute('class')
16+
end
17+
18+
css('.keyval').each do |node|
19+
key = node.at_css('.key')
20+
dt = key.inner_html
21+
dd = if val = node.at_css('.val')
22+
val.inner_html
23+
else
24+
siblings = []
25+
siblings << key while key = key.next
26+
siblings.map(&:to_html).join
27+
end
28+
node.replace("<dl><dt>#{dt}</dt><dd>#{dd}</dd></dl>")
29+
end
30+
31+
css('div.summary', 'div.description').each do |node|
32+
node.name = 'p' unless node.at_css('p')
33+
node.css('.blankline').each { |n| n.replace('<br><br>') }
34+
end
35+
36+
css('.d_decl').each do |node|
37+
node['id'] = node.at_css('.def-anchor')['id'].remove(/\A\./)
38+
constraints = node.css('.constraint').remove
39+
node.content = node.content.strip
40+
node.inner_html = node.inner_html.gsub(/;\s*/, '<br>').remove(/<br>\z/)
41+
node << "<br><br> Constraints:<br> #{constraints.map(&:content).join('<br> ')}" unless constraints.empty?
42+
end
43+
44+
css('pre').each do |node|
45+
node.content = node.content
46+
node['data-language'] = 'd' if node['class'] && node['class'].include?('d_code')
47+
end
48+
49+
css('div', 'code > a > code', 'code > code').each do |node|
50+
node.before(node.children).remove
51+
end
52+
53+
css('a[href*="#."]').each do |node|
54+
node['href'] = node['href'].sub('#.', '#')
55+
end
56+
57+
css('tr', 'td', 'code', 'pre', 'p', 'table').remove_attr('class')
58+
css('table').remove_attr('border').remove_attr('cellpadding').remove_attr('cellspacing')
59+
60+
if base_url.path == '/spec/'
61+
css('a.anchor').each do |node|
62+
node.parent['id'] ||= node['id']
63+
node.before(node.children).remove
64+
end
65+
66+
css('center').each do |node|
67+
node.before(node.children).remove
68+
end
69+
70+
css('.fa-angle-left + a').remove
71+
css('a + .fa-angle-right').each { |node| node.previous_element.remove }
72+
end
73+
874
doc
975
end
1076
end

lib/docs/filters/d/entries.rb

Lines changed: 32 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,25 +2,46 @@ module Docs
22
class D
33
class EntriesFilter < Docs::EntriesFilter
44
def get_name
5-
slug.to_s.gsub('_', '.').gsub('/', '.').squish!
5+
name = at_css('h1').content
6+
7+
if base_url.path == '/spec/'
8+
index = css('.subnav li a').to_a.index(at_css(".subnav li a[href='#{result[:path]}']")) + 1
9+
name.prepend "#{index}. "
10+
end
11+
12+
name
613
end
714

815
def get_type
9-
slug.to_s.sub(/_(.*)/, '')
16+
return 'Reference' if base_url.path == '/spec/'
17+
18+
if name.start_with?('etc') || name.start_with?('core.stdc.')
19+
name.split('.')[0..2].join('.')
20+
elsif name.start_with?('ddmd')
21+
'ddmd'
22+
else
23+
name.split('.')[0..1].join('.')
24+
end
1025
end
1126

1227
def additional_entries
13-
names = []
14-
css('.book > tr > td > a').each do |x|
15-
span_block = x.at_css('span')
16-
if span_block != nil
17-
elem_name = span_block.text
18-
name = "#{get_name}.#{elem_name}"
19-
type = name.sub(/\..*/,'')
20-
names << [name, "#{slug}#{x['href']}", type]
28+
return [] if root_page? || base_url.path == '/spec/'
29+
30+
entries = []
31+
32+
css('.book > tr > td > a').each do |node|
33+
entries << ["#{self.name}.#{node.content}", node['href'].remove(/\A#/).remove(/\A\./)]
34+
end
35+
36+
if entries.empty?
37+
css('.quickindex[id]').each do |node|
38+
name = node['id'].remove(/quickindex\.?/)
39+
next if name.empty? || name =~ /\.\d+\z/
40+
entries << ["#{self.name}.#{name}", name]
2141
end
2242
end
23-
names
43+
44+
entries
2445
end
2546
end
2647
end

lib/docs/scrapers/d.rb

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,30 @@
11
module Docs
22
class D < UrlScraper
3-
self.release = '2.075.1'
3+
include MultipleBaseUrls
4+
5+
self.release = '2.076.0'
46
self.type = 'd'
5-
self.base_url = 'http://dlang.org/phobos/'
7+
self.base_urls = ['https://dlang.org/phobos/', 'https://dlang.org/spec/']
8+
self.root_path = 'index.html'
9+
self.links = {
10+
home: 'https://dlang.org/',
11+
code: 'https://github.com/dlang/phobos'
12+
}
613

714
html_filters.push 'd/entries', 'd/clean_html'
815

9-
options[:container] = '#content'
16+
options[:skip] = %w(spec.html)
17+
options[:container] = '.container'
18+
options[:root_title] = 'D Programming Language'
1019
options[:title] = false
11-
options[:root_title] = 'D Language'
12-
options[:skip_patterns] = [/#.*/]
1320

1421
options[:attribution] = <<-HTML
15-
Copyright &copy; 1999-2017 by the D Language Foundation
22+
&copy; 1999&ndash;2017 The D Language Foundation<br>
23+
Licensed under the Boost License 1.0.
1624
HTML
25+
26+
def initial_urls
27+
%w(https://dlang.org/phobos/index.html https://dlang.org/spec/intro.html)
28+
end
1729
end
1830
end

0 commit comments

Comments
 (0)