-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhtml2JsonArray.js
More file actions
100 lines (90 loc) · 2.75 KB
/
html2JsonArray.js
File metadata and controls
100 lines (90 loc) · 2.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
const HTMLParser = (require('./htmlparser-node')).HTMLParser;
const fs = require('fs');
const jsonFomat = require('./jsonFomat');
module.exports = ({
htmlText,
inputFile,
outputFile
}) => {
var results = {};
var parentTagObj;
var curTagValueArr;
var filoObjArr = []; // first in last out array
var isEndOfTag = true;
var html_content = htmlText;
if (!htmlText) {
if (inputFile && fs.existsSync(inputFile)) {
html_content = fs.readFileSync(inputFile).toString();
}
else {
console.log('no valid input!');
return null;
}
}
function start(tag, attrs, unary) {
var tag_obj;
isEndOfTag = false;
if (filoObjArr.length === 0) {
tag_obj = results;
tag_obj[tag] = [];
filoObjArr.push(tag_obj);
curTagValueArr = tag_obj[tag];
}
else {
tag_obj = {};
tag_obj[tag] = [];
filoObjArr.push(tag_obj);
curTagValueArr.push(tag_obj);
curTagValueArr = tag_obj[tag];
}
for (var i = 0; i < attrs.length; i++) {
const attr_obj = {};
const attr = attrs[i];
attr_obj['$' + attr.name] = attr.value;
curTagValueArr.push(attr_obj);
}
if (unary) {
console.log();
end(tag);
}
}
function end(tag) {
filoObjArr.pop();
const last_index = filoObjArr.length - 1;
if (last_index < 0) {
return;
}
const obj = filoObjArr[last_index];
curTagValueArr = Object.values(obj)[0];
isEndOfTag = true;
}
HTMLParser(html_content, {
start: start,
end: end,
chars: function (text) {
// console.log(text);
if (isEndOfTag) { //&& (text==='\n' || !curTagValueArr || /^\x20+$/g.test(text) ) ) {
return;
}
if (text === '' || /^[\n\t\r\x20]+$/g.test(text) || !curTagValueArr || /^\x20+$/g.test(text)) {
return;
}
if ( /[\n\r]+/.test(text) ) {
console.log(text);
}
// text = text.replace(new RegExp('"', 'g'), '\\"');
// console.log(text);
curTagValueArr.push({ $innerText: text });
},
comment: function (text) {
// console.log(text);
}
});
console.log(results);
if (outputFile) {
// 自定义输出格式
const json_text = 'module.exports = \n' + jsonFomat(results);
fs.writeFileSync(outputFile, json_text);
}
return results;
};