forked from buriy/python-readability
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdebug.py
More file actions
53 lines (40 loc) · 1.28 KB
/
debug.py
File metadata and controls
53 lines (40 loc) · 1.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import re
#FIXME: use with caution, can leak memory
uids = {}
uids_document = None
def describe_node(node):
global uids
if node is None:
return ''
if not hasattr(node, 'tag'):
return "[%s]" % type(node)
name = node.tag
if node.get('id', ''):
name += '#' + node.get('id')
if node.get('class', '').strip():
name += '.' + '.'.join(node.get('class').split())
if name[:4] in ['div#', 'div.']:
name = name[3:]
if name in ['tr', 'td', 'div', 'p']:
uid = uids.get(node)
if uid is None:
uid = uids[node] = len(uids) + 1
name += "{%02d}" % uid
return name
def describe(node, depth=1):
global uids, uids_document
doc = node.getroottree().getroot()
if doc != uids_document:
uids = {}
uids_document = doc
#return repr(NodeRepr(node))
parent = ''
if depth and node.getparent() is not None:
parent = describe(node.getparent(), depth=depth - 1) + '>'
return parent + describe_node(node)
RE_COLLAPSE_WHITESPACES = re.compile('\s+', re.U)
def text_content(elem, length=40):
content = RE_COLLAPSE_WHITESPACES.sub(' ', elem.text_content().replace('\r', ''))
if len(content) < length:
return content
return content[:length] + '...'