Skip to content

Commit 65d1ebb

Browse files
committed
Fixed buriy#70 and added xpath option
1 parent fae95ba commit 65d1ebb

1 file changed

Lines changed: 9 additions & 1 deletion

File tree

readability/readability.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@ def text_length(i):
6767
def compile_pattern(elements):
6868
if not elements:
6969
return None
70+
elif isinstance(elements, (list, tuple)):
71+
return list(elements)
7072
elif isinstance(elements, regexp_type):
7173
return elements
7274
else:
@@ -78,7 +80,7 @@ class Document:
7880
"""Class to build an etree document out of html."""
7981

8082
def __init__(self, input, positive_keywords=None, negative_keywords=None,
81-
url=None, min_text_length=25, retry_length=250, ):
83+
url=None, min_text_length=25, retry_length=250, xpath=False):
8284
"""Generate the document
8385
8486
:param input: string of the html content.
@@ -99,10 +101,16 @@ def __init__(self, input, positive_keywords=None, negative_keywords=None,
99101
self.url = url
100102
self.min_text_length = min_text_length
101103
self.retry_length = retry_length
104+
self.xpath = xpath
102105

103106
def _html(self, force=False):
104107
if force or self.html is None:
105108
self.html = self._parse(self.input)
109+
if self.xpath:
110+
root = self.html.getroottree()
111+
for i in self.html.getiterator():
112+
#print root.getpath(i)
113+
i.attrib['x'] = root.getpath(i)
106114
return self.html
107115

108116
def _parse(self, input):

0 commit comments

Comments
 (0)