Skip to content

Commit 7d2ef20

Browse files
committed
i5rank
1 parent 4a84407 commit 7d2ef20

5 files changed

Lines changed: 200 additions & 0 deletions

File tree

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
import operator
2+
3+
class RankBase(object):
    """ Accept text data as inputs and rank them in
    terms of how often a word occurs in them """

    def __init__(self, word):
        """ Store the search word, stripped of surrounding
        whitespace and lower-cased """

        self.word = word.strip().lower()

    def rank(self, *texts):
        """ Count the occurrences of the word in each input text.

        A dictionary {idx: #occur} is returned, where idx is the
        position of the text in the argument list. Matching is
        case-insensitive and works on whitespace-separated tokens,
        so a token such as 'word,' does not match 'word'.
        Use sort() to order the result. """

        occurs = {}

        for idx, text in enumerate(texts):
            # A real list is needed here: on Python 3 map() returns an
            # iterator, which has no count() method.
            words = [token.lower() for token in text.split()]
            occurs[idx] = words.count(self.word)

        # Return dictionary
        return occurs

    def sort(self, occurs):
        """ Return the ranking data in decreasing order of occurrences.

        `occurs` may be a sequence of (key, #occur) pairs or the
        dictionary returned by rank(). A list of (key, #occur)
        tuples is returned; ties keep their input order. """

        if isinstance(occurs, dict):
            # Iterating a dict yields bare keys, which cannot be indexed
            # by itemgetter(1); rank its (key, count) pairs instead.
            occurs = occurs.items()

        return sorted(occurs, key=operator.itemgetter(1), reverse=True)
30+
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
import operator
2+
3+
class TextRank(object):
    """ Accept text files as inputs and rank them in
    terms of how often a word occurs in them """

    def __init__(self, word, *filenames):
        """ Store the search word (stripped and lower-cased)
        and the paths of the files to rank """

        self.word = word.strip().lower()
        self.filenames = filenames

    def rank(self):
        """ Rank the files. A list of (filename, #occur) tuples
        is returned in decreasing order of occurrences.

        Matching is case-insensitive and works on
        whitespace-separated tokens. Errors raised by open()
        for a missing or unreadable file are not caught. """

        occurs = []

        for fpath in self.filenames:
            # The context manager closes the file even if read() fails
            with open(fpath) as infile:
                data = infile.read()

            # A real list is needed here: on Python 3 map() returns an
            # iterator, which has no count() method.
            words = [token.lower() for token in data.split()]
            occurs.append((fpath, words.count(self.word)))

        # Return in sorted order
        return sorted(occurs, key=operator.itemgetter(1), reverse=True)
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
# Code listing #20
2+
3+
""" Module textrank - Rank text files in order of degree of a specific word frequency. """
4+
5+
# Note: This is textrank.py rewritten on top of RankBase, hence the name textrank2.py
6+
7+
import operator
8+
from i5rankbase_improve import RankBase
9+
10+
class TextRank(object):
    """ Accept text files as inputs and rank them in
    terms of how often a word occurs in them """

    # NOTE(review): a later class statement in this listing reuses the
    # name TextRank, so this definition appears to be kept for reference only.

    def __init__(self, word, *filenames):
        """ Store the search word (stripped and lower-cased)
        and the paths of the files to rank """

        self.word = word.strip().lower()
        self.filenames = filenames

    def rank(self):
        """ Rank the files. A list of (filename, #occur) tuples
        is returned in decreasing order of occurrences.

        Matching is case-insensitive and works on
        whitespace-separated tokens. Errors raised by open()
        for a missing or unreadable file are not caught. """

        occurs = []

        for fpath in self.filenames:
            # The context manager closes the file even if read() fails
            with open(fpath) as infile:
                data = infile.read()

            # A real list is needed here: on Python 3 map() returns an
            # iterator, which has no count() method.
            words = [token.lower() for token in data.split()]
            occurs.append((fpath, words.count(self.word)))

        # Return in sorted order
        return sorted(occurs, key=operator.itemgetter(1), reverse=True)
34+
35+
class TextRank(RankBase):
    """ Accept text files as inputs and rank them in
    terms of how often a word occurs in them.

    Counting and sorting are delegated to RankBase. """

    def __init__(self, word, *filenames):
        """ Store the search word (stripped and lower-cased)
        and the paths of the files to rank """

        self.word = word.strip().lower()
        self.filenames = filenames

    def rank(self):
        """ Rank the files. The (filename, #occur) pairs are
        returned in the order produced by RankBase.sort().

        Errors raised by open() for a missing or unreadable
        file are not caught. """

        texts = []
        for fpath in self.filenames:
            # The context manager closes each file as soon as it is read;
            # open(x).read() inside map() left every handle open.
            with open(fpath) as infile:
                texts.append(infile.read())

        occurs = super(TextRank, self).rank(*texts)
        # Convert to filename list: the base class keys its counts by the
        # position of each text, which matches the position in self.filenames.
        occurs = [(self.filenames[idx], count) for idx, count in occurs.items()]

        return self.sort(occurs)
54+
55+
if __name__ == "__main__":
    import sys
    # Rank the files named on the command line by occurrences of 'common'.
    # print() with a single argument behaves the same on Python 2 and 3,
    # whereas the print statement is a syntax error on Python 3.
    print(TextRank('common', *sys.argv[1:]).rank())
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
import operator
2+
import requests
3+
4+
class UrlRank(object):
    """ Accept URLs as inputs and rank them in
    terms of how often a word occurs in them """

    def __init__(self, word, *urls):
        """ Store the search word (stripped and lower-cased)
        and the URLs to rank """

        self.word = word.strip().lower()
        self.urls = urls

    def rank(self):
        """ Rank the URLs. A list of (url, #occur) tuples is
        returned in decreasing order of occurrences.

        Each URL is fetched with requests.get(); matching is
        case-insensitive and works on whitespace-separated tokens
        of the response body. Exceptions raised by requests are
        not caught. """

        occurs = []

        for url in self.urls:
            # Use the decoded body: .content is bytes on Python 3, and
            # bytes tokens never compare equal to the str search word.
            # NOTE(review): no timeout is passed, so a stalled server can
            # block indefinitely - consider adding one.
            data = requests.get(url).text

            # A real list is needed here: on Python 3 map() returns an
            # iterator, which has no count() method.
            words = [token.lower() for token in data.split()]
            occurs.append((url, words.count(self.word)))

        # Return in sorted order
        return sorted(occurs, key=operator.itemgetter(1), reverse=True)
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# Code listing #21
2+
3+
""" Module urlrank - Rank URLs in order of degree of a specific word frequency """
4+
5+
# Note: This is urlrank.py rewritten on top of RankBase, hence the name urlrank2.py
6+
7+
import requests
8+
import operator
9+
10+
from i5rankbase_improve import RankBase
11+
12+
class UrlRank(object):
    """ Accept URLs as inputs and rank them in
    terms of how often a word occurs in them """

    # NOTE(review): a later class statement in this listing reuses the
    # name UrlRank, so this definition appears to be kept for reference only.

    def __init__(self, word, *urls):
        """ Store the search word (stripped and lower-cased)
        and the URLs to rank """

        self.word = word.strip().lower()
        self.urls = urls

    def rank(self):
        """ Rank the URLs. A list of (url, #occur) tuples is
        returned in decreasing order of occurrences.

        Each URL is fetched with requests.get(); matching is
        case-insensitive and works on whitespace-separated tokens
        of the response body. Exceptions raised by requests are
        not caught. """

        occurs = []

        for url in self.urls:
            # Use the decoded body: .content is bytes on Python 3, and
            # bytes tokens never compare equal to the str search word.
            # NOTE(review): no timeout is passed, so a stalled server can
            # block indefinitely - consider adding one.
            data = requests.get(url).text

            # A real list is needed here: on Python 3 map() returns an
            # iterator, which has no count() method.
            words = [token.lower() for token in data.split()]
            occurs.append((url, words.count(self.word)))

        # Return in sorted order
        return sorted(occurs, key=operator.itemgetter(1), reverse=True)
36+
37+
class UrlRank(RankBase):
    """ Accept URLs as inputs and rank them in
    terms of how often a word occurs in them.

    Counting and sorting are delegated to RankBase. """

    def __init__(self, word, *urls):
        """ Store the search word (stripped and lower-cased)
        and the URLs to rank """

        self.word = word.strip().lower()
        self.urls = urls

    def rank(self):
        """ Rank the URLs. The (url, #occur) pairs are returned
        in the order produced by RankBase.sort().

        Each URL is fetched with requests.get(); exceptions
        raised by requests are not caught. """

        # Pass decoded text to the base class: .content is bytes on
        # Python 3, and bytes tokens never compare equal to the str word.
        # NOTE(review): no timeout is passed, so a stalled server can
        # block indefinitely - consider adding one.
        texts = [requests.get(url).text for url in self.urls]

        occurs = super(UrlRank, self).rank(*texts)
        # Convert to URLs list: the base class keys its counts by the
        # position of each text, which matches the position in self.urls.
        occurs = [(self.urls[idx], count) for idx, count in occurs.items()]

        return self.sort(occurs)
56+
57+
if __name__ == "__main__":
    import sys
    # Rank the URLs given on the command line by occurrences of 'python'
    # and print the resulting ranking.
    url_args = sys.argv[1:]
    ranker = UrlRank('python', *url_args)
    print(ranker.rank())

0 commit comments

Comments
 (0)