i5rank

greatabel · greatabel · commit 7d2ef20f2a15 · 2018-07-03T21:47:02.000+08:00
diff --git a/Software Architecture with Python/2Writing Modifiable and Readable Code/i5rankbase_improve.py b/Software Architecture with Python/2Writing Modifiable and Readable Code/i5rankbase_improve.py
@@ -0,0 +1,30 @@
+import operator
+
+class RankBase(object):
+    """Rank in-memory texts by how often a given word occurs in each.
+
+    Subclasses fetch the texts and map positions back to their sources.
+    """
+
+    def __init__(self, word):
+        # Normalised once so comparisons in rank() are case-insensitive.
+        self.word = word.strip().lower()
+
+    def rank(self, *texts):
+        """Count occurrences of ``self.word`` in every text.
+
+        Returns a dict mapping each text's positional index to the number
+        of whitespace-delimited tokens equal to the word (case-insensitive).
+        The result is unsorted; use ``sort`` to order it by count.
+        """
+        # str.split() already strips whitespace. Building a real list
+        # (map() is a lazy iterator without .count() on Python 3) keeps
+        # this working on both Python 2 and 3.
+        return {idx: text.lower().split().count(self.word)
+                for idx, text in enumerate(texts)}
+
+    def sort(self, occurs):
+        """Return ``(key, count)`` pairs ordered by decreasing count."""
+        # Accept the dict produced by rank() as well as a list of pairs.
+        pairs = occurs.items() if isinstance(occurs, dict) else occurs
+        return sorted(pairs, key=operator.itemgetter(1), reverse=True)
diff --git a/Software Architecture with Python/2Writing Modifiable and Readable Code/i5textrank.py b/Software Architecture with Python/2Writing Modifiable and Readable Code/i5textrank.py
@@ -0,0 +1,27 @@
+import operator
+
+class TextRank(object):
+    """Rank text files by how often a given word occurs in each of them.
+
+    ``word`` is matched case-insensitively against whitespace-delimited
+    tokens; ``filenames`` are the paths of the files to read.
+    """
+
+    def __init__(self, word, *filenames):
+        self.word = word.strip().lower()
+        self.filenames = filenames
+
+    def rank(self):
+        """Return ``(filename, count)`` tuples, highest count first.
+
+        Files are opened in default text mode; any error raised by
+        ``open`` or ``read`` propagates to the caller.
+        """
+        occurs = []
+        for fpath in self.filenames:
+            # Close each file promptly instead of leaking the handle.
+            with open(fpath) as fobj:
+                # A real list: map() has no .count() on Python 3.
+                words = fobj.read().lower().split()
+            occurs.append((fpath, words.count(self.word)))
+        return sorted(occurs, key=operator.itemgetter(1), reverse=True)
diff --git a/Software Architecture with Python/2Writing Modifiable and Readable Code/i5textrank_improve.py b/Software Architecture with Python/2Writing Modifiable and Readable Code/i5textrank_improve.py
@@ -0,0 +1,57 @@
+# Code listing #20
+
+""" Module textrank - Rank text files by how frequently a specific word occurs in them. """
+
+# Note: This is textrank.py rewritten to use rankbase, so called textrank2.py
+
+import operator
+from i5rankbase_improve import RankBase
+
+class TextRank(object):
+    """Rank text files by how often a given word occurs in each of them.
+
+    NOTE: the ``TextRank(RankBase)`` class defined later in this module
+    rebinds the name, so this standalone version is shadowed on import.
+    """
+
+    def __init__(self, word, *filenames):
+        self.word = word.strip().lower()
+        self.filenames = filenames
+
+    def rank(self):
+        """Return ``(filename, count)`` tuples, highest count first.
+
+        Matching is case-insensitive on whitespace-delimited tokens.
+        """
+        occurs = []
+        for fpath in self.filenames:
+            # Close each file promptly instead of leaking the handle.
+            with open(fpath) as fobj:
+                # A real list: map() has no .count() on Python 3.
+                words = fobj.read().lower().split()
+            occurs.append((fpath, words.count(self.word)))
+        return sorted(occurs, key=operator.itemgetter(1), reverse=True)
+
+class TextRank(RankBase):
+    """Rank text files by how often a given word occurs in each of them.
+
+    Counting and sorting are inherited from ``RankBase``; this class only
+    reads the files and maps result indices back to filenames.
+    """
+
+    def __init__(self, word, *filenames):
+        super(TextRank, self).__init__(word)  # base normalises the word
+        self.filenames = filenames
+
+    def rank(self):
+        """Return ``(filename, count)`` tuples, highest count first."""
+        texts = []
+        for fpath in self.filenames:
+            with open(fpath) as fobj:  # close handles deterministically
+                texts.append(fobj.read())
+        counts = super(TextRank, self).rank(*texts)
+        return self.sort([(self.filenames[i], n) for i, n in counts.items()])
+
+if __name__ == "__main__":  # usage: script FILE [FILE ...]
+    import sys
+    print(TextRank('common', *sys.argv[1:]).rank())
diff --git a/Software Architecture with Python/2Writing Modifiable and Readable Code/i5urlrank.py b/Software Architecture with Python/2Writing Modifiable and Readable Code/i5urlrank.py
@@ -0,0 +1,27 @@
+import operator
+import requests
+
+class UrlRank(object):
+    """Rank URLs by how often a given word occurs in their content.
+
+    ``word`` is matched case-insensitively against whitespace-delimited
+    tokens of each response body; no HTML parsing is performed.
+    """
+
+    def __init__(self, word, *urls):
+        self.word = word.strip().lower()
+        self.urls = urls
+
+    def rank(self):
+        """Return ``(url, count)`` tuples, highest count first.
+
+        Each URL is fetched with ``requests.get``; its errors propagate.
+        """
+        occurs = []
+        for url in self.urls:
+            # .text is the decoded body; .content is bytes on Python 3,
+            # whose tokens never compare equal to the str ``self.word``.
+            words = requests.get(url).text.lower().split()
+            # A real list: map() has no .count() on Python 3.
+            occurs.append((url, words.count(self.word)))
+        return sorted(occurs, key=operator.itemgetter(1), reverse=True)
diff --git a/Software Architecture with Python/2Writing Modifiable and Readable Code/i5urlrank_improve.py b/Software Architecture with Python/2Writing Modifiable and Readable Code/i5urlrank_improve.py
@@ -0,0 +1,59 @@
+# Code listing #21
+
+""" Module urlrank - Rank URLs by how frequently a specific word occurs in their content. """
+
+# Note: This is urlrank.py rewritten to use rankbase so called urlrank2.py
+
+import requests
+import operator
+
+from i5rankbase_improve import RankBase
+
+class UrlRank(object):
+    """Rank URLs by how often a given word occurs in their content.
+
+    NOTE: the ``UrlRank(RankBase)`` class defined later in this module
+    rebinds the name, so this standalone version is shadowed on import.
+    """
+
+    def __init__(self, word, *urls):
+        self.word = word.strip().lower()
+        self.urls = urls
+
+    def rank(self):
+        """Return ``(url, count)`` tuples, highest count first.
+
+        Matching is case-insensitive on whitespace-delimited tokens.
+        """
+        occurs = []
+        for url in self.urls:
+            # .text is the decoded body; .content is bytes on Python 3,
+            # whose tokens never compare equal to the str ``self.word``.
+            words = requests.get(url).text.lower().split()
+            # A real list: map() has no .count() on Python 3.
+            occurs.append((url, words.count(self.word)))
+        return sorted(occurs, key=operator.itemgetter(1), reverse=True)
+
+class UrlRank(RankBase):
+    """Rank URLs by how often a given word occurs in their content.
+
+    Counting and sorting are inherited from ``RankBase``; this class only
+    downloads the pages and maps result indices back to URLs.
+    """
+
+    def __init__(self, word, *urls):
+        super(UrlRank, self).__init__(word)  # base normalises the word
+        self.urls = urls
+
+    def rank(self):
+        """Return ``(url, count)`` tuples, highest count first."""
+        # .text is the decoded body; .content is bytes on Python 3 and
+        # its tokens would never compare equal to the str ``self.word``.
+        texts = [requests.get(url).text for url in self.urls]
+        counts = super(UrlRank, self).rank(*texts)
+        pairs = [(self.urls[i], n) for i, n in counts.items()]
+        return self.sort(pairs)
+
+if __name__ == "__main__":  # usage: script URL [URL ...]
+    import sys
+    print(UrlRank('python',*sys.argv[1:]).rank())  # ranks by 'python' count