forked from prateekiiest/Code-Sleep-Python
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtranslation_hamlet.py
More file actions
64 lines (43 loc) · 1.87 KB
/
translation_hamlet.py
File metadata and controls
64 lines (43 loc) · 1.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# Exercise solution: distribution of per-word occurrence counts.
def word_count_distribution(text):
    """Return how many words share each per-word tally found in ``text``.

    The per-word tallies come from ``count_words_fast(text)`` wrapped in a
    ``Counter``; the values of that counter are then tallied themselves.
    The result is converted to a plain ``dict`` mapping a tally to the
    number of entries that have that tally.

    NOTE(review): ``count_words_fast`` is defined outside this section; it
    presumably yields the words of ``text`` or a word -> count mapping --
    confirm against its definition.
    """
    per_word = Counter(count_words_fast(text))
    # Tally the tallies, then drop the Counter subclass for a plain dict.
    return dict(Counter(per_word.values()))
# Distribution for `text`, which must already be defined before this line
# (its definition is not part of this section).
distribution = word_count_distribution(text)
# Exercise solution: fraction of words occurring more often than each count.
def more_frequent(distribution):
    """Return, for each count, the fraction of words that occur more often.

    Args:
        distribution: Mapping from an occurrence count to the number of
            words that have exactly that count.

    Returns:
        A dict with the same keys as ``distribution``, inserted in ascending
        key order. Each value is the share of all words whose count is
        strictly greater than the key, so the largest key maps to ``0.0``.
        An empty ``distribution`` yields ``{}``.
    """
    total = sum(distribution.values())
    fractions = {}
    seen = 0
    # Walk the counts in ascending order so that `seen` is the number of
    # words occurring at most `count` times; the remainder occur more often.
    # Iterating the real keys (not range(len(...))) avoids KeyError on
    # counts that are absent from the mapping.
    for count in sorted(distribution):
        seen += distribution[count]
        # Guard against a degenerate all-zero mapping; float() keeps true
        # division even where `/` on ints would truncate.
        fractions[count] = float(total - seen) / total if total else 0.0
    return fractions
# Bare call: the returned dict is neither stored nor printed here.
more_frequent(distribution)
# Collect one row per book titled "Hamlet": the language it is filed under
# and the word-count distribution of its text.
hamlets = pd.DataFrame(columns=["language", "distribution"])
book_dir = "Books"
title_num = 1  # row label for the next entry; labels therefore start at 1

# NOTE(review): book_titles, data_filepath and read_book are defined outside
# this section; book_titles is walked as language -> author -> titles.
for language in book_titles:
    for author in book_titles[language]:
        for title in book_titles[language][author]:
            if title != "Hamlet":
                continue
            inputfile = (
                data_filepath + "Books/" + language + "/" + author + "/"
                + title + ".txt"
            )
            text = read_book(inputfile)
            distribution = word_count_distribution(text)
            # Assigning to a new label appends a row to the frame.
            hamlets.loc[title_num] = (language, distribution)
            title_num += 1
# Draw one log-log curve per row of `hamlets` and label it by language.
# NOTE(review): `colors` has three entries and is indexed by row position,
# so a fourth row would raise IndexError -- confirm at most three rows.
colors = ["crimson", "forestgreen", "blueviolet"]
handles, hamlet_languages = [], []
for index in range(hamlets.shape[0]):
    # Rows are stored under labels starting at 1, hence the +1 offset.
    language = hamlets.language[index + 1]
    distribution = hamlets.distribution[index + 1]
    dist = more_frequent(distribution)
    # x: keys ascending; y: values descending. The two are sorted
    # independently of each other.
    (plot,) = plt.loglog(
        sorted(dist.keys()),
        sorted(dist.values(), reverse=True),
        color=colors[index],
        linewidth=2,
    )
    handles.append(plot)
    hamlet_languages.append(language)

plt.title("Word Frequencies in Hamlet Translations")
# NOTE(review): a lower limit of 0 on a log-scaled axis -- confirm that
# matplotlib treats it as intended.
xlim = [0, 2e3]
xlabel = "Frequency of Word $W$"
ylabel = "Fraction of Words\nWith Greater Frequency than $W$"
plt.xlim(xlim)
plt.xlabel(xlabel)
plt.ylabel(ylabel)
plt.legend(handles, hamlet_languages, loc="upper right", numpoints=1)
# Render the finished figure.
plt.show()