-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathexample.py
More file actions
38 lines (26 loc) · 877 Bytes
/
example.py
File metadata and controls
38 lines (26 loc) · 877 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
def mapper_word_count(input, doc_id):
    """Map a piece of text to ``(word, 1)`` pairs for word counting.

    ASCII punctuation (``string.punctuation``) is removed first, then the
    text is split on any run of whitespace. Splitting on whitespace rather
    than on the single space character keeps words separated by newlines
    or tabs from being fused into one token.

    Args:
        input: The text to tokenize.
        doc_id: Identifier of the source document. Not used by this mapper.

    Returns:
        A list of ``(word, 1)`` tuples, one per word occurrence, in the
        order the words appear in ``input``.
    """
    import string

    stripped = input.translate(str.maketrans("", "", string.punctuation))
    # split() with no argument never yields empty strings, so no separate
    # empty-token filter is needed.
    return [(word, 1) for word in stripped.split()]
def reducer_word_count(input):
    """Return the sum of all values in ``input``.

    ``input`` must be an iterable of numbers -- presumably the 1s emitted
    by ``mapper_word_count`` for a single word (confirm against the
    mapreduce driver).
    """
    total = sum(input)
    return total
def mapper_inverted_index(input, doc_id):
    """Map a piece of text to ``("<word>_<doc_id>", 1)`` pairs.

    ASCII punctuation (``string.punctuation``) is removed first, then the
    text is split on any run of whitespace. Splitting on whitespace rather
    than on the single space character keeps words separated by newlines
    or tabs from being fused into one token.

    Args:
        input: The text to tokenize.
        doc_id: Identifier of the source document. It is formatted into
            each key, so any value with a string representation works
            (not only ``str``).

    Returns:
        A list of ``(key, 1)`` tuples, one per word occurrence, in the
        order the words appear in ``input``; each key is the word and
        ``doc_id`` joined by an underscore.
    """
    import string

    stripped = input.translate(str.maketrans("", "", string.punctuation))
    # split() with no argument never yields empty strings, so no separate
    # empty-token filter is needed.
    return [(f"{word}_{doc_id}", 1) for word in stripped.split()]
def reducer_inverted_index(input):
    """Return the sum of all values in ``input``.

    ``input`` must be an iterable of numbers -- presumably the 1s emitted
    by ``mapper_inverted_index`` for a single word/document key (confirm
    against the mapreduce driver).
    """
    total = sum(input)
    return total
from mapreduce import mapreduce
# Driver: run the word-count mapper/reducer pair against ./Input using
# 5 mappers and 5 reducers.
input_location = r"./Input"
run_object = mapreduce(
    num_mappers=5,
    num_reducers=5,
)
run_object.run(
    input_location,
    mapper_word_count,
    reducer_word_count,
)