summary | refs | log | tree | commit | diff
path: root/BDA/tp3.py
diff options
context:
space:
mode:
Diffstat (limited to 'BDA/tp3.py')
-rw-r--r-- BDA/tp3.py 25
1 files changed, 25 insertions, 0 deletions
diff --git a/BDA/tp3.py b/BDA/tp3.py
new file mode 100644
index 0000000..ea2afd1
--- /dev/null
+++ b/BDA/tp3.py
@@ -0,0 +1,25 @@
+from mrjob.job import MRJob
+from mrjob.step import MRStep
+import re
+
+# Tokenizer pattern: one or more word characters (\w) or apostrophes, so a
+# contraction such as "don't" is matched as a single token.
+WORD_RE = re.compile(r"[\w']+")
+
+class MRWordFrequencyCount(MRJob):
+    """Two-step mrjob job that reports the most frequent word in the input.
+
+    Step 1 counts how often each token matched by ``WORD_RE`` occurs.
+    Step 2 gathers every ``(count, word)`` pair under one key and keeps the
+    largest. Matching is case-sensitive: "The" and "the" are distinct words.
+    """
+
+    def mapper(self, _, line):
+        """Emit ``(word, 1)`` for every token found in ``line``.
+
+        The input key is ignored; only the line text is used.
+        """
+        for word in WORD_RE.findall(line):
+            yield word, 1
+
+    def steps(self):
+        """Return the job's two chained steps (count words, then pick max)."""
+        return [
+            MRStep(mapper=self.mapper,
+                   # Pre-sum counts before the shuffle so far fewer pairs
+                   # are sent to the reducers; the totals are unchanged.
+                   combiner=self.combiner_count_words,
+                   reducer=self.reducer_count_words),
+            MRStep(reducer=self.reducer_find_max_word),
+        ]
+
+    def combiner_count_words(self, key, values):
+        """Emit ``(word, partial_count)`` for the counts seen so far.
+
+        The output has the same shape as the mapper's, so the reducer sees
+        the same totals whether or not the combiner is run.
+        """
+        yield key, sum(values)
+
+    def reducer_count_words(self, key, values):
+        """Emit ``(None, (total, word))`` for one word.
+
+        Every word is emitted under the same key (``None``) so that the
+        next step's reducer receives all ``(total, word)`` pairs together.
+        """
+        yield None, (sum(values), key)
+
+    def reducer_find_max_word(self, _, kvp):
+        """Emit the single largest ``(count, word)`` pair as key and value.
+
+        Pairs compare by count first and by word second, so when several
+        words share the top count the greatest word in string order wins.
+        """
+        count, word = max(kvp)
+        yield count, word
+
+
+if __name__ == '__main__':
+ # Script entry point: run the job only when executed directly, not on import.
+ MRWordFrequencyCount.run()