summary | refs | log | tree | commit | diff
path: root/BDA/tp3.py
diff options
context:
space:
mode:
author Martial Simon <msimon_fr@hotmail.com> 2026-03-01 22:42:08 +0100
committer Martial Simon <msimon_fr@hotmail.com> 2026-03-01 22:42:08 +0100
commit 9a02eacc574a5de22f0fccb32a7b20cae85ac097 (patch)
tree 16ca07a2a90c5efb2c9009301f32e15f4e78feff /BDA/tp3.py
parent 7c1f2354a5c34f0be273e16acebd699baa164266 (diff)
feat: semaine du 23 février
Diffstat (limited to 'BDA/tp3.py')
-rw-r--r-- BDA/tp3.py 25
1 files changed, 25 insertions, 0 deletions
diff --git a/BDA/tp3.py b/BDA/tp3.py
new file mode 100644
index 0000000..ea2afd1
--- /dev/null
+++ b/BDA/tp3.py
@@ -0,0 +1,25 @@
+from mrjob.job import MRJob
+from mrjob.step import MRStep
+import re
+
+# Tokenizer: a word is a run of word characters and/or apostrophes,
+# so contractions such as "don't" are kept as a single token.
+WORD_RE = re.compile(r"[\w']+")
+
+class MRWordFrequencyCount(MRJob):
+    """Find the most frequent word in the input.
+
+    Step 1 counts the occurrences of every word. Step 2 gathers all
+    ``(count, word)`` pairs under a single key and keeps the largest.
+    """
+
+    def mapper(self, _, line):
+        """Emit ``(word, 1)`` for every ``WORD_RE`` match in ``line``."""
+        for word in WORD_RE.findall(line):
+            yield word, 1
+
+    def combiner_count_words(self, word, counts):
+        """Pre-sum the counts of ``word`` before they reach the reducer.
+
+        Summation is associative, so partial sums leave the final
+        totals unchanged while cutting the number of pairs passed on.
+        """
+        yield word, sum(counts)
+
+    def steps(self):
+        """Return the two-step pipeline: count words, then pick the max."""
+        return [
+            MRStep(mapper=self.mapper,
+                   combiner=self.combiner_count_words,
+                   reducer=self.reducer_count_words),
+            MRStep(reducer=self.reducer_find_max_word),
+        ]
+
+    def reducer_count_words(self, key, values):
+        """Emit ``(None, (total, word))`` for the word ``key``.
+
+        Every word is emitted under the same ``None`` key so that the
+        next step sees all ``(total, word)`` pairs together.
+        """
+        yield None, (sum(values), key)
+
+    def reducer_find_max_word(self, _, kvp):
+        """Emit the largest pair as key ``count`` and value ``word``.
+
+        Pairs compare by count first, then by word, so a tie on the
+        count is broken in favour of the greatest word.
+        """
+        count, word = max(kvp)
+        yield count, word
+
+# Launch the job only when this file is executed as a script,
+# not when it is imported as a module.
+if __name__ == '__main__':
+ MRWordFrequencyCount.run()