summaryrefslogtreecommitdiff
path: root/BDA/tp2.py
diff options
context:
space:
mode:
authorMartial Simon <msimon_fr@hotmail.com>2026-02-23 12:00:26 +0100
committerMartial Simon <msimon_fr@hotmail.com>2026-02-23 12:00:26 +0100
commit7c1f2354a5c34f0be273e16acebd699baa164266 (patch)
tree95233ea46f109c7658cded1a5526ec2a4b713cb3 /BDA/tp2.py
parent3b90f78fbbce0ccca74de1b39d2a1a8a9f423a58 (diff)
feat: BDA2
Diffstat (limited to 'BDA/tp2.py')
-rw-r--r--BDA/tp2.py38
1 files changed, 38 insertions, 0 deletions
diff --git a/BDA/tp2.py b/BDA/tp2.py
new file mode 100644
index 0000000..6b5b00e
--- /dev/null
+++ b/BDA/tp2.py
@@ -0,0 +1,38 @@
+from mrjob.job import MRJob
+from mrjob.step import MRStep
+import re
+
# Tokenizer used by the mappers below: a "word" is a maximal run of \w
# characters (letters, digits, underscore) and apostrophes.
WORD_RE = re.compile(r"[\w']+")
+
class MRWordFrequencyCount(MRJob):
    """Two-step job that emits the most frequent word of the input.

    Step 1 counts how many times each word matched by ``WORD_RE`` occurs.
    Step 2 receives every ``(count, word)`` pair under a single key and
    emits the largest one.
    """

    def steps(self):
        """Return the pipeline: count words, then keep the maximum."""
        return [
            MRStep(mapper=self.mapper,
                   combiner=self.combiner_count_words,
                   reducer=self.reducer_count_words),
            MRStep(reducer=self.reducer_find_max_word),
        ]

    def mapper(self, _, line):
        """Yield ``(word, 1)`` for every ``WORD_RE`` match in ``line``."""
        for word in WORD_RE.findall(line):
            yield word, 1

    def combiner_count_words(self, word, counts):
        """Pre-sum the counts of ``word`` before the shuffle.

        The output has the same ``(word, int)`` shape as the mapper's, so
        the final result does not depend on whether (or how often) the
        combiner is actually run.
        """
        yield word, sum(counts)

    def reducer_count_words(self, key, values):
        """Yield ``(None, (total, word))`` for the word ``key``.

        Using ``None`` as the key for every word sends all pairs to the same
        reducer call in the next step.
        """
        yield None, (sum(values), key)

    def reducer_find_max_word(self, _, kvp):
        """Yield the greatest ``(count, word)`` pair as the job output.

        Pairs compare by count first; equal counts are broken by the word
        itself, so the greater word wins a tie.
        """
        yield max(kvp)
+
class MRLongestWord(MRJob):
    """Job that emits the longest word of the input as ``(length, word)``.

    Every stage exchanges ``(length, word)`` pairs under the single key
    ``None``. Because the combiner consumes and produces the same shape as
    the mapper, the job is correct whether the combiner runs zero, one or
    several times.
    """

    def mapper(self, _, line):
        """Yield ``(None, (len(word), word))`` for every word in ``line``."""
        for word in WORD_RE.findall(line):
            yield None, (len(word), word)

    def combiner(self, _, len_word):
        """Keep only the locally longest ``(length, word)`` pair.

        ``len_word`` is an iterator of pairs (it cannot be indexed), so the
        maximum is taken by consuming it with ``max``.
        """
        yield None, max(len_word)

    def reducer(self, _, len_word):
        """Yield the overall longest ``(length, word)`` pair.

        Pairs compare by length first; equal lengths are broken by the word
        itself, so the greater word wins a tie.
        """
        yield max(len_word)
+
if __name__ == '__main__':
    # Launch the longest-word job. To run the word-frequency job instead,
    # assign MRWordFrequencyCount here.
    job_class = MRLongestWord
    job_class.run()