"""Find the most frequent word of the input with a two-step mrjob job.

Step 1 counts how many times each word occurs; step 2 keeps the single
``(count, word)`` pair with the highest count.
"""
# Source: BDA/tp3.py, as added by commit
# 9a02eacc574a5de22f0fccb32a7b20cae85ac097.

import re

from mrjob.job import MRJob
from mrjob.step import MRStep

# A "word" is a maximal run of word characters and apostrophes.
WORD_RE = re.compile(r"[\w']+")


class MRWordFrequencyCount(MRJob):
    """Two-step job whose output is the most frequent word and its count.

    NOTE: words are compared exactly as they appear in the input, so
    "The" and "the" are counted as two different words.
    """

    def steps(self):
        """Return the job's steps: count every word, then pick the maximum."""
        return [
            MRStep(
                mapper=self.mapper,
                combiner=self.combiner_count_words,
                reducer=self.reducer_count_words,
            ),
            MRStep(reducer=self.reducer_find_max_word),
        ]

    def mapper(self, _, line):
        """Yield ``(word, 1)`` for every word found in ``line``.

        The input key is ignored.
        """
        for word in WORD_RE.findall(line):
            yield word, 1

    def combiner_count_words(self, word, counts):
        """Yield ``(word, partial_sum)`` to shrink the data sent to reducers.

        This is purely an optimization: ``reducer_count_words`` sums whatever
        counts it receives, so partial sums give the same final totals as the
        raw ``1`` values would.
        """
        yield word, sum(counts)

    def reducer_count_words(self, key, values):
        """Yield ``(None, (total, word))`` for the word ``key``.

        Every pair is emitted under the same key (``None``) so that the next
        step receives all ``(total, word)`` pairs in a single reducer call.
        """
        yield None, (sum(values), key)

    def reducer_find_max_word(self, _, kvp):
        """Yield the largest ``(count, word)`` pair as the final result.

        Pairs compare by count first and by word second, so a tie on count is
        resolved in favour of the word that sorts last. The yielded pair is
        itself the ``key, value`` of the output record: key = count,
        value = word.
        """
        yield max(kvp)


if __name__ == '__main__':
    MRWordFrequencyCount.run()