diff options
| author | Martial Simon <msimon_fr@hotmail.com> | 2026-03-01 22:42:08 +0100 |
|---|---|---|
| committer | Martial Simon <msimon_fr@hotmail.com> | 2026-03-01 22:42:08 +0100 |
| commit | 9a02eacc574a5de22f0fccb32a7b20cae85ac097 (patch) | |
| tree | 16ca07a2a90c5efb2c9009301f32e15f4e78feff /BDA/tp3.py | |
| parent | 7c1f2354a5c34f0be273e16acebd699baa164266 (diff) | |
feat: semaine du 23 février
Diffstat (limited to 'BDA/tp3.py')
| -rw-r--r-- | BDA/tp3.py | 25 |
1 files changed, 25 insertions, 0 deletions
"""Find the most frequent word in the input with a two-step mrjob job.

Originally added as BDA/tp3.py.
"""
import re

from mrjob.job import MRJob
from mrjob.step import MRStep

# A "word" is a maximal run of word characters and apostrophes.
WORD_RE = re.compile(r"[\w']+")


class MRWordFrequencyCount(MRJob):
    """Count word occurrences, then emit the single most frequent word.

    Step 1 counts every word. Step 2 receives all ``(count, word)`` pairs
    under one shared key (``None``) and keeps the largest pair.
    """

    def steps(self):
        """Return the two steps of the job, in execution order."""
        return [
            MRStep(
                mapper=self.mapper,
                # Summing is associative, so partial sums computed by the
                # combiner do not change the reducer's final totals.
                combiner=self.combiner_count_words,
                reducer=self.reducer_count_words,
            ),
            MRStep(reducer=self.reducer_find_max_word),
        ]

    def mapper(self, _, line):
        """Yield ``(word, 1)`` for every word matched in *line*.

        The input key is ignored.
        """
        # NOTE(review): words are counted case-sensitively ("The" and "the"
        # are distinct keys). Lower-case here if that is not intended.
        for word in WORD_RE.findall(line):
            yield word, 1

    def combiner_count_words(self, word, counts):
        """Yield ``(word, partial_sum)`` to shrink the data sent to step 1's
        reducer."""
        yield word, sum(counts)

    def reducer_count_words(self, key, values):
        """Yield ``(None, (total, word))`` for the word *key*.

        Using ``None`` as the key for every word routes all pairs to a single
        call of the next step's reducer.
        """
        yield None, (sum(values), key)

    def reducer_find_max_word(self, _, kvp):
        """Yield the largest ``(count, word)`` pair from *kvp*.

        Pairs compare by count first and by word second, so on equal counts
        the word that sorts last wins. Yielding the pair itself emits the
        count as the output key and the word as the output value.
        """
        yield max(kvp)


if __name__ == '__main__':
    MRWordFrequencyCount.run()
