-- Data-quality report for napoleon_data.public_reports.
-- Origin: rushs/data-clash/step-3/napoleon_analysis.sql, commit c9b6b9a5
-- ("add: graphs et rushs", Martial Simon, 2025-09-15).
--
-- Returns one row per report that fails at least one check, labelled with the
-- FIRST check it fails. Checks are evaluated in this order:
--   1. invalid_characters  details is not made up solely of characters from
--                          the POSIX [[:ascii:]] class
--   2. improbable_values   start_time is after end_time, or location does not
--                          end in 'MMT'
--   3. missing_timestamp   start_time or end_time is NULL
--   4. missing_location    location, latitude or longitude is NULL
--   5. duplicate_report    a report with a smaller id has the same location,
--                          start_time, end_time and details
--
-- Output columns: id, location (last 7 characters), duration
-- (end_time - start_time), details (first 10 characters), issue.
WITH classified AS (
    -- Label every report exactly once; issue stays NULL when no check fails,
    -- so the outer query can filter on it instead of repeating each predicate.
    SELECT
        r.id,
        right(r.location, 7) AS location,
        r.end_time - r.start_time AS duration,
        left(r.details, 10) AS details,
        CASE
            WHEN NOT regexp_like(r.details, '^([[:ascii:]])*$')
                THEN 'invalid_characters'
            -- Both comparisons evaluate to NULL (not TRUE) when an operand is
            -- NULL, so rows with missing values fall through to the checks
            -- below.
            WHEN r.start_time > r.end_time OR r.location NOT LIKE '%MMT'
                THEN 'improbable_values'
            WHEN r.start_time IS NULL OR r.end_time IS NULL
                THEN 'missing_timestamp'
            WHEN
                r.location IS NULL
                OR r.latitude IS NULL
                OR r.longitude IS NULL
                THEN 'missing_location'
            -- Only the later copies (larger id) are flagged; the row with the
            -- smallest id in a group of identical reports is kept.
            -- NOTE(review): matching uses `=`, so two reports whose details
            -- are both NULL never match each other. Switch to
            -- IS NOT DISTINCT FROM if those should count as duplicates.
            WHEN
                EXISTS (
                    SELECT *
                    FROM napoleon_data.public_reports AS earlier
                    WHERE
                        r.id > earlier.id
                        AND r.location = earlier.location
                        AND r.start_time = earlier.start_time
                        AND r.end_time = earlier.end_time
                        AND r.details = earlier.details
                )
                THEN 'duplicate_report'
        END AS issue
    FROM napoleon_data.public_reports AS r
)

SELECT
    id,
    location,
    duration,
    details,
    issue
FROM classified
WHERE issue IS NOT NULL
-- Sorted so that repeated runs list the flagged reports in the same order.
ORDER BY id;