diff options
Diffstat (limited to 'rushs/data-clash/step-3/napoleon_analysis.sql')
| -rw-r--r-- | rushs/data-clash/step-3/napoleon_analysis.sql | 46 |
1 files changed, 46 insertions, 0 deletions
-- rushs/data-clash/step-3/napoleon_analysis.sql
--
-- Lists every row of napoleon_data.public_reports that shows at least one
-- data-quality problem, together with a single label describing it.
--
-- Output columns:
--   id        report id, unchanged
--   location  last 7 characters of the stored location
--   duration  end_time - start_time
--   details   first 10 characters of the stored details
--   issue     label of the first matching check (see the CASE below)
--
-- The checks are evaluated once, in the CTE, and the outer query keeps the
-- rows for which a label was produced. A CASE without ELSE yields NULL when
-- no branch is TRUE, so "issue IS NOT NULL" selects exactly the rows for
-- which at least one check holds; this avoids repeating the whole predicate
-- list (and the correlated duplicate probe) in a separate WHERE clause.
WITH classified AS (
    SELECT
        r.id,
        r.location,
        r.start_time,
        r.end_time,
        r.details,
        -- Branch order is the priority order: a row that fails several
        -- checks is reported under the first one listed.
        CASE
            -- details holds at least one character outside [:ascii:].
            -- A NULL details makes this test NULL, so it falls through.
            WHEN
                NOT regexp_like(r.details, '^([[:ascii:]])*$')
                THEN 'invalid_characters'
            -- Report ends before it starts, or location does not end in
            -- 'MMT'. Comparisons against NULL are not TRUE, so rows with
            -- missing values fall through to the branches below.
            WHEN
                r.start_time > r.end_time OR r.location NOT LIKE '%MMT'
                THEN 'improbable_values'
            WHEN
                r.start_time IS NULL OR r.end_time IS NULL
                THEN 'missing_timestamp'
            WHEN
                r.location IS NULL
                OR r.latitude IS NULL
                OR r.longitude IS NULL
                THEN 'missing_location'
            -- Another report with a smaller id has the same location,
            -- timestamps and details; the lowest-id report of such a group
            -- is therefore not flagged.
            -- NOTE(review): "=" is never TRUE for NULL, so otherwise
            -- identical reports whose details are NULL are not flagged
            -- here; use IS NOT DISTINCT FROM if that case should count.
            WHEN
                EXISTS (
                    SELECT *
                    FROM napoleon_data.public_reports AS t
                    WHERE
                        r.id > t.id
                        AND r.location = t.location
                        AND r.start_time = t.start_time
                        AND r.end_time = t.end_time
                        AND r.details = t.details
                )
                THEN 'duplicate_report'
        END AS issue
    FROM napoleon_data.public_reports AS r
)

SELECT
    id,
    right(location, 7) AS location,
    end_time - start_time AS duration,
    left(details, 10) AS details,
    issue
FROM classified
WHERE issue IS NOT NULL
