diff options
Diffstat (limited to 'rushs/data-clash/step-3/napoleon_timeline.sql')
| -rw-r--r-- | rushs/data-clash/step-3/napoleon_timeline.sql | 85 |
1 files changed, 85 insertions, 0 deletions
diff --git a/rushs/data-clash/step-3/napoleon_timeline.sql b/rushs/data-clash/step-3/napoleon_timeline.sql new file mode 100644 index 0000000..eff8be1 --- /dev/null +++ b/rushs/data-clash/step-3/napoleon_timeline.sql @@ -0,0 +1,85 @@ +WITH rapports_valides AS ( + SELECT + id, + location, + start_time, + end_time, + latitude, + longitude + FROM napoleon_data.public_reports + WHERE location IS NOT NULL + AND start_time IS NOT NULL + AND end_time IS NOT NULL + AND latitude IS NOT NULL + AND longitude IS NOT NULL +), +rapports_ordonnes AS ( + SELECT + id, + location, + start_time, + end_time, + latitude, + longitude, + LAG(id) OVER (ORDER BY start_time) AS pid, + LAG(location) OVER (ORDER BY start_time) AS ploc, + LAG(start_time) OVER (ORDER BY start_time) AS pstart, + LAG(end_time) OVER (ORDER BY start_time) AS pend, + LAG(latitude) OVER (ORDER BY start_time) AS plat, + LAG(longitude) OVER (ORDER BY start_time) AS plong + FROM rapports_valides +), +diffrapports AS ( + SELECT + id, + location, + start_time, + end_time, + latitude, + longitude, + pid, + ploc, + pstart, + pend, + plat, + plong, + extract(EPOCH from (start_time - pend)) AS tdiff, + (6371 * ACOS( + COS(RADIANS(latitude)) * COS(RADIANS(plat)) + * COS(RADIANS(plong) - RADIANS(longitude)) + + SIN(RADIANS(latitude)) * SIN(RADIANS(plat)) + )) AS d + FROM rapports_ordonnes +), +analyse_rapports AS ( + SELECT + id, + location, + start_time, + end_time, + latitude, + longitude, + pid, + ploc, + pstart, + pend, + plat, + plong, + tdiff, + d, + CASE + WHEN start_time < pend THEN 'overlapping_timeframes' + WHEN tdiff < 7200 AND d > 500 THEN 'moving_fast' + WHEN extract(EPOCH from (end_time - start_time)) > 86400 THEN 'long_duration' + WHEN d IS NULL OR tdiff IS NULL THEN 'missing_data' + WHEN d > 500 THEN 'long_distance' + ELSE 'clear' + END AS issue + FROM diffrapports +) +SELECT + tdiff AS time_difference, + d AS distance, + issue +FROM analyse_rapports +ORDER BY start_time |
