summaryrefslogtreecommitdiff
path: root/rushs/data-clash/step-3/napoleon_timeline.sql
blob: eff8be1b52bc583bafc327f958ed67fc4f6ade79 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
WITH rapports_valides AS (
    SELECT
        id,
        location,
        start_time,
        end_time,
        latitude,
        longitude
    FROM napoleon_data.public_reports
    WHERE location IS NOT NULL
    AND start_time IS NOT NULL
    AND end_time IS NOT NULL
    AND latitude IS NOT NULL
    AND longitude IS NOT NULL
),
rapports_ordonnes AS (
    SELECT
        id,
        location,
        start_time,
        end_time,
        latitude,
        longitude,
        LAG(id) OVER (ORDER BY start_time) AS pid,
        LAG(location) OVER (ORDER BY start_time) AS ploc,
        LAG(start_time) OVER (ORDER BY start_time) AS pstart,
        LAG(end_time) OVER (ORDER BY start_time) AS pend,
        LAG(latitude) OVER (ORDER BY start_time) AS plat,
        LAG(longitude) OVER (ORDER BY start_time) AS plong
    FROM rapports_valides
),
diffrapports AS (
    SELECT
        id,
        location,
        start_time,
        end_time,
        latitude,
        longitude,
        pid,
        ploc,
        pstart,
        pend,
        plat,
        plong,
        extract(EPOCH from (start_time - pend)) AS tdiff,
        (6371 * ACOS(
            COS(RADIANS(latitude)) * COS(RADIANS(plat))
            * COS(RADIANS(plong) - RADIANS(longitude))
            + SIN(RADIANS(latitude)) * SIN(RADIANS(plat))
        )) AS d
    FROM rapports_ordonnes
),
analyse_rapports AS (
    SELECT
        id,
        location,
        start_time,
        end_time,
        latitude,
        longitude,
        pid,
        ploc,
        pstart,
        pend,
        plat,
        plong,
        tdiff,
        d,
        CASE
            WHEN start_time < pend THEN 'overlapping_timeframes'
            WHEN tdiff < 7200 AND d > 500 THEN 'moving_fast'
            WHEN extract(EPOCH from (end_time - start_time)) > 86400 THEN 'long_duration'
            WHEN d IS NULL OR tdiff IS NULL THEN 'missing_data'
            WHEN d > 500 THEN 'long_distance'
            ELSE 'clear'
        END AS issue
    FROM diffrapports
)
SELECT
    tdiff AS time_difference,
    d AS distance,
    issue
FROM analyse_rapports
ORDER BY start_time