summaryrefslogtreecommitdiff
path: root/rushs/data-clash/step-3/napoleon_analysis.sql
blob: cdbb4bd752a9bcb62189eb7e7d8d0f5327f6bff9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
-- Lists every row of napoleon_data.public_reports that has at least one
-- data-quality issue, labelled with the first issue that matches.
--
-- Output columns:
--   id        report id, unchanged
--   location  last 7 characters of the stored location
--   duration  end_time - start_time
--   details   first 10 characters of the stored details
--   issue     first matching label, tested in this order:
--               invalid_characters  details does not consist solely of
--                                   [[:ascii:]] characters
--               improbable_values   start_time is after end_time, or location
--                                   does not end in 'MMT'
--               missing_timestamp   start_time or end_time is NULL
--               missing_location    location, latitude or longitude is NULL
--               duplicate_report    a row with a smaller id has the same
--                                   location, start_time, end_time and details
--
-- The classification is written once, in the CTE, and the outer query keeps
-- the rows for which it produced a label. The CASE has no ELSE, so issue is
-- NULL exactly when none of the predicates is TRUE; filtering on
-- "issue IS NOT NULL" is therefore the same as OR-ing all predicates together,
-- without having to repeat them (and the correlated EXISTS) in a WHERE clause.
WITH classified AS (
    SELECT
        r.id,
        right(r.location, 7) AS location,
        r.end_time - r.start_time AS duration,
        left(r.details, 10) AS details,
        CASE
            -- A NULL details makes regexp_like yield NULL, so such rows are
            -- not reported as invalid_characters.
            WHEN
                NOT regexp_like(r.details, '^([[:ascii:]])*$')
                THEN 'invalid_characters'
            -- Comparisons against NULL are UNKNOWN, so a NULL timestamp or
            -- location does not match here by itself and falls through to the
            -- missing_* checks below.
            WHEN
                r.start_time > r.end_time OR r.location NOT LIKE '%MMT'
                THEN 'improbable_values'
            WHEN
                r.start_time IS NULL OR r.end_time IS NULL
                THEN 'missing_timestamp'
            WHEN
                r.location IS NULL
                OR r.latitude IS NULL
                OR r.longitude IS NULL
                THEN 'missing_location'
            -- Only the later copies (larger id) are flagged; the row with the
            -- smallest id in a group of identical reports is kept.
            -- NOTE(review): "=" never matches NULL, so two rows that agree on
            -- everything but both have NULL details are not treated as
            -- duplicates. Confirm this is the intended definition.
            WHEN
                EXISTS (
                    SELECT *
                    FROM napoleon_data.public_reports AS t
                    WHERE
                        r.id > t.id
                        AND r.location = t.location
                        AND r.start_time = t.start_time
                        AND r.end_time = t.end_time
                        AND r.details = t.details
                )
                THEN 'duplicate_report'
        END AS issue
    FROM napoleon_data.public_reports AS r
)

SELECT
    id,
    location,
    duration,
    details,
    issue
FROM classified
WHERE issue IS NOT NULL