summaryrefslogtreecommitdiff
path: root/rushs/data-clash/step-3
diff options
context:
space:
mode:
authorMartial Simon <msimon_fr@hotmail.com>2025-09-15 01:08:27 +0200
committerMartial Simon <msimon_fr@hotmail.com>2025-09-15 01:08:27 +0200
commitc9b6b9a5ca082fe7c1b6f58d7713f785a9eb6a5c (patch)
tree3e4f42f93c7ae89a364e4d51fff6e5cec4e55fa9 /rushs/data-clash/step-3
add: graphs et rushs
Diffstat (limited to 'rushs/data-clash/step-3')
-rw-r--r--rushs/data-clash/step-3/napoleon_analysis.sql46
-rw-r--r--rushs/data-clash/step-3/napoleon_connections.sql29
-rw-r--r--rushs/data-clash/step-3/napoleon_filter.sql30
-rw-r--r--rushs/data-clash/step-3/napoleon_synthesis.sql43
-rw-r--r--rushs/data-clash/step-3/napoleon_timeline.sql85
5 files changed, 233 insertions, 0 deletions
diff --git a/rushs/data-clash/step-3/napoleon_analysis.sql b/rushs/data-clash/step-3/napoleon_analysis.sql
new file mode 100644
index 0000000..cdbb4bd
--- /dev/null
+++ b/rushs/data-clash/step-3/napoleon_analysis.sql
@@ -0,0 +1,46 @@
+SELECT
+ id,
+ right(location, 7) AS location,
+ end_time - start_time AS duration,
+ left(details, 10) AS details,
+ CASE
+ WHEN
+ NOT regexp_like(details, '^([[:ascii:]])*$')
+ THEN 'invalid_characters'
+ WHEN
+ start_time > end_time OR location NOT LIKE '%MMT'
+ THEN 'improbable_values'
+ WHEN start_time IS NULL OR end_time IS NULL THEN 'missing_timestamp'
+ WHEN
+ location IS NULL OR latitude IS NULL OR longitude IS NULL
+ THEN 'missing_location'
+ WHEN
+ EXISTS (
+ SELECT *
+ FROM napoleon_data.public_reports AS t
+ WHERE
+ r.id > t.id
+ AND r.location = t.location
+ AND r.start_time = t.start_time
+ AND r.end_time = t.end_time
+ AND r.details = t.details
+ )
+ THEN 'duplicate_report'
+ END AS issue
+FROM napoleon_data.public_reports AS r
+WHERE
+ NOT regexp_like(details, '^([[:ascii:]])*$')
+ OR start_time IS NULL OR end_time IS NULL
+ OR start_time > end_time
+ OR location NOT LIKE '%MMT'
+ OR location IS NULL OR latitude IS NULL OR longitude IS NULL
+ OR EXISTS (
+ SELECT *
+ FROM napoleon_data.public_reports AS t
+ WHERE
+ r.id > t.id
+ AND r.location = t.location
+ AND r.start_time = t.start_time
+ AND r.end_time = t.end_time
+ AND r.details = t.details
+ )
diff --git a/rushs/data-clash/step-3/napoleon_connections.sql b/rushs/data-clash/step-3/napoleon_connections.sql
new file mode 100644
index 0000000..170f635
--- /dev/null
+++ b/rushs/data-clash/step-3/napoleon_connections.sql
@@ -0,0 +1,29 @@
+SELECT
+ t.id AS individual_id,
+ t.name,
+ t.role,
+ t.affiliation AS current_affiliation,
+ p.affiliation AS historical_affiliation
+FROM surveillance_data.current_targets AS t
+INNER JOIN napoleon_data.historical_people AS p
+ ON
+ t.name = p.name
+ OR t.affiliation != 'N/A'
+WHERE
+ EXISTS (
+ SELECT 1 FROM (SELECT
+ a1.group_name_1,
+ a2.group_name_2
+ FROM surveillance_data.group_affinities AS a1,
+ surveillance_data.group_affinities AS a2
+ WHERE a1.group_name_2 = a2.group_name_1
+ UNION
+ SELECT
+ group_name_1,
+ group_name_2
+ FROM surveillance_data.group_affinities) AS test
+ WHERE
+ (group_name_1 = p.affiliation AND t.affiliation = group_name_2)
+ OR (group_name_1 = t.affiliation AND group_name_2 = p.affiliation)
+ )
+ OR t.affiliation = p.affiliation
diff --git a/rushs/data-clash/step-3/napoleon_filter.sql b/rushs/data-clash/step-3/napoleon_filter.sql
new file mode 100644
index 0000000..797f085
--- /dev/null
+++ b/rushs/data-clash/step-3/napoleon_filter.sql
@@ -0,0 +1,30 @@
+WITH proximity_filtered AS (
+ SELECT DISTINCT ON (r.id)
+ r.id,
+ r.location,
+ r.details,
+ r.start_time,
+ MIN(6371 * ACOS(
+ COS(RADIANS(r.latitude)) * COS(RADIANS(l.latitude))
+ * COS(RADIANS(l.longitude) - RADIANS(r.longitude))
+ + SIN(RADIANS(r.latitude)) * SIN(RADIANS(l.latitude))
+ )) AS distance
+ FROM napoleon_data.public_reports AS r,
+ napoleon_data.critical_locations AS l
+ WHERE
+ 300 >= (6371 * ACOS(
+ COS(RADIANS(r.latitude)) * COS(RADIANS(l.latitude))
+ * COS(RADIANS(l.longitude) - RADIANS(r.longitude))
+ + SIN(RADIANS(r.latitude)) * SIN(RADIANS(l.latitude))
+ ))
+ AND details ILIKE '%Napoleon%'
+ AND start_time BETWEEN '2069-01-01' AND '2070-12-31'
+ GROUP BY r.id, r.location, r.details, r.start_time
+)
+
+SELECT
+ id,
+ location,
+ distance
+FROM proximity_filtered
+ORDER BY distance, id
diff --git a/rushs/data-clash/step-3/napoleon_synthesis.sql b/rushs/data-clash/step-3/napoleon_synthesis.sql
new file mode 100644
index 0000000..f71dd09
--- /dev/null
+++ b/rushs/data-clash/step-3/napoleon_synthesis.sql
@@ -0,0 +1,43 @@
+CREATE VIEW napoleon_affair_view AS
+WITH count_report AS (
+ SELECT
+ location,
+ COUNT(*) AS total_reports
+ FROM napoleon_data.public_reports
+ GROUP BY location
+),
+persons_mention AS (
+ SELECT
+ location,
+ name AS person,
+ count(*) AS occ
+ FROM napoleon_data.public_reports
+ INNER JOIN napoleon_data.historical_people
+ ON details LIKE concat('%', name, '%')
+ GROUP BY location, name
+),
+most_mentioned AS (
+ SELECT
+ location,
+ person AS most_mentioned_person,
+ occ AS occurrences_most_mentioned_person,
+ row_number() OVER (PARTITION BY location ORDER BY occ DESC) AS rn
+ FROM persons_mention
+),
+unique_people_count AS (
+ SELECT
+ location,
+ COUNT(DISTINCT person) AS total_people_mentioned
+ FROM persons_mention
+ GROUP BY location
+)
+
+SELECT
+ rc.location,
+ total_reports,
+ total_people_mentioned,
+ most_mentioned_person,
+ occurrences_most_mentioned_person
+FROM count_report AS rc
+INNER JOIN unique_people_count AS uc ON rc.location = uc.location
+INNER JOIN most_mentioned AS mmmm ON mmmm.location = rc.location AND mmmm.rn = 1;
diff --git a/rushs/data-clash/step-3/napoleon_timeline.sql b/rushs/data-clash/step-3/napoleon_timeline.sql
new file mode 100644
index 0000000..eff8be1
--- /dev/null
+++ b/rushs/data-clash/step-3/napoleon_timeline.sql
@@ -0,0 +1,85 @@
+WITH rapports_valides AS (
+ SELECT
+ id,
+ location,
+ start_time,
+ end_time,
+ latitude,
+ longitude
+ FROM napoleon_data.public_reports
+ WHERE location IS NOT NULL
+ AND start_time IS NOT NULL
+ AND end_time IS NOT NULL
+ AND latitude IS NOT NULL
+ AND longitude IS NOT NULL
+),
+rapports_ordonnes AS (
+ SELECT
+ id,
+ location,
+ start_time,
+ end_time,
+ latitude,
+ longitude,
+ LAG(id) OVER (ORDER BY start_time) AS pid,
+ LAG(location) OVER (ORDER BY start_time) AS ploc,
+ LAG(start_time) OVER (ORDER BY start_time) AS pstart,
+ LAG(end_time) OVER (ORDER BY start_time) AS pend,
+ LAG(latitude) OVER (ORDER BY start_time) AS plat,
+ LAG(longitude) OVER (ORDER BY start_time) AS plong
+ FROM rapports_valides
+),
+diffrapports AS (
+ SELECT
+ id,
+ location,
+ start_time,
+ end_time,
+ latitude,
+ longitude,
+ pid,
+ ploc,
+ pstart,
+ pend,
+ plat,
+ plong,
+ extract(EPOCH from (start_time - pend)) AS tdiff,
+ (6371 * ACOS(
+ COS(RADIANS(latitude)) * COS(RADIANS(plat))
+ * COS(RADIANS(plong) - RADIANS(longitude))
+ + SIN(RADIANS(latitude)) * SIN(RADIANS(plat))
+ )) AS d
+ FROM rapports_ordonnes
+),
+analyse_rapports AS (
+ SELECT
+ id,
+ location,
+ start_time,
+ end_time,
+ latitude,
+ longitude,
+ pid,
+ ploc,
+ pstart,
+ pend,
+ plat,
+ plong,
+ tdiff,
+ d,
+ CASE
+ WHEN start_time < pend THEN 'overlapping_timeframes'
+ WHEN tdiff < 7200 AND d > 500 THEN 'moving_fast'
+ WHEN extract(EPOCH from (end_time - start_time)) > 86400 THEN 'long_duration'
+ WHEN d IS NULL OR tdiff IS NULL THEN 'missing_data'
+ WHEN d > 500 THEN 'long_distance'
+ ELSE 'clear'
+ END AS issue
+ FROM diffrapports
+)
+SELECT
+ tdiff AS time_difference,
+ d AS distance,
+ issue
+FROM analyse_rapports
+ORDER BY start_time