"""Scenario 03 — Loitering Over Critical Infrastructure: data generator.

Composes shared generators from `generators/` to produce realtime, historical
and static data for the S3 demo.

Story: MV AALLOTAR (MMSI 230999401, declared AIS type 70 bulk carrier) loiters
for ~4.5 h directly over the catalog `cable-pipeline-junction` polygon
(BalticConnector × Estlink corridors) on 2025-03-18 11:30–15:45 UTC. During
the loiter, a cluster of 38 unique MACs from real industrial-IoT OUIs
(Espressif `24:0A:C4`, Texas Instruments `F4:5E:AB`, u-blox `A4:3C:5A`)
appears at MAC-PRK-COAST-01 (and faintly at MAC-INK-COAST-01 and
MAC-PRV-COAST-01); 3 of them — one per OUI — continue emitting from the
same coordinates for 36+ hours after AALLOTAR departs south-east toward Tallinn.
M/V VENLA RESEARCH (MMSI 230888011, declared AIS type 52) conducts a
legitimate seabed survey ~3.7 NM SE of the polygon during the same window
as a false-positive decoy.

All shared logic comes from `generators/`; this file only composes it and
adds scenario-specific helpers (loiter-loop waypoint synthesis, custom
industrial-IoT static MAC emission tuned for open-water long-range
propagation, decoy-crew MAC injection).
"""
from __future__ import annotations

import json
import math
import random
import sys
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Any

REPO_ROOT = Path(__file__).resolve().parents[2]
if str(REPO_ROOT) not in sys.path:
    sys.path.insert(0, str(REPO_ROOT))

from generators.common import (  # noqa: E402
    ambient_mmsi,
    crew_by_ship,
    haversine_m,
    load_infrastructure,
    load_sensors,
    maybe_decimate_mac_ndjson,
    maybe_decimate_ndjson,
    sensor_lookup,
    write_csv,
    write_geojson,
    write_ndjson,
)
from generators.ais_generator import AisTrack, ais_snapshot_geojson, emit_ais  # noqa: E402
from generators.common import load_personas  # noqa: E402
from generators.mac_generator import (  # noqa: E402
    MAC_CSV_HEADER,
    MacObservation,
    MovingMacEmitter,
    simulate_moving_mac,
)
from generators.radar_generator import RadarTrack, emit_radar  # noqa: E402

# Short alias used for every timezone-aware datetime in this module.
UTC = timezone.utc
# Directory containing this script; all generated data lands beneath it.
SCENARIO_DIR = Path(__file__).resolve().parent
# Output folders for the three data families (realtime / static / historical).
OUT_REALTIME = SCENARIO_DIR / "data" / "realtime"
OUT_STATIC = SCENARIO_DIR / "data" / "static"
OUT_HISTORICAL = SCENARIO_DIR / "data" / "historical"

# ---------------------------------------------------------------------------
# Anchor times
# ---------------------------------------------------------------------------
# Overall 6-day realtime window; the ambient AIS fleet and the background MAC
# noise in `generate_realtime` are spread across it.
WINDOW_OPEN = datetime(2025, 3, 15, 0, 0, 0, tzinfo=UTC)
WINDOW_CLOSE = datetime(2025, 3, 21, 0, 0, 0, tzinfo=UTC)

# AALLOTAR timeline on 2025-03-18 (all UTC): departure, polygon entry,
# loiter start / end, polygon exit and arrival.
AAL_DEPART = datetime(2025, 3, 18, 8, 0, 0, tzinfo=UTC)
AAL_POLY_ENTRY = datetime(2025, 3, 18, 11, 18, 0, tzinfo=UTC)
LOITER_T0 = datetime(2025, 3, 18, 11, 30, 0, tzinfo=UTC)
LOITER_T1 = datetime(2025, 3, 18, 15, 30, 0, tzinfo=UTC)
AAL_POLY_EXIT = datetime(2025, 3, 18, 15, 45, 0, tzinfo=UTC)
AAL_ARRIVE = datetime(2025, 3, 18, 17, 30, 0, tzinfo=UTC)

# VENLA RESEARCH (decoy) track start / end.
VENLA_T0 = datetime(2025, 3, 18, 12, 40, 0, tzinfo=UTC)
VENLA_T1 = datetime(2025, 3, 18, 18, 0, 0, tzinfo=UTC)

# Anchor MACs persist beyond the loiter. The period is split into four
# contiguous (start, end) slices so each emission run produces a bounded
# number of observations. Slice A starts at AAL_POLY_EXIT and slice D ends
# at WINDOW_CLOSE.
ANCHOR_SLICE_A = (datetime(2025, 3, 18, 15, 45, 0, tzinfo=UTC),
                  datetime(2025, 3, 19,  6,  0, 0, tzinfo=UTC))   # 14.25 h
ANCHOR_SLICE_B = (datetime(2025, 3, 19,  6,  0, 0, tzinfo=UTC),
                  datetime(2025, 3, 19, 22,  0, 0, tzinfo=UTC))   # 16 h
ANCHOR_SLICE_C = (datetime(2025, 3, 19, 22,  0, 0, tzinfo=UTC),
                  datetime(2025, 3, 20, 12,  0, 0, tzinfo=UTC))   # 14 h
ANCHOR_SLICE_D = (datetime(2025, 3, 20, 12,  0, 0, tzinfo=UTC),
                  datetime(2025, 3, 21,  0,  0, 0, tzinfo=UTC))   # 12 h
# Sum of the four slice durations, converted from seconds to hours.
ANCHOR_TOTAL_HOURS = sum(
    (b - a).total_seconds() for a, b in
    (ANCHOR_SLICE_A, ANCHOR_SLICE_B, ANCHOR_SLICE_C, ANCHOR_SLICE_D)
) / 3600.0   # = 56.25 h, covers spec's 36+ h target

# ---------------------------------------------------------------------------
# Catalog JUNCTION polygon (read from catalogs/infrastructure.geojson)
# Centroid is computed below; kept in module scope.
# ---------------------------------------------------------------------------
# Infrastructure catalog feature collection, loaded once at import time.
INFRA_FC = load_infrastructure()
# First feature whose featureId is "cable-pipeline-junction".
# NOTE(review): `next()` has no default, so a catalog without this feature
# fails at import with a bare StopIteration.
_JUNCTION_FEAT = next(f for f in INFRA_FC["features"]
                      if f["properties"]["featureId"] == "cable-pipeline-junction")
# Outer ring of the polygon as [lon, lat] pairs.
_JUNCTION_RING = _JUNCTION_FEAT["geometry"]["coordinates"][0]
# The last ring point is dropped — presumably because the ring is closed and
# repeats its first vertex; confirm against the catalog.
_lons = [p[0] for p in _JUNCTION_RING[:-1]]
_lats = [p[1] for p in _JUNCTION_RING[:-1]]
# "Centroid" here is the plain mean of the vertices, not an area-weighted
# centroid.
JUNCTION_CLAT = sum(_lats) / len(_lats)
JUNCTION_CLON = sum(_lons) / len(_lons)
# Junction centroid ≈ (59.88425, 24.5700)

# ---------------------------------------------------------------------------
# AALLOTAR transit waypoints — adapted to catalog JUNCTION location
# ---------------------------------------------------------------------------
# Inbound leg as (timestamp, lat, lon) tuples, from departure up to the
# start of the loiter. The final point is offset from the junction centroid.
AAL_WAYPOINTS_PRE = [
    (AAL_DEPART,                                60.1450, 24.9100),  # Helsinki West Harbour
    (AAL_DEPART + timedelta(minutes=90),        60.0050, 24.5200),  # TSS entry
    (AAL_DEPART + timedelta(minutes=165),       59.9650, 24.6100),  # Porkkala approach
    (AAL_DEPART + timedelta(minutes=182),       59.9520, 24.5980),  # Deviation start
    (AAL_POLY_ENTRY,                            59.9210, 24.5900),  # Polygon N edge
    (LOITER_T0,                                 JUNCTION_CLAT + 0.003,
                                                JUNCTION_CLON + 0.005),  # Drift in
]

# Outbound leg as (timestamp, lat, lon) tuples, from the end of the loiter
# to arrival. Latitudes decrease throughout, i.e. the vessel heads south.
AAL_WAYPOINTS_POST = [
    (LOITER_T1,                                 JUNCTION_CLAT - 0.004,
                                                JUNCTION_CLON + 0.0005),  # Begin exit
    (AAL_POLY_EXIT,                             59.8470, 24.5750),  # Polygon S edge
    (AAL_DEPART + timedelta(hours=8, minutes=30), 59.7100, 24.6300),  # Mid-Gulf SW
    (AAL_ARRIVE,                                59.5500, 24.7500),  # Tallinn approach
]


def gen_loiter_waypoints(
    t0: datetime,
    t_end: datetime,
    clat: float,
    clon: float,
    *,
    loop_radius_m: float = 300.0,
    n_loops: int = 4,
    step_s: int = 60,
) -> list[tuple[datetime, float, float]]:
    """Synthesize loiter waypoints around the point (`clat`, `clon`).

    The track has three phases: a 30 min linear drift from a point NE of the
    centre onto the centre, `n_loops` overlapping circular loops of radius
    `loop_radius_m` (each 1.25 revolutions), and a 15 min near-stationary
    tail with a small deterministic wander. Uses polar math on a local
    flat-earth approximation; no external geometry deps.

    Args:
        t0: start of the loiter (first waypoint timestamp).
        t_end: end of the loiter; the last waypoint is emitted before it.
        clat: latitude of the loiter centre, degrees.
        clon: longitude of the loiter centre, degrees.
        loop_radius_m: radius of every loop in metres.
        n_loops: number of loops; any value >= 1 is accepted. Loop centres
            cycle through four fixed offsets, so loops beyond the fourth
            reuse the earlier centres.
        step_s: spacing between consecutive waypoints, seconds.

    Returns:
        A list of `(timestamp, lat, lon)` tuples.

    Raises:
        ValueError: if `n_loops < 1`, `step_s <= 0`, or the window is not
            longer than the 45 min needed for the drift and the tail.
    """
    if n_loops < 1:
        raise ValueError(f"n_loops must be >= 1, got {n_loops}")
    if step_s <= 0:
        raise ValueError(f"step_s must be positive, got {step_s}")

    out: list[tuple[datetime, float, float]] = []
    total_s = (t_end - t0).total_seconds()
    drift_s = 30 * 60
    tail_s = 15 * 60
    loops_s = total_s - drift_s - tail_s
    if loops_s <= 0:
        # A negative loop budget would produce out-of-order timestamps.
        raise ValueError(
            "loiter window must be longer than the drift + tail phases "
            f"({drift_s + tail_s} s), got {total_s} s"
        )
    each_loop_s = loops_s / n_loops

    # Local flat-earth conversion factors (metres <-> degrees) at `clat`.
    m_per_deg_lat = 111_000.0
    m_per_deg_lon = 111_000.0 * math.cos(math.radians(clat))
    dlat_per_m = 1.0 / m_per_deg_lat
    dlon_per_m = 1.0 / m_per_deg_lon

    # Drift from a NE corner toward the centre (linear interpolation).
    drift_start_lat = clat + 0.0028   # ~ +311 m N
    drift_start_lon = clon + 0.0050   # ~ +278 m E
    n_drift = max(1, int(drift_s // step_s))
    for i in range(n_drift):
        f = i / max(1, n_drift - 1) if n_drift > 1 else 1.0
        lat = drift_start_lat + f * (clat - drift_start_lat)
        lon = drift_start_lon + f * (clon - drift_start_lon)
        out.append((t0 + timedelta(seconds=i * step_s), lat, lon))

    # Overlapping loops — small inter-loop centre offsets so the circles
    # produce a "cloverleaf" pattern centred on (clat, clon).
    loop_centre_offsets = [
        (+0.0010, +0.0010),
        (-0.0010, +0.0015),
        (+0.0008, -0.0012),
        (-0.0012, -0.0008),
    ]
    t_loop_start = t0 + timedelta(seconds=drift_s)
    # Invariant across loops, so computed once.
    n_pts = max(8, int(each_loop_s // step_s))
    # Each loop completes ~1.25 revolutions so neighbouring loops overlap visibly
    theta_total = 2.0 * math.pi * 1.25
    for li in range(n_loops):
        # Cycle through the offsets so n_loops > 4 no longer raises IndexError.
        olat, olon = loop_centre_offsets[li % len(loop_centre_offsets)]
        lc_lat, lc_lon = clat + olat, clon + olon
        for i in range(n_pts):
            theta = theta_total * i / n_pts
            lat = lc_lat + dlat_per_m * loop_radius_m * math.sin(theta)
            lon = lc_lon + dlon_per_m * loop_radius_m * math.cos(theta)
            t_pt = t_loop_start + timedelta(seconds=int(li * each_loop_s + i * step_s))
            out.append((t_pt, lat, lon))

    # Near-stationary tail: a deterministic wander of at most 0.0001° in
    # latitude and 0.0002° in longitude around the centre.
    t_tail = t_loop_start + timedelta(seconds=int(n_loops * each_loop_s))
    n_tail = max(1, int((t_end - t_tail).total_seconds() // step_s))
    for i in range(n_tail):
        wander_lat = (i % 3 - 1) * 0.0001
        wander_lon = (i % 5 - 2) * 0.0001
        out.append((t_tail + timedelta(seconds=i * step_s),
                    clat + wander_lat, clon + wander_lon))
    return out


# Loiter leg on a 60 s grid between LOITER_T0 and LOITER_T1, centred on the
# junction centroid.
AAL_LOITER_WAYPOINTS = gen_loiter_waypoints(
    LOITER_T0, LOITER_T1, JUNCTION_CLAT, JUNCTION_CLON,
    loop_radius_m=300.0, n_loops=4, step_s=60,
)
# Full AALLOTAR track: inbound leg + loiter + outbound leg.
# NOTE(review): the last PRE waypoint and the first loiter waypoint both carry
# timestamp LOITER_T0 but differ slightly in latitude (+0.003 vs +0.0028), so
# the combined list holds a zero-duration segment — confirm the downstream
# track interpolation tolerates duplicate timestamps.
AAL_WAYPOINTS_ALL = AAL_WAYPOINTS_PRE + AAL_LOITER_WAYPOINTS + AAL_WAYPOINTS_POST

# ---------------------------------------------------------------------------
# VENLA RESEARCH lawn-mower decoy survey box (~3.7 NM SE of JUNCTION centroid)
# ---------------------------------------------------------------------------
# Centre of the survey box (degrees).
VENLA_BOX_LAT = 59.8250   # ~ centred SE of polygon, outside 500 m buffer
VENLA_BOX_LON = 24.6580
# Length of each N-S survey line, in degrees of latitude.
VENLA_LINE_LEN_DEG = 0.012   # ~ 1.3 km N-S
# Spacing between adjacent survey lines, in degrees of longitude.
# NOTE(review): 0.00075° of longitude at 60° N is ≈ 42 m
# (111 km × cos 60° × 0.00075), not ~75 m — confirm the intended spacing.
VENLA_LINE_SPACING_DEG_LON = 0.00075   # line spacing (see note above)

def gen_venla_waypoints() -> list[tuple[datetime, float, float]]:
    """Build the VENLA RESEARCH lawn-mower track as (time, lat, lon) tuples.

    The track is: one approach point NW of the box at `VENLA_T0`, six
    parallel N-S lines (9 min each, 1 min connector after every line,
    alternating direction, first line starting 10 min after `VENLA_T0`),
    then two departure points ending at `VENLA_T1`.
    """
    line_count = 6
    line_time = timedelta(seconds=9 * 60)      # time spent on one survey line
    turn_time = timedelta(seconds=1 * 60)      # connector between lines
    north_lat = VENLA_BOX_LAT + VENLA_LINE_LEN_DEG / 2
    south_lat = VENLA_BOX_LAT - VENLA_LINE_LEN_DEG / 2

    # Approach from NW
    track: list[tuple[datetime, float, float]] = [
        (VENLA_T0, VENLA_BOX_LAT + 0.020, VENLA_BOX_LON - 0.020),
    ]

    clock = VENLA_T0 + timedelta(minutes=10)
    for idx in range(line_count):
        line_lon = VENLA_BOX_LON - (line_count / 2) * VENLA_LINE_SPACING_DEG_LON \
            + idx * VENLA_LINE_SPACING_DEG_LON
        # Even lines run N -> S, odd lines run S -> N.
        if idx % 2 == 0:
            begin_lat, finish_lat = north_lat, south_lat
        else:
            begin_lat, finish_lat = south_lat, north_lat
        track.append((clock, begin_lat, line_lon))
        clock = clock + line_time
        track.append((clock, finish_lat, line_lon))
        clock = clock + turn_time

    # Depart NE
    track.append((VENLA_T1 - timedelta(minutes=20),
                  VENLA_BOX_LAT + 0.010, VENLA_BOX_LON + 0.020))
    track.append((VENLA_T1, 59.8400, 24.7000))
    return track

# Decoy track, built once at import time.
VENLA_WAYPOINTS = gen_venla_waypoints()

# VENLA decoy crew — invented consumer-OUI MACs (scenario-local, NOT in personas.json).
# `generate_realtime` reads `device_mac` and `oui_vendor` from each record
# when building the moving MAC emitters.
VENLA_CREW = [
    {"id": "P-VEN-MASTER",     "ship": "MV VENLA RESEARCH", "role": "Master",
     "device_mac": "A4:83:E7:5C:9B:71", "device": "iPhone",        "oui_vendor": "Apple"},
    {"id": "P-VEN-CHOFF",      "ship": "MV VENLA RESEARCH", "role": "Chief Officer",
     "device_mac": "38:F9:D3:11:22:71", "device": "Samsung phone", "oui_vendor": "Samsung"},
    {"id": "P-VEN-RESEARCHER", "ship": "MV VENLA RESEARCH", "role": "Researcher",
     "device_mac": "04:CF:8C:55:66:75", "device": "Xiaomi phone",  "oui_vendor": "Xiaomi"},
    {"id": "P-VEN-MATE",       "ship": "MV VENLA RESEARCH", "role": "Mate",
     "device_mac": "B0:7D:64:A1:5B:77", "device": "AirTag BLE",    "oui_vendor": "Apple"},
]

# ---------------------------------------------------------------------------
# Industrial-IoT cluster MACs (38 total: 17 Espressif + 13 TI + 8 u-blox)
# ---------------------------------------------------------------------------
def build_cluster_macs(seed: int = 1801) -> list[dict[str, Any]]:
    """Return the 38 cluster MAC records: 17 Espressif + 13 TI + 8 u-blox.

    Every record holds `mac`, `vendor`, `lat`, `lon`, `is_anchor`, `ordinal`
    and `prefix`. Positions are drawn uniformly within ±60 m east and north
    of the JUNCTION centroid, using a `random.Random(seed)` stream. The
    first record of each OUI group is the anchor and gets a fixed,
    human-friendly MAC instead of the counter-derived one.
    """
    # Fixed MAC assigned to the anchor (first device) of each OUI group.
    anchor_mac_by_oui = {
        "24:0A:C4": "24:0A:C4:11:00:01",
        "F4:5E:AB": "F4:5E:AB:11:00:01",
        "A4:3C:5A": "A4:3C:5A:22:00:11",
    }
    groups = (
        ("24:0A:C4", "Espressif",         17),
        ("F4:5E:AB", "Texas Instruments", 13),
        ("A4:3C:5A", "u-blox",            8),
    )

    rng = random.Random(seed)
    lat_scale = 111_000.0
    lon_scale = 111_000.0 * math.cos(math.radians(JUNCTION_CLAT))

    records: list[dict[str, Any]] = []
    serial = 0   # running counter across all groups; feeds the MAC suffix
    for oui, vendor_name, count in groups:
        for ordinal in range(count):
            serial += 1
            leads_group = ordinal == 0
            if leads_group:
                mac = anchor_mac_by_oui[oui]
            else:
                tail = ":".join(f"{(serial >> shift) & 0xFF:02X}"
                                for shift in (16, 8, 0))
                mac = f"{oui}:{tail}"
            # Draw east offset first, then north, to keep the RNG stream stable.
            east_m = rng.uniform(-60.0, 60.0)
            north_m = rng.uniform(-60.0, 60.0)
            records.append({
                "mac": mac,
                "vendor": vendor_name,
                "lat": JUNCTION_CLAT + north_m / lat_scale,
                "lon": JUNCTION_CLON + east_m / lon_scale,
                "is_anchor": leads_group,
                "ordinal": ordinal,
                "prefix": oui,
            })
    return records

# Cluster MAC records, built once at import time with the default seed.
CLUSTER_MACS = build_cluster_macs()

# ---------------------------------------------------------------------------
# Sensors / infrastructure subsets used in this scenario
# ---------------------------------------------------------------------------
# Sensor ids exported to sensors_used.geojson by `generate_static`.
SENSORS_USED_IDS = {
    "MAC-INK-COAST-01",
    "MAC-PRK-COAST-01", "MAC-PRK-COAST-02",
    "MAC-PRV-COAST-01",
    "MAC-HEL-COAST-01", "MAC-HEL-PORT-01",
    "MAC-AIR-PLN-01",
    "RAD-COAST-PRK-01",
    "RAD-PLN-01",
}

# Infrastructure feature ids exported to infrastructure_used.geojson by
# `generate_static`.
INFRA_USED_IDS = {
    "balticconnector", "balticconnector-buffer",
    "estlink-1", "estlink-1-buffer",
    "estlink-2", "estlink-2-buffer",
    "cable-pipeline-junction",
    "shipping-lane-eb",
    "finnish-eez-gof",
}


# ---------------------------------------------------------------------------
# Custom industrial-IoT static MAC emission
#
# `simulate_static_mac` uses path-loss n=2.8 hard-coded which is too lossy for
# industrial-IoT antennas operating over open water. We mirror its loop here
# with n=2.0 (line-of-sight) and a higher TX power, then filter to coastal
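# sensors within plausible long-range earshot. RSSI bands target the spec:
#   active cluster: −82..−74 dBm at ~14 km PRK sensor pair (achieved via
#     tx_dbm = -5, n = 2.0); anchors weaker (tx_dbm = -10).
# NOTE(review): the model below is rssi = tx_dbm − 10·n·log10(d). With
# tx_dbm = −5 and n = 2.0 it yields ≈ −88 dBm at d = 14 000 m (≈ −93 dBm for
# anchors), which is below the stated −82..−74 dBm band — confirm the
# intended tx power or sensor distance.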
# ---------------------------------------------------------------------------
def _industrial_iot_rssi(d_m: float, *, tx_dbm: float, n: float,
                         noise_db: float, rng: random.Random,
                         floor_dbm: float = -110.0) -> float:
    """Return a noisy log-distance RSSI estimate in dBm.

    Computes `tx_dbm - 10 * n * log10(d)` with the distance clamped to at
    least 1 m, adds one Gaussian noise sample (mean 0, sigma `noise_db`)
    drawn from `rng`, and never returns less than `floor_dbm`.
    """
    # Clamp the range so log10 never sees values below 1 m.
    clamped_range_m = 1.0 if d_m < 1.0 else d_m
    path_loss_db = 10.0 * n * math.log10(clamped_range_m)
    level_dbm = tx_dbm - path_loss_db
    level_dbm += rng.gauss(0.0, noise_db)
    return level_dbm if level_dbm > floor_dbm else floor_dbm


def emit_industrial_cluster_static(
    rec: dict[str, Any],
    active_windows: list[tuple[datetime, datetime]],
    sensors: dict[str, dict[str, Any]],
    *,
    is_anchor: bool,
    session_window_s: float,
    seed: int,
) -> list[MacObservation]:
    """Emit MacObservation rows for one static industrial-IoT MAC.

    Each `(start, end)` pair in `active_windows` is cut into consecutive
    sessions of `session_window_s` seconds (the last one is clipped to the
    window end). For every session, every candidate sensor gets an RSSI
    sample from the n=2.0 line-of-sight model; very weak samples are dropped.

    Args:
        rec: MAC record; reads `mac`, `vendor`, `lat` and `lon`.
        active_windows: `(start, end)` periods during which the MAC emits.
        sensors: sensor id -> sensor record; reads `kind`, `subtype`, `lat`
            and `lon`.
        is_anchor: selects the anchor profile (tx −10 dBm, noise σ 1.1 dB,
            mean 8 messages) instead of the active-cluster profile
            (tx −5 dBm, noise σ 2.3 dB, mean 28 messages).
        session_window_s: session length in seconds; must be positive.
        seed: seed of the private RNG, making the output reproducible.

    Returns:
        The observations, ordered by window, then session, then sensor.

    Raises:
        ValueError: if `session_window_s` is not positive (the session
            cursor would never advance and the loop would not terminate).
    """
    if session_window_s <= 0:
        raise ValueError(
            f"session_window_s must be positive, got {session_window_s}")

    rng = random.Random(seed)
    out: list[MacObservation] = []

    tx_dbm = -10.0 if is_anchor else -5.0
    noise_db = 1.1 if is_anchor else 2.3
    mean_msgs = 8 if is_anchor else 28
    session_len = timedelta(seconds=session_window_s)

    # Pre-filter sensors (skip airborne / port-clusters far from polygon).
    # Effective over-water radio horizon for low-mounted antennas is
    # ~30-35 km; cap detection range at 40 km to avoid implausible hits at
    # Hanko / Helsinki port clusters > 60 km away.
    candidates: list[tuple[str, float]] = []
    for sid, s in sensors.items():
        if s.get("kind") != "mac" or s.get("subtype") == "airborne":
            continue
        d = haversine_m(s["lat"], s["lon"], rec["lat"], rec["lon"])
        if d > 40_000:
            continue
        candidates.append((sid, d))

    for win_start, win_end in active_windows:
        t = win_start
        while t < win_end:
            session_end = min(win_end, t + session_len)
            for sid, d in candidates:
                rssi = _industrial_iot_rssi(d, tx_dbm=tx_dbm, n=2.0,
                                            noise_db=noise_db, rng=rng)
                # Below −105 dBm the sample is treated as not received.
                if rssi < -105.0:
                    continue
                # Skip occasional sample to avoid every sensor every cycle
                # at the very faint sensors (mimics propagation dropouts)
                if rssi < -98.0 and rng.random() < 0.55:
                    continue
                msg_count = max(1, int(rng.gauss(mean_msgs, 6)))
                out.append(MacObservation(
                    sensor_id=sid,
                    mac=rec["mac"],
                    session_start=t,
                    session_end=session_end,
                    message_count=msg_count,
                    avg_rssi=rssi,
                    manufacturer=rec["vendor"],
                    version="1.4.2",
                    status="OK",
                ))
            t = session_end
    return out


# ---------------------------------------------------------------------------
# Consumer-only background MAC noise (industrial-IoT OUIs filtered out)
#
# The shared `generate_background_macs` draws from every OUI in
# personas.json — including Espressif / TI / u-blox — which would
# contaminate both the realtime baseline windows and the 28-day historical
# baseline (the very signal the scenario depends on). Here we mirror the
# loop but skip the industrial-IoT vendors (a denylist), so only consumer
# prefixes remain.
# ---------------------------------------------------------------------------
_INDUSTRIAL_IOT_VENDORS = {"Espressif", "TexasInstruments", "u-blox"}
# OUI prefixes carried by the scenario's industrial-IoT cluster. They are
# excluded by prefix as well, so a differently spelled vendor label in
# personas.json cannot leak them into the background noise.
_INDUSTRIAL_IOT_OUIS = frozenset({"24:0A:C4", "F4:5E:AB", "A4:3C:5A"})


def _vendor_key(vendor: str) -> str:
    """Normalise a vendor label: case-folded, alphanumeric characters only."""
    return "".join(ch for ch in str(vendor).casefold() if ch.isalnum())


def consumer_background_macs(
    sensors: dict[str, dict[str, Any]],
    start: datetime,
    end: datetime,
    *,
    mac_count: int = 45,
    cadence_s: float = 420.0,
    seed: int = 42,
) -> list[MacObservation]:
    """Generate consumer-only background MAC sightings between `start` and `end`.

    A pool of `mac_count` random MACs is drawn from the personas OUI table,
    skipping industrial-IoT entries. Then, every `cadence_s` seconds, each
    non-airborne MAC sensor reports 0-3 MACs sampled from that pool with a
    uniformly random RSSI and message count.

    Industrial-IoT entries are recognised by vendor label — compared after
    normalisation, so "Texas Instruments" and "TexasInstruments" both match —
    and by the OUI prefixes of the scenario's cluster.

    Args:
        sensors: sensor id -> sensor record; reads `kind` and `subtype`.
        start: start of the generated period.
        end: end of the generated period; no session extends past it.
        mac_count: size of the background MAC pool.
        cadence_s: session length and step in seconds; must be positive.
        seed: seed of the private RNG, making the output reproducible.

    Returns:
        The observations, ordered by session start, then sensor.

    Raises:
        ValueError: if `cadence_s` is not positive (the time cursor would
            never advance and the loop would not terminate).
    """
    if cadence_s <= 0:
        raise ValueError(f"cadence_s must be positive, got {cadence_s}")

    personas = load_personas()
    rng = random.Random(seed)

    blocked_vendors = {_vendor_key(v) for v in _INDUSTRIAL_IOT_VENDORS}
    oui_choices: list[tuple[str, str]] = []
    for vendor, prefixes in personas["oui_vendors_real"].items():
        if _vendor_key(vendor) in blocked_vendors:
            continue
        for p in prefixes:
            # assumes prefixes are colon-separated "AA:BB:CC" strings — TODO confirm
            if str(p).upper() in _INDUSTRIAL_IOT_OUIS:
                continue
            oui_choices.append((vendor, p))

    macs: list[tuple[str, str]] = []
    for _ in range(mac_count):
        vendor, prefix = rng.choice(oui_choices)
        suffix = ":".join(f"{rng.randint(0, 255):02X}" for _ in range(3))
        macs.append((f"{prefix}:{suffix}", vendor))

    step = timedelta(seconds=cadence_s)
    out: list[MacObservation] = []
    t = start
    while t < end:
        # Clip the last session so it never runs past `end`; otherwise
        # back-to-back slices overlap and rows spill outside the window.
        session_end = min(end, t + step)
        for sid, sensor in sensors.items():
            if sensor.get("kind") != "mac" or sensor.get("subtype") == "airborne":
                continue
            n = rng.randint(0, 3)
            chosen = rng.sample(macs, k=min(n, len(macs)))
            for mac, vendor in chosen:
                rssi = rng.uniform(-105, -60)
                out.append(MacObservation(
                    sensor_id=sid,
                    mac=mac,
                    session_start=t,
                    session_end=session_end,
                    message_count=rng.randint(1, 50),
                    avg_rssi=rssi,
                    manufacturer=vendor,
                ))
        t = t + step
    return out


# ---------------------------------------------------------------------------
# Ambient background AIS
# ---------------------------------------------------------------------------
def build_ambient_ais(n_ships: int, start: datetime, end: datetime,
                      seed: int) -> list[dict[str, Any]]:
    """Generate AIS messages for up to `n_ships` ambient transits.

    Each ship follows a two-waypoint straight track between longitudes 22.5
    and 27.5 (eastbound or westbound with equal probability), starting at a
    random time inside the window and lasting 70-160 minutes. A ship whose
    track would end after `end` is skipped. All randomness comes from
    `random.Random(seed)`; each track is seeded with `seed + index`.
    """
    rng = random.Random(seed)
    # Latest allowed start offset: window length minus a 6 h margin.
    latest_offset_s = (end - start).total_seconds() - 6 * 3600
    messages: list[dict[str, Any]] = []

    for ship_idx in range(n_ships):
        heading_east = rng.random() < 0.5
        entry_lat = rng.uniform(59.65, 60.30)
        exit_lat = entry_lat + rng.uniform(-0.10, 0.10)
        entry_lon, exit_lon = (22.5, 27.5) if heading_east else (27.5, 22.5)

        begin = start + timedelta(seconds=rng.uniform(0.0, latest_offset_s))
        finish = begin + timedelta(minutes=rng.uniform(70, 160))
        if finish > end:
            continue

        # Flag mix: "FI" on the first draw below 0.7, otherwise a second
        # draw decides between "EE" and "OTHER".
        if rng.random() < 0.7:
            flag = "FI"
        elif rng.random() < 0.7:
            flag = "EE"
        else:
            flag = "OTHER"

        ship_track = AisTrack(
            mmsi=ambient_mmsi(rng, flag),
            waypoints=[(begin, entry_lat, entry_lon), (finish, exit_lat, exit_lon)],
            cadence_s=15.0,   # tighter cadence to bulk realtime volume
            destination="FIHEL" if heading_east else "EETLL",
            seed=seed + ship_idx,
        )
        messages.extend(emit_ais(ship_track))
    return messages


# ---------------------------------------------------------------------------
# Realtime
# ---------------------------------------------------------------------------
def generate_realtime() -> dict[str, int]:
    """Generate every realtime output file of the scenario under OUT_REALTIME.

    Writes ais.ndjson, ais_snapshot.geojson, plane_radar.ndjson,
    coastal_radar.ndjson, mac.ndjson and mac.csv, then asks the decimation
    helpers for companion files where they decide one is needed.

    Returns:
        Mapping of output file name to the value returned by the matching
        `write_*` helper (presumably a row/feature count — confirm in
        `generators.common`); decimated companions are recorded as
        `rep["rows"] + 1`.
    """
    sensors = sensor_lookup()
    counts: dict[str, int] = {}

    # ----- AIS — AALLOTAR -----
    aal_track = AisTrack(
        mmsi=230999401,
        waypoints=AAL_WAYPOINTS_ALL,
        cadence_s=10.0,
        destination="EETLL",
        nav_status=0,
        speed_jitter_kn=0.3,
        course_jitter_deg=2.5,
        seed=1801,
    )
    aal_msgs = emit_ais(aal_track)

    # ----- AIS — VENLA RESEARCH decoy -----
    venla_track = AisTrack(
        mmsi=230888011,
        waypoints=VENLA_WAYPOINTS,
        cadence_s=15.0,
        destination="FIHEL",
        nav_status=7,
        speed_jitter_kn=0.3,
        course_jitter_deg=3.5,
        seed=1802,
    )
    venla_msgs = emit_ais(venla_track)

    # ----- AIS — background fleet over 6-day window -----
    ambient_msgs = build_ambient_ais(n_ships=900, start=WINDOW_OPEN,
                                     end=WINDOW_CLOSE, seed=1803)

    # All three AIS sources go into one file, in this order.
    ais_all = aal_msgs + venla_msgs + ambient_msgs
    counts["ais.ndjson"] = write_ndjson(
        OUT_REALTIME / "ais.ndjson", ais_all,
        "s3-loitering-critical-infra/ais")

    snapshot_features = ais_snapshot_geojson(ais_all)
    counts["ais_snapshot.geojson"] = write_geojson(
        OUT_REALTIME / "ais_snapshot.geojson", snapshot_features,
        "s3-loitering-critical-infra/ais_snapshot")

    # ----- Plane radar (RAD-PLN-01) background pass over AAL during transit -----
    # Waypoint positions reuse AALLOTAR inbound waypoints 1 and 3, followed by
    # two points at / near the junction centroid.
    plane_radar_track = RadarTrack(
        track_id="PLN-TRK-S3-0007",
        sensor_id="RAD-PLN-01",
        waypoints=[
            (AAL_DEPART + timedelta(minutes=90),
             AAL_WAYPOINTS_ALL[1][1], AAL_WAYPOINTS_ALL[1][2]),
            (AAL_DEPART + timedelta(minutes=210),
             AAL_WAYPOINTS_ALL[3][1], AAL_WAYPOINTS_ALL[3][2]),
            (AAL_DEPART + timedelta(minutes=285),
             JUNCTION_CLAT + 0.002, JUNCTION_CLON + 0.001),
            (AAL_DEPART + timedelta(minutes=375),
             JUNCTION_CLAT, JUNCTION_CLON),
        ],
        cadence_s=6.0,
        classification="surface_large",
        rcs_m2=8200.0,
        confidence=0.92,
        seed=1804,
    )
    plane_radar_msgs = emit_radar(plane_radar_track)
    # Tag every plane-radar message with its platform id and the MMSI hint.
    for r in plane_radar_msgs:
        r["platform"] = "MAC-AIR-PLN-01"
        r["mmsi_hint"] = 230999401
    counts["plane_radar.ndjson"] = write_ndjson(
        OUT_REALTIME / "plane_radar.ndjson", plane_radar_msgs,
        "s3-loitering-critical-infra/plane_radar")

    # ----- Coastal radar RAD-COAST-PRK-01 corroborates the loiter -----
    # Track from the "Deviation start" waypoint (AAL_WAYPOINTS_PRE[3]) through
    # polygon exit (range ≤ 20 NM throughout)
    coastal_radar_waypoints = [(t, lat, lon) for (t, lat, lon)
                               in AAL_WAYPOINTS_ALL
                               if AAL_WAYPOINTS_PRE[3][0] <= t <= AAL_POLY_EXIT]
    coastal_radar_track = RadarTrack(
        track_id="RCP-2025-03-18-00471",
        sensor_id="RAD-COAST-PRK-01",
        waypoints=coastal_radar_waypoints,
        cadence_s=4.0,
        classification="surface_large",
        rcs_m2=8400.0,
        confidence=0.88,
        seed=1805,
    )
    coastal_radar_msgs = emit_radar(coastal_radar_track)
    for r in coastal_radar_msgs:
        r["mmsi_hint"] = 230999401
        # Computed range from sensor, in nautical miles (1 NM = 1852 m)
        s = sensors["RAD-COAST-PRK-01"]
        d_m = haversine_m(s["lat"], s["lon"], r["lat"], r["lon"])
        r["range_nm"] = round(d_m / 1852.0, 2)
    counts["coastal_radar.ndjson"] = write_ndjson(
        OUT_REALTIME / "coastal_radar.ndjson", coastal_radar_msgs,
        "s3-loitering-critical-infra/coastal_radar")

    # ----- MAC sensor stream -----
    mac_obs: list[MacObservation] = []

    # AALLOTAR crew — persistent MACs, riding the whole transit + loiter
    aal_active = [(AAL_DEPART - timedelta(minutes=30),
                   AAL_ARRIVE + timedelta(minutes=15))]
    for i, p in enumerate(crew_by_ship("MV AALLOTAR")):
        # Non-persistent personas are skipped; `i` still counts them, so the
        # per-persona seed depends on the position in the full crew list.
        if not p.get("persistent"):
            continue
        em = MovingMacEmitter(
            mac=p["device_mac"],
            manufacturer=p["oui_vendor"],
            waypoints=AAL_WAYPOINTS_ALL,
            active_windows=aal_active,
            seed=1900 + i,
        )
        mac_obs.extend(simulate_moving_mac(em, sensors, session_window_s=180.0))

    # VENLA decoy crew — only while on station
    venla_active = [(VENLA_T0 - timedelta(minutes=5),
                     VENLA_T1 + timedelta(minutes=5))]
    for i, p in enumerate(VENLA_CREW):
        em = MovingMacEmitter(
            mac=p["device_mac"],
            manufacturer=p["oui_vendor"],
            waypoints=VENLA_WAYPOINTS,
            active_windows=venla_active,
            seed=2000 + i,
        )
        mac_obs.extend(simulate_moving_mac(em, sensors, session_window_s=180.0))

    # Industrial-IoT cluster — 38 static MACs around JUNCTION centroid.
    # All records (anchors included) use the active-cluster profile here.
    cluster_loiter_window = [(LOITER_T0, AAL_POLY_EXIT)]
    for k, rec in enumerate(CLUSTER_MACS):
        mac_obs.extend(emit_industrial_cluster_static(
            rec, cluster_loiter_window, sensors,
            is_anchor=False, session_window_s=120.0, seed=3000 + k,
        ))

    # 3 anchor MACs — persist in multiple slices for 36+ h
    anchor_slices = [ANCHOR_SLICE_A, ANCHOR_SLICE_B, ANCHOR_SLICE_C, ANCHOR_SLICE_D]
    anchors = [r for r in CLUSTER_MACS if r["is_anchor"]]
    for k, rec in enumerate(anchors):
        mac_obs.extend(emit_industrial_cluster_static(
            rec, anchor_slices, sensors,
            is_anchor=True, session_window_s=300.0, seed=4000 + k,
        ))

    # Background coastal MAC noise across the 6-day window
    # Generated in 24-hour slices, each with its own seed (5000, 5001, ...),
    # so every day draws a fresh MAC pool.
    bg_seed = 5000
    one_day = timedelta(days=1)
    t = WINDOW_OPEN
    while t < WINDOW_CLOSE:
        bg = consumer_background_macs(
            sensors, t, min(t + one_day, WINDOW_CLOSE),
            mac_count=45, cadence_s=420.0, seed=bg_seed,
        )
        mac_obs.extend(bg)
        bg_seed += 1
        t += one_day

    # The same observations are written twice: as NDJSON and as CSV.
    mac_nd = [m.to_ndjson() for m in mac_obs]
    counts["mac.ndjson"] = write_ndjson(
        OUT_REALTIME / "mac.ndjson", mac_nd,
        "s3-loitering-critical-infra/mac")
    mac_rows = [m.to_csv_row() for m in mac_obs]
    counts["mac.csv"] = write_csv(
        OUT_REALTIME / "mac.csv", MAC_CSV_HEADER, mac_rows,
        "s3-loitering-critical-infra/mac_sessions")

    # Decimated companion files for any realtime NDJSON > 20 MB
    # (the size threshold presumably lives in the helpers — confirm). A falsy
    # report means no companion file was produced for that path.
    decim_reports = []
    AIS_DECIM_FIELDS = ["timestamp", "lat", "lon", "sog_kn", "cog_deg", "nav_status"]
    RADAR_DECIM_FIELDS = ["timestamp", "lat", "lon", "sog_kn", "cog_deg", "alt_m",
                          "speed_mps", "heading_deg", "rcs_m2", "classification",
                          "mmsi_hint", "kind"]
    for path, kw in [
        (OUT_REALTIME / "ais.ndjson", {"key_field": "mmsi", "ts_field": "ts_epoch_ms",
                                       "project_fields": AIS_DECIM_FIELDS}),
        (OUT_REALTIME / "plane_radar.ndjson", {"key_field": "track_id", "ts_field": "ts_epoch_ms",
                                               "project_fields": RADAR_DECIM_FIELDS}),
        (OUT_REALTIME / "coastal_radar.ndjson", {"key_field": "track_id", "ts_field": "ts_epoch_ms",
                                                 "project_fields": RADAR_DECIM_FIELDS}),
    ]:
        rep = maybe_decimate_ndjson(path, **kw)
        if rep:
            decim_reports.append(rep)
            counts[Path(rep["decimated"]).name] = rep["rows"] + 1
    mac_rep = maybe_decimate_mac_ndjson(OUT_REALTIME / "mac.ndjson")
    if mac_rep:
        decim_reports.append(mac_rep)
        counts[Path(mac_rep["decimated"]).name] = mac_rep["rows"] + 1
    # Print a size summary (source MB -> decimated MB) per companion file.
    if decim_reports:
        print("[S3] decimated companion files:")
        for r in decim_reports:
            print(f"  {Path(r['decimated']).name}  "
                  f"{r['source_bytes']/1024/1024:.1f}MB -> {r['decimated_bytes']/1024/1024:.1f}MB"
                  f"  ({r['rows']} rows)")

    return counts


# ---------------------------------------------------------------------------
# Static GeoJSON
# ---------------------------------------------------------------------------
def generate_static() -> dict[str, int]:
    """Write the scenario's static GeoJSON layers and report what was written.

    Five files are produced under ``OUT_STATIC``, in this order:

    1. ``area_of_interest.geojson`` — a fixed lon/lat rectangle.
    2. ``sensors_used.geojson`` — features from ``load_sensors()`` whose
       ``sensorId`` is in ``SENSORS_USED_IDS``.
    3. ``infrastructure_used.geojson`` — features from ``INFRA_FC`` whose
       ``featureId`` is in ``INFRA_USED_IDS``.
    4. ``decoy_survey_box.geojson`` — rectangle centred on the VENLA box
       coordinates.
    5. ``loiter_track.geojson`` — LineString built from
       ``AAL_LOITER_WAYPOINTS``.

    Returns:
        Mapping of output file name to the value returned by
        ``write_geojson`` for that file (``main`` prints it as a feature
        count).
    """
    written: dict[str, int] = {}

    def _emit(file_name: str, features: list, dataset: str) -> None:
        # Every static layer lands in OUT_STATIC and is tallied by file name.
        written[file_name] = write_geojson(
            OUT_STATIC / file_name, features, dataset)

    # --- Area of interest: axis-aligned rectangle, ring closed by repeating
    # the first vertex; coordinates are [lon, lat].
    west, east = 23.80, 25.80
    south, north = 59.50, 60.30
    aoi_feature = {
        "type": "Feature",
        "properties": {
            "featureId": "s3-aoi",
            "name": "S3 Area of Interest",
            "note": "Bounding polygon covering AALLOTAR transit, JUNCTION polygon and decoy survey box",
        },
        "geometry": {
            "type": "Polygon",
            "coordinates": [[
                [west, south],
                [east, south],
                [east, north],
                [west, north],
                [west, south],
            ]],
        },
    }
    _emit("area_of_interest.geojson", [aoi_feature],
          "s3-loitering-critical-infra/area_of_interest")

    # --- Sensors referenced by this scenario.
    used_sensors = []
    for feature in load_sensors()["features"]:
        if feature["properties"]["sensorId"] in SENSORS_USED_IDS:
            used_sensors.append(feature)
    _emit("sensors_used.geojson", used_sensors,
          "s3-loitering-critical-infra/sensors_used")

    # --- Infrastructure features referenced by this scenario.
    used_infra = []
    for feature in INFRA_FC["features"]:
        if feature["properties"]["featureId"] in INFRA_USED_IDS:
            used_infra.append(feature)
    _emit("infrastructure_used.geojson", used_infra,
          "s3-loitering-critical-infra/infrastructure_used")

    # --- Decoy survey box (advisory geometry): half a survey-line length
    # either side in latitude, three line spacings either side in longitude.
    lat_reach = VENLA_LINE_LEN_DEG / 2
    lon_reach = 3 * VENLA_LINE_SPACING_DEG_LON
    box_west = VENLA_BOX_LON - lon_reach
    box_east = VENLA_BOX_LON + lon_reach
    box_south = VENLA_BOX_LAT - lat_reach
    box_north = VENLA_BOX_LAT + lat_reach
    survey_box = {
        "type": "Feature",
        "properties": {
            "featureId": "venla-survey-box",
            "name": "VENLA RESEARCH lawn-mower survey box (decoy)",
            "note": "≈ 3.7 NM SE of JUNCTION centroid, outside all 500 m infra buffers",
        },
        "geometry": {
            "type": "Polygon",
            "coordinates": [[
                [box_west, box_south],
                [box_east, box_south],
                [box_east, box_north],
                [box_west, box_north],
                [box_west, box_south],
            ]],
        },
    }
    _emit("decoy_survey_box.geojson", [survey_box],
          "s3-loitering-critical-infra/decoy_survey_box")

    # --- Loiter pattern: waypoints are (timestamp, lat, lon) triples; the
    # LineString drops the timestamp and flips to [lon, lat] order.
    track_coords = []
    for _ts, wp_lat, wp_lon in AAL_LOITER_WAYPOINTS:
        track_coords.append([wp_lon, wp_lat])
    loiter_track = {
        "type": "Feature",
        "properties": {
            "featureId": "aal-loiter-track",
            "name": "AALLOTAR loiter track (drift + 4 overlapping ~600 m loops)",
            "loiter_start_ts": LOITER_T0.isoformat(),
            "loiter_end_ts": LOITER_T1.isoformat(),
            "centroid_lat": JUNCTION_CLAT,
            "centroid_lon": JUNCTION_CLON,
        },
        "geometry": {
            "type": "LineString",
            "coordinates": track_coords,
        },
    }
    _emit("loiter_track.geojson", [loiter_track],
          "s3-loitering-critical-infra/loiter_track")

    return written


# ---------------------------------------------------------------------------
# Historical baseline — 4 weeks (28 days) of background MAC traffic at the
# primary cluster sensor MAC-PRK-COAST-01 (+ neighbours) plus a few normal
# AALLOTAR transits, with ZERO industrial-IoT OUIs.
# ---------------------------------------------------------------------------
def generate_historical() -> dict[str, int]:
    """Generate the 28-day historical baseline that precedes ``WINDOW_OPEN``.

    The baseline consists of:

    * AIS tracks for clean AALLOTAR transits (MMSI 230999401, destination
      ``EETLL``) departing at 08:00 on the days 2, 6, 10, … 26 days before
      ``WINDOW_OPEN``.
    * MAC observations for each persona returned by
      ``crew_by_ship("MV AALLOTAR")`` that is flagged ``persistent``,
      moving along the same transit waypoints.
    * One day-sized batch of ``consumer_background_macs`` per day of the
      window, each with its own seed.

    Outputs ``ais_baseline.ndjson``, ``mac_baseline.ndjson`` and
    ``mac_baseline.csv`` under ``OUT_HISTORICAL``.

    Returns:
        Mapping of output file name to the value returned by the matching
        ``write_ndjson`` / ``write_csv`` call.

    Raises:
        AssertionError: if any baseline MAC observation starts with one of
            the industrial-IoT OUI prefixes, which must be absent here.
    """
    baseline_days = 28
    sensors = sensor_lookup()
    hist_end = WINDOW_OPEN
    hist_start = hist_end - timedelta(days=baseline_days)

    ais_rows: list[dict[str, Any]] = []
    mac_observations: list[MacObservation] = []

    # ---- Normal AALLOTAR transits (one every 4 days, no loiter) ----
    for days_back in range(2, 28, 4):
        transit_day = hist_end - timedelta(days=days_back)
        departure = transit_day.replace(hour=8, minute=0, second=0, microsecond=0)
        # (minutes after departure, lat, lon) for the 4-waypoint clean route.
        route = [
            (departure + timedelta(minutes=offset_min), lat, lon)
            for offset_min, lat, lon in (
                (0, 60.1450, 24.9100),
                (90, 60.0050, 24.5200),
                (180, 59.8800, 24.6000),
                (320, 59.5500, 24.7500),
            )
        ]
        ais_rows.extend(emit_ais(AisTrack(
            mmsi=230999401,
            waypoints=route,
            cadence_s=30.0,
            destination="EETLL",
            seed=6000 + transit_day.day,
        )))

        # Crew devices ride along for the whole transit. The enumerate index
        # counts every persona (persistent or not) so seeds stay stable.
        transit_span = (route[0][0], route[-1][0])
        for crew_idx, persona in enumerate(crew_by_ship("MV AALLOTAR")):
            if persona.get("persistent"):
                emitter = MovingMacEmitter(
                    mac=persona["device_mac"],
                    manufacturer=persona["oui_vendor"],
                    waypoints=route,
                    active_windows=[transit_span],
                    seed=6500 + transit_day.day * 10 + crew_idx,
                )
                mac_observations.extend(
                    simulate_moving_mac(emitter, sensors, session_window_s=300.0))

    # ---- Background MAC noise, one batch per day of the window ----
    # Strict consumer-only mix — by design, ZERO industrial-IoT OUIs.
    for day_idx in range(baseline_days):
        day_open = hist_start + timedelta(days=day_idx)
        mac_observations.extend(consumer_background_macs(
            sensors, day_open, day_open + timedelta(days=1),
            mac_count=45, cadence_s=420.0, seed=7000 + day_idx,
        ))

    # ---- Sanity: the first offending observation (if any) aborts the run
    # before anything is written to disk.
    forbidden_ouis = {"24:0A:C4", "F4:5E:AB", "A4:3C:5A"}
    leaked = next(
        (obs for obs in mac_observations if obs.mac.upper()[:8] in forbidden_ouis),
        None,
    )
    if leaked is not None:
        head = leaked.mac.upper()[:8]
        raise AssertionError(
            f"Industrial-IoT OUI {head} leaked into baseline (mac {leaked.mac})")

    # ---- Persist ----
    counts: dict[str, int] = {}
    counts["ais_baseline.ndjson"] = write_ndjson(
        OUT_HISTORICAL / "ais_baseline.ndjson", ais_rows,
        "s3-loitering-critical-infra/ais_baseline")

    counts["mac_baseline.ndjson"] = write_ndjson(
        OUT_HISTORICAL / "mac_baseline.ndjson",
        [obs.to_ndjson() for obs in mac_observations],
        "s3-loitering-critical-infra/mac_baseline")
    counts["mac_baseline.csv"] = write_csv(
        OUT_HISTORICAL / "mac_baseline.csv", MAC_CSV_HEADER,
        [obs.to_csv_row() for obs in mac_observations],
        "s3-loitering-critical-infra/mac_baseline_sessions")
    return counts


# ---------------------------------------------------------------------------
# Driver
# ---------------------------------------------------------------------------
def dir_size_bytes(p: Path) -> int:
    """Return the combined size, in bytes, of every file found under *p*.

    The directory is walked recursively; entries that are not files
    (e.g. sub-directories themselves) contribute nothing to the total.
    """
    total = 0
    for entry in p.rglob("*"):
        if entry.is_file():
            total += entry.stat().st_size
    return total


def main() -> int:
    """Run all three generators, print a summary, and persist it as JSON.

    The realtime, static and historical layers are generated in that order.
    Per-file counts, on-disk sizes of the three output directories, row
    totals and a few scenario constants are printed to stdout, and the same
    information is written to ``_generation_summary.json`` under
    ``SCENARIO_DIR / "data"``.

    Returns:
        ``0``, used as the process exit status by the script guard.
    """
    print("[S3] generating realtime layer …")
    realtime_counts = generate_realtime()
    print("[S3] generating static layer …")
    static_counts = generate_static()
    print("[S3] generating historical layer …")
    historical_counts = generate_historical()

    # ---- Per-file tables ----
    print("\n===== Scenario 03 — Loitering Over Critical Infrastructure: generation summary =====")
    print("\n[realtime]")
    for file_name, n_rows in realtime_counts.items():
        print(f"  {file_name:<30} rows={n_rows:>8}")
    print("\n[static]")
    for file_name, n_features in static_counts.items():
        print(f"  {file_name:<30} features={n_features:>5}")
    print("\n[historical]")
    for file_name, n_rows in historical_counts.items():
        print(f"  {file_name:<30} rows={n_rows:>8}")

    # ---- Directory footprints (static is small, so it is shown in KB) ----
    realtime_size = dir_size_bytes(OUT_REALTIME)
    static_size = dir_size_bytes(OUT_STATIC)
    historical_size = dir_size_bytes(OUT_HISTORICAL)
    print("\n[on disk]")
    print(f"  realtime   {realtime_size:>12} bytes ({realtime_size/1024/1024:.2f} MB)")
    print(f"  static     {static_size:>12} bytes ({static_size/1024:.2f} KB)")
    print(f"  historical {historical_size:>12} bytes ({historical_size/1024/1024:.2f} MB)")

    # ---- Row totals: only entries named like row-oriented outputs count ----
    row_file_suffixes = (".ndjson", ".csv")
    realtime_total = sum(
        n for name, n in realtime_counts.items() if name.endswith(row_file_suffixes))
    historical_total = sum(
        n for name, n in historical_counts.items() if name.endswith(row_file_suffixes))
    print(f"\n[totals] realtime rows={realtime_total}  historical rows={historical_total}")
    print(f"[anchors] persistence hours (all slices) = {ANCHOR_TOTAL_HOURS:.1f}")
    print(f"[junction] centroid lat={JUNCTION_CLAT:.5f}, lon={JUNCTION_CLON:.5f}")
    print("[done] All files written under scenarios/03-loitering-critical-infra/data/")

    # ---- Machine-readable copy of the summary ----
    anchor_records = [rec for rec in CLUSTER_MACS if rec["is_anchor"]]
    footprint = {
        "realtime": realtime_size,
        "static": static_size,
        "historical": historical_size,
    }
    summary = {
        "scenario": "s3-loitering-critical-infra",
        "realtime": realtime_counts,
        "static": static_counts,
        "historical": historical_counts,
        "bytes": footprint,
        "anchor_persistence_hours": ANCHOR_TOTAL_HOURS,
        "junction_centroid": {"lat": JUNCTION_CLAT, "lon": JUNCTION_CLON},
        "cluster_mac_count": len(CLUSTER_MACS),
        "anchor_mac_count": len(anchor_records),
    }
    summary_path = SCENARIO_DIR / "data" / "_generation_summary.json"
    summary_path.write_text(json.dumps(summary, indent=2), encoding="utf-8")
    return 0


if __name__ == "__main__":
    # Script entry point: main()'s return value becomes the exit status.
    sys.exit(main())
