"""Scenario 01 — AIS Dark Near Cable: data generator.

Reads canonical catalogs, builds AALLOTAR's Sillamäe → Hanko track per
spec waypoints, drops a 33 min 20 s AIS dark window over the Estlink
corridor, emits crew + burner + background MAC sessions, runs the plane
radar through the dark window, and adds a decoy VENLA RESEARCH transit.

All shared logic comes from `generators/`; this file only composes it.
"""
from __future__ import annotations

import json
import random
import sys
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Any

# Put the repository root (parents[2], i.e. two levels above this file's
# directory) on sys.path so the `generators` imports below resolve when this
# file is run directly as a script. This must happen before those imports.
REPO_ROOT = Path(__file__).resolve().parents[2]
if str(REPO_ROOT) not in sys.path:
    sys.path.insert(0, str(REPO_ROOT))

from generators.common import (  # noqa: E402
    ambient_mmsi,
    crew_by_ship,
    load_infrastructure,
    load_sensors,
    maybe_decimate_mac_ndjson,
    maybe_decimate_ndjson,
    sensor_lookup,
    write_csv,
    write_geojson,
    write_ndjson,
)
from generators.ais_generator import AisTrack, ais_snapshot_geojson, emit_ais  # noqa: E402
from generators.mac_generator import (  # noqa: E402
    MAC_CSV_HEADER,
    MovingMacEmitter,
    generate_background_macs,
    simulate_moving_mac,
)
from generators.radar_generator import RadarTrack, emit_radar  # noqa: E402

# Short alias; every datetime constructed in this file is UTC-aware.
UTC = timezone.utc
# Output layout: <directory of this file>/data/{realtime,static,historical}.
SCENARIO_DIR = Path(__file__).resolve().parent
OUT_REALTIME = SCENARIO_DIR / "data" / "realtime"
OUT_STATIC = SCENARIO_DIR / "data" / "static"
OUT_HISTORICAL = SCENARIO_DIR / "data" / "historical"

# ---------------------------------------------------------------------------
# Anchor times (per harmonized spec)
# ---------------------------------------------------------------------------
# Realtime observation window: 05:30:00–08:00:00 UTC (2 h 30 min).
WINDOW_OPEN = datetime(2025, 3, 18, 5, 30, 0, tzinfo=UTC)
WINDOW_CLOSE = datetime(2025, 3, 18, 8, 0, 0, tzinfo=UTC)
# AALLOTAR's AIS gap: 06:25:10–06:58:30 UTC (33 min 20 s).
AIS_DARK_START = datetime(2025, 3, 18, 6, 25, 10, tzinfo=UTC)
AIS_DARK_END = datetime(2025, 3, 18, 6, 58, 30, tzinfo=UTC)
# Burner MAC activity: 06:32:40–06:54:10 UTC (21 min 30 s), inside the AIS gap.
BURNER_START = datetime(2025, 3, 18, 6, 32, 40, tzinfo=UTC)
BURNER_END = datetime(2025, 3, 18, 6, 54, 10, tzinfo=UTC)
# NOTE(review): BURNER_HEL_PORT_* are not referenced anywhere in the visible
# part of this file — confirm whether another module consumes them or whether
# a Helsinki-port burner sighting still needs to be wired in. Note also that
# BURNER_HEL_PORT_END (06:55:00) is later than BURNER_END (06:54:10).
BURNER_HEL_PORT_START = datetime(2025, 3, 18, 6, 48, 0, tzinfo=UTC)
BURNER_HEL_PORT_END = datetime(2025, 3, 18, 6, 55, 0, tzinfo=UTC)

# AALLOTAR (MMSI 230999401) waypoints (t_rel offsets from WINDOW_OPEN).
# Each entry is (timestamp, lat, lon). The track runs westbound along the
# Gulf of Finland (27.55 °E → 23.05 °E) at latitudes 59.82–60.05 °N.
# Previous waypoints (60.17–60.21 °N near lon 24–26 °E) placed the vessel on the
# Espoo/Helsinki coast — corrected to keep the track in open water.
# The 3310 s and 5310 s offsets coincide with AIS_DARK_START (06:25:10) and
# AIS_DARK_END (06:58:30); the last offset (9000 s) equals WINDOW_CLOSE.
AAL_WAYPOINTS = [
    (WINDOW_OPEN + timedelta(seconds=0),     60.0500, 27.5500),  # entry (mid-Gulf east)
    (WINDOW_OPEN + timedelta(seconds=850),   60.0200, 27.0500),  # Kotka lane
    (WINDOW_OPEN + timedelta(seconds=2030),  59.9800, 26.0500),  # Porvoo lane
    (WINDOW_OPEN + timedelta(seconds=3310),  59.9700, 25.1800),  # AIS last (dark start)
    (WINDOW_OPEN + timedelta(seconds=4080),  59.9600, 24.8500),  # Estlink-1 crossing (dark)
    (WINDOW_OPEN + timedelta(seconds=4680),  59.9550, 24.7200),  # loiter over cable (dark)
    (WINDOW_OPEN + timedelta(seconds=5310),  59.9500, 24.6200),  # AIS resumes
    (WINDOW_OPEN + timedelta(seconds=6360),  59.9300, 24.3000),  # heading west
    (WINDOW_OPEN + timedelta(seconds=7500),  59.9000, 23.7000),  # south of Inkoo
    (WINDOW_OPEN + timedelta(seconds=9000),  59.8200, 23.0500),  # Hanko approach
]

# VENLA RESEARCH (MMSI 230888011): enters from the west at WINDOW_OPEN + 10 min
# (= VEN_T0); survey loiter from VEN_T0 + 45 min to VEN_T0 + 125 min.
# All offsets below are relative to VEN_T0, not WINDOW_OPEN; the final
# waypoint (VEN_T0 + 140 min) lands exactly on WINDOW_CLOSE.
# Each entry is (timestamp, lat, lon).
# Kept south of 60.10 °N so she stays in open water.
VEN_T0 = WINDOW_OPEN + timedelta(minutes=10)
VENLA_WAYPOINTS = [
    (VEN_T0,                                    59.9800, 23.1000),  # entry W
    (VEN_T0 + timedelta(minutes=20),            59.9800, 23.6000),  # transit east
    (VEN_T0 + timedelta(minutes=35),            59.9700, 24.6000),  # approaching survey area
    (VEN_T0 + timedelta(minutes=45),            59.9600, 24.8400),  # at survey area (≈ Estlink)
    (VEN_T0 + timedelta(minutes=75),            59.9750, 24.8800),  # slow N drift
    (VEN_T0 + timedelta(minutes=105),           59.9600, 24.8200),  # slow S drift
    (VEN_T0 + timedelta(minutes=125),           59.9500, 24.7000),  # end survey
    (VEN_T0 + timedelta(minutes=140),           59.9300, 24.4000),  # depart west
]

# VENLA invented crew (catalog OUIs only). Not added to personas.json — scenario-local.
# Only "device_mac" and "oui_vendor" are read by the realtime generator in this
# file; the remaining keys mirror the persona shape (presumably that of
# personas.json — verify against the catalog).
VENLA_CREW = [
    {"id": "P-VEN-MASTER",     "ship": "MV VENLA RESEARCH", "role": "Master",
     "device_mac": "A4:83:E7:5C:9B:71", "device": "iPhone",        "oui_vendor": "Apple"},
    {"id": "P-VEN-CHOFF",      "ship": "MV VENLA RESEARCH", "role": "Chief Officer",
     "device_mac": "38:F9:D3:11:22:71", "device": "Samsung phone", "oui_vendor": "Samsung"},
    {"id": "P-VEN-RESEARCHER", "ship": "MV VENLA RESEARCH", "role": "Researcher",
     "device_mac": "38:F9:D3:11:22:81", "device": "Samsung phone (new device)",
     "oui_vendor": "Samsung"},
]

# Burner device observed during the AIS gap. The first octet 0x7E has the
# locally-administered bit (0x02) set, so no vendor OUI applies to it.
BURNER_MAC = "7E:2A:F1:09:44:C8"
BURNER_VENDOR = None  # null per spec

# Catalog sensor IDs that appear in this scenario. generate_static() keeps the
# catalog features whose properties.sensorId is in this set.
# NOTE(review): the realtime/historical generators pass the full
# sensor_lookup() result to the MAC simulators rather than this subset —
# confirm this set matches the sensors that actually produce observations.
SENSORS_USED_IDS = {
    "MAC-KTK-COAST-01", "MAC-KTK-PORT-01",
    "MAC-PRV-COAST-01",
    "MAC-HEL-COAST-01", "MAC-HEL-PORT-03",
    "MAC-INK-COAST-01",
    "MAC-HKO-COAST-01",
    "MAC-AIR-PLN-01",
    "RAD-PLN-01",
}

# Infrastructure features used (Estlink corridors + buffers + EEZ).
# generate_static() keeps the catalog features whose properties.featureId is
# in this set.
INFRA_USED_IDS = {
    "estlink-1", "estlink-1-buffer",
    "estlink-2", "estlink-2-buffer",
    "finnish-eez-gof",
}


def build_ambient_ais(n_ships: int, seed: int) -> list[dict[str, Any]]:
    """Emit AIS messages for up to *n_ships* synthetic background vessels.

    Every vessel sails one straight leg across the Gulf (lon 22.5 ↔ 27.5)
    at a 15 s reporting cadence, starting at a random offset inside the
    realtime window and clipped to ``WINDOW_CLOSE``. MMSIs are drawn via
    ``ambient_mmsi`` with a 70 % FI / 20 % EE / 10 % OTHER flag mix.

    Args:
        n_ships: Number of candidate vessels to generate.
        seed: Seeds the local RNG; vessel ``k`` uses ``seed + k`` for its track.

    Returns:
        All emitted AIS messages, concatenated in vessel order.
    """
    rng = random.Random(seed)
    messages: list[dict[str, Any]] = []

    for ship_idx in range(n_ships):
        # NOTE: the order of RNG draws below is part of the output contract.
        heading_east = rng.random() < 0.5
        start_lat = rng.uniform(59.65, 60.30)
        end_lat = start_lat + rng.uniform(-0.10, 0.10)
        start_lon, end_lon = (22.5, 27.5) if heading_east else (27.5, 22.5)

        # Stagger departures so vessels do not all span the whole window.
        depart = WINDOW_OPEN + timedelta(minutes=rng.uniform(0, 90))
        arrive = depart + timedelta(minutes=rng.uniform(60, 150))
        if arrive > WINDOW_CLOSE:
            arrive = WINDOW_CLOSE

        if arrive > depart:
            # Flag decides which synthetic 9XX MMSI block the vessel gets.
            roll = rng.random()
            if roll < 0.7:
                flag = "FI"
            elif roll < 0.9:
                flag = "EE"
            else:
                flag = "OTHER"

            leg = AisTrack(
                mmsi=ambient_mmsi(rng, flag),
                waypoints=[(depart, start_lat, start_lon), (arrive, end_lat, end_lon)],
                cadence_s=15.0,
                destination="FIHEL" if heading_east else "EETLL",
                seed=seed + ship_idx,
            )
            messages.extend(emit_ais(leg))

    return messages


def make_crew_emitter(mac: str, vendor: str | None, waypoints, active=None,
                      seed: int = 0) -> MovingMacEmitter:
    """Build a ``MovingMacEmitter`` using this file's shorter argument names.

    ``vendor`` is forwarded as ``manufacturer`` and ``active`` as
    ``active_windows``; ``mac``, ``waypoints`` and ``seed`` pass through
    unchanged.
    """
    emitter_kwargs = {
        "mac": mac,
        "manufacturer": vendor,
        "waypoints": waypoints,
        "active_windows": active,
        "seed": seed,
    }
    return MovingMacEmitter(**emitter_kwargs)


def generate_realtime() -> dict[str, int]:
    """Write every realtime-layer file and report the row count of each.

    Produces, under ``OUT_REALTIME``: the combined AIS stream (subject,
    decoy and ambient traffic) with its snapshot GeoJSON, the plane-radar
    track, and the MAC observations as NDJSON and CSV. Decimated companion
    files are recorded whenever the decimation helpers return a report.

    Returns:
        Mapping of output file name → row count as returned by the writers
        (per the original note, counts include the disclaimer row).
    """
    sensor_map = sensor_lookup()
    row_counts: dict[str, int] = {}

    # ----- AIS: subject vessel, decoy vessel, ambient traffic -----
    subject_ais = emit_ais(AisTrack(
        mmsi=230999401,
        waypoints=AAL_WAYPOINTS,
        cadence_s=3.0,
        dark_windows=[(AIS_DARK_START, AIS_DARK_END)],
        destination="FIHAN",
        nav_status=0,
        seed=101,
    ))
    decoy_ais = emit_ais(AisTrack(
        mmsi=230888011,
        waypoints=VENLA_WAYPOINTS,
        cadence_s=3.0,
        destination="FIHEL",
        nav_status=7,  # research-vessel survey proxy per spec
        speed_jitter_kn=0.3,
        course_jitter_deg=3.0,
        seed=202,
    ))
    ambient_ais = build_ambient_ais(n_ships=1500, seed=303)

    combined_ais = [*subject_ais, *decoy_ais, *ambient_ais]
    ais_path = OUT_REALTIME / "ais.ndjson"
    row_counts["ais.ndjson"] = write_ndjson(
        ais_path, combined_ais, "s1-ais-dark-near-cable/ais")

    row_counts["ais_snapshot.geojson"] = write_geojson(
        OUT_REALTIME / "ais_snapshot.geojson",
        ais_snapshot_geojson(combined_ais),
        "s1-ais-dark-near-cable/ais_snapshot")

    # ----- Plane radar: follows the subject track with no gap -----
    radar_rows = emit_radar(RadarTrack(
        track_id="PLN-TRK-0007",
        sensor_id="RAD-PLN-01",
        waypoints=AAL_WAYPOINTS,
        cadence_s=4.0,
        classification="surface_large",
        rcs_m2=8200.0,
        confidence=0.92,
        seed=404,
    ))
    for row in radar_rows:
        stamp = datetime.strptime(
            row["timestamp"], "%Y-%m-%dT%H:%M:%S.%fZ").replace(tzinfo=UTC)
        # The radar fix is linked to the AIS identity only while AIS is on.
        ais_is_dark = AIS_DARK_START <= stamp < AIS_DARK_END
        row["mmsi_hint"] = None if ais_is_dark else 230999401
        row["platform"] = "MAC-AIR-PLN-01"
    radar_path = OUT_REALTIME / "plane_radar.ndjson"
    row_counts["plane_radar.ndjson"] = write_ndjson(
        radar_path, radar_rows, "s1-ais-dark-near-cable/plane_radar")

    # ----- MAC: subject crew, burner, decoy crew, background -----
    observations = []

    # Subject crew: only personas flagged persistent; the seed uses the
    # persona's position in the full crew list.
    for idx, persona in enumerate(crew_by_ship("MV AALLOTAR")):
        if persona.get("persistent"):
            crew_emitter = make_crew_emitter(
                mac=persona["device_mac"],
                vendor=persona["oui_vendor"],
                waypoints=AAL_WAYPOINTS,
                active=[(WINDOW_OPEN, WINDOW_CLOSE)],
                seed=500 + idx,
            )
            observations.extend(
                simulate_moving_mac(crew_emitter, sensor_map, session_window_s=120.0))

    # Burner: rides the subject track, active only in its own window. A
    # shorter session window and an explicit RSSI threshold are used here.
    burner_emitter = MovingMacEmitter(
        mac=BURNER_MAC,
        manufacturer=BURNER_VENDOR,
        waypoints=AAL_WAYPOINTS,
        active_windows=[(BURNER_START, BURNER_END)],
        seed=666,
    )
    observations.extend(
        simulate_moving_mac(burner_emitter, sensor_map, session_window_s=60.0,
                            rssi_threshold=-105.0))

    # Decoy crew: active from the decoy's first waypoint to its last.
    decoy_windows = [(VENLA_WAYPOINTS[0][0], VENLA_WAYPOINTS[-1][0])]
    for idx, persona in enumerate(VENLA_CREW):
        crew_emitter = make_crew_emitter(
            mac=persona["device_mac"],
            vendor=persona["oui_vendor"],
            waypoints=VENLA_WAYPOINTS,
            active=decoy_windows,
            seed=700 + idx,
        )
        observations.extend(
            simulate_moving_mac(crew_emitter, sensor_map, session_window_s=120.0))

    # Background noise over the whole realtime window.
    observations.extend(
        generate_background_macs(sensor_map, WINDOW_OPEN, WINDOW_CLOSE,
                                 mac_count=150, cadence_s=120.0, seed=42))

    mac_path = OUT_REALTIME / "mac.ndjson"
    row_counts["mac.ndjson"] = write_ndjson(
        mac_path, [obs.to_ndjson() for obs in observations],
        "s1-ais-dark-near-cable/mac")
    row_counts["mac.csv"] = write_csv(
        OUT_REALTIME / "mac.csv", MAC_CSV_HEADER,
        [obs.to_csv_row() for obs in observations],
        "s1-ais-dark-near-cable/mac_sessions")

    # ----- Decimated companions (helpers return a falsy value if none) -----
    ais_fields = ["timestamp", "lat", "lon", "sog_kn", "cog_deg", "nav_status"]
    radar_fields = ["timestamp", "lat", "lon", "sog_kn", "cog_deg", "alt_m",
                    "speed_mps", "heading_deg", "rcs_m2", "classification",
                    "mmsi_hint", "kind"]
    decimation_jobs = [
        (ais_path, {"key_field": "mmsi", "ts_field": "ts_epoch_ms",
                    "project_fields": ais_fields}),
        (radar_path, {"key_field": "track_id", "ts_field": "ts_epoch_ms",
                      "project_fields": radar_fields}),
    ]
    reports = []

    def _register(report) -> None:
        # Record a produced companion file; +1 accounts for its disclaimer row.
        if not report:
            return
        reports.append(report)
        row_counts[Path(report["decimated"]).name] = report["rows"] + 1

    for target, options in decimation_jobs:
        _register(maybe_decimate_ndjson(target, **options))
    _register(maybe_decimate_mac_ndjson(mac_path))

    if reports:
        print("[S1] decimated companion files:")
        for report in reports:
            file_name = Path(report["decimated"]).name
            source_mb = report["source_bytes"] / 1024 / 1024
            decimated_mb = report["decimated_bytes"] / 1024 / 1024
            print(f"  {file_name}  {source_mb:.1f}MB -> {decimated_mb:.1f}MB"
                  f"  ({report['rows']} rows)")

    return row_counts


def generate_static() -> dict[str, int]:
    """Write the static GeoJSON layer and return the count per output file.

    Three files go under ``OUT_STATIC``: a rectangular area-of-interest
    polygon, the sensor-catalog features listed in ``SENSORS_USED_IDS``, and
    the infrastructure-catalog features listed in ``INFRA_USED_IDS``.
    """
    feature_counts: dict[str, int] = {}

    # Area of interest: closed rectangular ring in [lon, lat] order, starting
    # and ending at the south-west corner.
    west, south, east, north = 22.5, 59.65, 28.0, 60.55
    ring = [
        [west, south],
        [east, south],
        [east, north],
        [west, north],
        [west, south],
    ]
    aoi_feature = {
        "type": "Feature",
        "properties": {
            "featureId": "s1-aoi",
            "name": "S1 Area of Interest",
            "note": "Bounding polygon covering AALLOTAR track and Estlink corridor",
        },
        "geometry": {"type": "Polygon", "coordinates": [ring]},
    }
    feature_counts["area_of_interest.geojson"] = write_geojson(
        OUT_STATIC / "area_of_interest.geojson", [aoi_feature],
        "s1-ais-dark-near-cable/area_of_interest")

    # Sensors: keep catalog order, filter by sensorId.
    selected_sensors = []
    for feature in load_sensors()["features"]:
        if feature["properties"]["sensorId"] in SENSORS_USED_IDS:
            selected_sensors.append(feature)
    feature_counts["sensors_used.geojson"] = write_geojson(
        OUT_STATIC / "sensors_used.geojson", selected_sensors,
        "s1-ais-dark-near-cable/sensors_used")

    # Infrastructure: keep catalog order, filter by featureId.
    selected_infra = []
    for feature in load_infrastructure()["features"]:
        if feature["properties"]["featureId"] in INFRA_USED_IDS:
            selected_infra.append(feature)
    feature_counts["infrastructure_used.geojson"] = write_geojson(
        OUT_STATIC / "infrastructure_used.geojson", selected_infra,
        "s1-ais-dark-near-cable/infrastructure_used")

    return feature_counts


def _rebase_waypoints(waypoints, old_origin: datetime, new_origin: datetime) -> list[tuple]:
    """Return *waypoints* with each timestamp's offset from *old_origin* re-applied to *new_origin*.

    Offsets are truncated to whole seconds; lat/lon pass through unchanged.
    """
    return [
        (new_origin + timedelta(seconds=int((t - old_origin).total_seconds())), lat, lon)
        for (t, lat, lon) in waypoints
    ]


def generate_historical() -> dict[str, int]:
    """Generate the historical baseline layer for the six days before the window.

    For each of the six days preceding ``WINDOW_OPEN``:

    * AALLOTAR replays a time-shifted copy of ``AAL_WAYPOINTS`` with no dark
      window, at 30 s AIS cadence.
    * VENLA RESEARCH replays a time-shifted copy of ``VENLA_WAYPOINTS`` only
      when the day-of-month is even (three of the six days for the current
      ``WINDOW_OPEN``), to keep the file small.
    * MAC observations are simulated for AALLOTAR's persistent crew along the
      shifted track (VENLA crew is not simulated here), plus background MAC
      noise from two hours before to five hours after the day's 05:30 start.

    Returns:
        Mapping of output file name → row count as returned by the writers,
        for ``ais_baseline.ndjson``, ``mac_baseline.ndjson`` and
        ``mac_baseline.csv`` under ``OUT_HISTORICAL``.
    """
    counts: dict[str, int] = {}
    sensors = sensor_lookup()

    # The crew roster does not depend on the day, so resolve it once. Each
    # persona keeps its index in the full roster because that index feeds the
    # per-emitter seed below.
    aal_persistent_crew = [
        (i, p) for i, p in enumerate(crew_by_ship("MV AALLOTAR"))
        if p.get("persistent")
    ]

    ais_baseline: list[dict[str, Any]] = []
    mac_baseline_obs = []

    # Per-day baseline transits across the 6 days preceding the realtime window
    baseline_days = [WINDOW_OPEN - timedelta(days=d) for d in range(1, 7)]
    for d in baseline_days:
        # AALLOTAR transit starts at 05:30 on the baseline day.
        day_start = d.replace(hour=5, minute=30, second=0, microsecond=0)
        # Same waypoints and relative timing as the realtime track, only
        # shifted to this day; omitting dark_windows makes it a clean pass.
        aal_hist_wp = _rebase_waypoints(AAL_WAYPOINTS, WINDOW_OPEN, day_start)
        aal_hist = AisTrack(
            mmsi=230999401,
            waypoints=aal_hist_wp,
            cadence_s=30.0,
            destination="FIHAN",
            seed=8000 + d.day,
        )
        ais_baseline.extend(emit_ais(aal_hist))

        # VENLA baseline transit (even days of the month only, to keep it small)
        if d.day % 2 == 0:
            ven_day_start = day_start + timedelta(minutes=10)
            ven_hist_wp = _rebase_waypoints(VENLA_WAYPOINTS, VEN_T0, ven_day_start)
            ven_hist = AisTrack(
                mmsi=230888011,
                waypoints=ven_hist_wp,
                cadence_s=30.0,
                destination="FIHEL",
                nav_status=0,
                seed=9000 + d.day,
            )
            ais_baseline.extend(emit_ais(ven_hist))

        # Crew MAC co-occurrence on this day for AAL, active for the whole
        # shifted transit (first to last waypoint).
        for i, p in aal_persistent_crew:
            em = MovingMacEmitter(
                mac=p["device_mac"],
                manufacturer=p["oui_vendor"],
                waypoints=aal_hist_wp,
                active_windows=[(aal_hist_wp[0][0], aal_hist_wp[-1][0])],
                seed=8500 + d.day * 10 + i,
            )
            mac_baseline_obs.extend(
                simulate_moving_mac(em, sensors, session_window_s=240.0))

        # Per-day background MAC noise across a longer slice of the day
        bg = generate_background_macs(
            sensors, day_start - timedelta(hours=2),
            day_start + timedelta(hours=5),
            mac_count=80, cadence_s=300.0, seed=10000 + d.day,
        )
        mac_baseline_obs.extend(bg)

    counts["ais_baseline.ndjson"] = write_ndjson(
        OUT_HISTORICAL / "ais_baseline.ndjson", ais_baseline,
        "s1-ais-dark-near-cable/ais_baseline")

    mac_nd = [m.to_ndjson() for m in mac_baseline_obs]
    counts["mac_baseline.ndjson"] = write_ndjson(
        OUT_HISTORICAL / "mac_baseline.ndjson", mac_nd,
        "s1-ais-dark-near-cable/mac_baseline")

    mac_rows = [m.to_csv_row() for m in mac_baseline_obs]
    counts["mac_baseline.csv"] = write_csv(
        OUT_HISTORICAL / "mac_baseline.csv", MAC_CSV_HEADER, mac_rows,
        "s1-ais-dark-near-cable/mac_baseline_sessions")

    return counts


def dir_size_bytes(p: Path) -> int:
    """Return the combined size in bytes of every regular file under *p*, recursively."""
    total = 0
    for entry in p.rglob("*"):
        if entry.is_file():
            total += entry.stat().st_size
    return total


def main() -> int:
    """Run all three generators, print a summary, and write it as JSON.

    The layers are generated in the order realtime, static, historical.
    Per-file counts, on-disk sizes and row totals are printed, then the same
    figures are saved to ``data/_generation_summary.json`` under
    ``SCENARIO_DIR``.

    Returns:
        Always ``0``, used as the process exit status.
    """
    print("[S1] generating realtime layer …")
    realtime_counts = generate_realtime()
    print("[S1] generating static layer …")
    static_counts = generate_static()
    print("[S1] generating historical layer …")
    historical_counts = generate_historical()

    print("\n===== Scenario 01 — AIS Dark Near Cable: generation summary =====")
    # (section title, counts table, unit label, right-aligned column width)
    sections = (
        ("realtime", realtime_counts, "rows", 8),
        ("static", static_counts, "features", 5),
        ("historical", historical_counts, "rows", 8),
    )
    for title, table, unit, width in sections:
        print(f"\n[{title}]")
        for file_name, count in table.items():
            print(f"  {file_name:<28} {unit}={count:>{width}}")

    realtime_size = dir_size_bytes(OUT_REALTIME)
    static_size = dir_size_bytes(OUT_STATIC)
    historical_size = dir_size_bytes(OUT_HISTORICAL)
    print("\n[on disk]")
    print(f"  realtime   {realtime_size:>12} bytes ({realtime_size/1024/1024:.2f} MB)")
    print(f"  static     {static_size:>12} bytes ({static_size/1024:.2f} KB)")
    print(f"  historical {historical_size:>12} bytes ({historical_size/1024/1024:.2f} MB)")

    def _row_total(table: dict[str, int]) -> int:
        # Only row-oriented outputs count towards the totals (not GeoJSON).
        return sum(count for file_name, count in table.items()
                   if file_name.endswith((".ndjson", ".csv")))

    realtime_total = _row_total(realtime_counts)
    historical_total = _row_total(historical_counts)
    print(f"\n[totals] realtime rows={realtime_total}  historical rows={historical_total}")
    print("[done] All files written under scenarios/01-ais-dark-near-cable/data/")

    # Machine-readable copy of the figures printed above.
    summary = {
        "scenario": "s1-ais-dark-near-cable",
        "realtime": realtime_counts,
        "static": static_counts,
        "historical": historical_counts,
        "bytes": {
            "realtime": realtime_size,
            "static": static_size,
            "historical": historical_size,
        },
    }
    summary_path = SCENARIO_DIR / "data" / "_generation_summary.json"
    summary_path.write_text(json.dumps(summary, indent=2), encoding="utf-8")
    return 0


# Script entry point: exit with main()'s return value as the process status.
if __name__ == "__main__":
    sys.exit(main())
