v0.10.2: monitoring-area bbox enforced at supervisor publish (was archive-only) (#PR_NUMBER_PLACEHOLDER)

Closes #86

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-10 11:54:37 +02:00 · 2026-06-05 20:34:10 -06:00 · 2026-06-05 20:34:10 -06:00 · 1bebf2570b
commit 1bebf2570b
parent 1d5548c24c
10 changed files with 590 additions and 145 deletions
--- a/tests/test_archive_bbox_filter.py
+++ b/tests/test_archive_bbox_filter.py
@ -1,55 +1,22 @@
-"""Archive-level monitoring-area bbox filter (v0.9.12).
+"""Archive-level monitoring-area bbox filter integration (v0.9.12).

-Events whose geometry falls entirely outside the system monitoring area are
-dropped at archive INSERT time; null-geom events and border-straddlers are kept.
-The filter is fail-open: an unparseable geometry is archived (with a warning),
-never dropped.
+After v0.10.2 the ``MonitoringArea`` / ``classify_geom`` / ``build_geom_json``
+primitives moved to ``central.monitoring_area`` -- their pure-unit tests live in
+``test_monitoring_area.py``. This file keeps the end-to-end check that archive's
+``_process_message`` still drops out-of-bounds events (ACK + counter, no INSERT)
+and that null-geom / no-area paths still archive.
 """
 import json

 import pytest
 from unittest.mock import AsyncMock, MagicMock

-from central.archive import ArchiveConsumer, MonitoringArea, _classify_geom
+from central.archive import ArchiveConsumer
+from central.monitoring_area import MonitoringArea

 IDAHO = MonitoringArea(north=44.5, south=41.8, east=-111.0, west=-117.5)


-def _pt(lon, lat):
-    return json.dumps({"type": "Point", "coordinates": [lon, lat]})
-
-
-class TestClassifyGeom:
-    def test_null_geom_always_kept(self):
-        assert _classify_geom(None, IDAHO) == "null-geom"
-
-    def test_no_area_keeps_everything(self):
-        assert _classify_geom(_pt(-114.0, 43.5), None) == "no-area"
-
-    def test_in_bounds_kept(self):
-        assert _classify_geom(_pt(-114.0, 43.5), IDAHO) == "in-bounds"
-
-    def test_out_of_bounds_dropped(self):
-        assert _classify_geom(_pt(-74.0, 40.7), IDAHO) == "out-of-bounds"
-
-    def test_border_straddling_polygon_kept(self):
-        # Spans the western edge (west=-117.5): partly out, partly in -> kept.
-        poly = json.dumps({
-            "type": "Polygon",
-            "coordinates": [[[-119, 42], [-116, 42], [-116, 43], [-119, 43], [-119, 42]]],
-        })
-        assert _classify_geom(poly, IDAHO) == "in-bounds"
-
-    def test_point_exactly_on_border_kept(self):
-        assert _classify_geom(_pt(-117.5, 43.0), IDAHO) == "in-bounds"
-
-    def test_unparseable_geom_kept(self):
-        assert _classify_geom("{not valid json", IDAHO) == "invalid-geom"
-
-    def test_unknown_geom_type_kept(self):
-        assert _classify_geom(json.dumps({"type": "Nonsense"}), IDAHO) == "invalid-geom"
-
-
 def _make_msg(envelope):
    msg = MagicMock()
    msg.data = json.dumps(envelope).encode()
--- a/tests/test_firms.py
+++ b/tests/test_firms.py
@ -14,7 +14,7 @@ from central.adapters.firms import (
    SATELLITE_SHORT,
    _pixel_polygon,
 )
-from central.archive import _build_geom_sql
+from central.monitoring_area import build_geom_json
 from central.config_models import AdapterConfig
 from central.models import Event, Geo

@ -553,7 +553,7 @@ class TestPixelPolygon:


 class TestGeoGeometryRoundTripsThroughArchive:
-    """Regression guard: FIRMS geo.geometry must reach _build_geom_sql as Polygon."""
+    """Regression guard: FIRMS geo.geometry must reach build_geom_json as Polygon."""

    @pytest.mark.asyncio
    async def test_geo_geometry_round_trips_through_archive_path(
@ -572,7 +572,7 @@ class TestGeoGeometryRoundTripsThroughArchive:
        # Simulate what archive does: serialize geo to dict and run it through
        # the same helper that produces the PostGIS geom clause.
        geo_dict = event.geo.model_dump()
-        sql_clause = _build_geom_sql(geo_dict)
+        sql_clause = build_geom_json(geo_dict)
        assert sql_clause is not None
        decoded = json.loads(sql_clause)
        assert decoded["type"] == "Polygon"
@ -600,6 +600,6 @@ class TestGeoGeometryRoundTripsThroughArchive:
        assert event.geo.geometry is None

        geo_dict = event.geo.model_dump()
-        sql_clause = _build_geom_sql(geo_dict)
+        sql_clause = build_geom_json(geo_dict)
        assert sql_clause is not None
        assert json.loads(sql_clause)["type"] == "Point"
--- a/tests/test_monitoring_area.py
+++ b/tests/test_monitoring_area.py
@ -0,0 +1,142 @@
+"""Unit tests for the shared monitoring-area module (v0.10.2).
+
+Covers the four exports:
+  - ``MonitoringArea.as_box`` (shapely box construction)
+  - ``build_geom_json`` (all five Geo shapes the archive sees)
+  - ``classify_geom`` (the five verdicts)
+  - ``load_monitoring_area`` (DB read; None on missing-row / NULL-column)
+
+The classify_geom + as_box assertions are the v0.9.12 archive bbox tests
+lifted out of ``test_archive_bbox_filter.py`` and expanded with edge cases.
+"""
+
+import json
+
+import pytest
+from unittest.mock import AsyncMock, MagicMock
+from shapely.geometry import Polygon, box as shapely_box
+
+from central.monitoring_area import (
+    MonitoringArea,
+    build_geom_json,
+    classify_geom,
+    load_monitoring_area,
+)
+
+IDAHO = MonitoringArea(north=44.5, south=41.8, east=-111.0, west=-117.5)
+
+
+def _pt(lon, lat):
+    return json.dumps({"type": "Point", "coordinates": [lon, lat]})
+
+
+class TestMonitoringAreaAsBox:
+    def test_as_box_returns_shapely_polygon(self):
+        b = IDAHO.as_box()
+        assert isinstance(b, Polygon)
+
+    def test_as_box_corners_match_west_south_east_north(self):
+        # shapely box(minx, miny, maxx, maxy) -> envelope (west, south, east, north)
+        expected = shapely_box(-117.5, 41.8, -111.0, 44.5)
+        assert IDAHO.as_box().equals(expected)
+
+
+class TestBuildGeomJson:
+    def test_none_input_returns_none(self):
+        assert build_geom_json(None) is None
+
+    def test_empty_dict_returns_none(self):
+        assert build_geom_json({}) is None
+
+    def test_full_geometry_wins_over_bbox(self):
+        # If a real geometry is present it MUST be used verbatim (this is the
+        # v0.9.8 wfigs/tomtom invariant -- the map needs the real shape).
+        geom = {"type": "LineString", "coordinates": [[-114, 43], [-115, 44]]}
+        out = build_geom_json({
+            "geometry": geom,
+            "bbox": [-115, 43, -114, 44],
+            "centroid": [-114.5, 43.5],
+        })
+        assert json.loads(out) == geom
+
+    def test_bbox_rendered_as_closed_polygon(self):
+        out = build_geom_json({"bbox": [-117, 42, -111, 44]})
+        parsed = json.loads(out)
+        assert parsed["type"] == "Polygon"
+        coords = parsed["coordinates"][0]
+        # 5 points: 4 corners + closing duplicate
+        assert len(coords) == 5
+        assert coords[0] == coords[-1]
+        assert coords[0] == [-117, 42]
+
+    def test_centroid_rendered_as_point(self):
+        out = build_geom_json({"centroid": [-114.5, 43.5]})
+        assert json.loads(out) == {"type": "Point", "coordinates": [-114.5, 43.5]}
+
+    def test_partial_bbox_falls_through(self):
+        # An invalid 3-element bbox should not produce a 3-corner polygon;
+        # build_geom_json must fall through to centroid or return None.
+        assert build_geom_json({"bbox": [-117, 42, -111]}) is None
+
+    def test_centroid_wins_when_bbox_invalid(self):
+        out = build_geom_json({"bbox": [-117, 42, -111], "centroid": [-114, 43]})
+        assert json.loads(out) == {"type": "Point", "coordinates": [-114, 43]}
+
+
+class TestClassifyGeom:
+    def test_null_geom_always_kept(self):
+        assert classify_geom(None, IDAHO) == "null-geom"
+
+    def test_null_geom_kept_even_without_area(self):
+        assert classify_geom(None, None) == "null-geom"
+
+    def test_no_area_keeps_everything(self):
+        assert classify_geom(_pt(-114.0, 43.5), None) == "no-area"
+
+    def test_in_bounds_kept(self):
+        assert classify_geom(_pt(-114.0, 43.5), IDAHO) == "in-bounds"
+
+    def test_out_of_bounds_dropped(self):
+        assert classify_geom(_pt(-74.0, 40.7), IDAHO) == "out-of-bounds"
+
+    def test_border_straddling_polygon_kept(self):
+        # Spans the western edge (west=-117.5): partly out, partly in -> kept.
+        poly = json.dumps({
+            "type": "Polygon",
+            "coordinates": [[[-119, 42], [-116, 42], [-116, 43], [-119, 43], [-119, 42]]],
+        })
+        assert classify_geom(poly, IDAHO) == "in-bounds"
+
+    def test_point_exactly_on_border_kept(self):
+        assert classify_geom(_pt(-117.5, 43.0), IDAHO) == "in-bounds"
+
+    def test_unparseable_geom_keeps_failopen(self):
+        assert classify_geom("{not valid json", IDAHO) == "invalid-geom"
+
+    def test_unknown_geom_type_keeps_failopen(self):
+        assert classify_geom(json.dumps({"type": "Nonsense"}), IDAHO) == "invalid-geom"
+
+
+@pytest.mark.asyncio
+class TestLoadMonitoringArea:
+    async def test_returns_area_when_all_columns_set(self):
+        conn = MagicMock()
+        conn.fetchrow = AsyncMock(return_value={
+            "monitor_north": 44.5, "monitor_south": 41.8,
+            "monitor_east": -111.0, "monitor_west": -117.5,
+        })
+        area = await load_monitoring_area(conn)
+        assert area == IDAHO
+
+    async def test_returns_none_when_no_row(self):
+        conn = MagicMock()
+        conn.fetchrow = AsyncMock(return_value=None)
+        assert await load_monitoring_area(conn) is None
+
+    async def test_returns_none_when_any_column_null(self):
+        conn = MagicMock()
+        conn.fetchrow = AsyncMock(return_value={
+            "monitor_north": 44.5, "monitor_south": None,
+            "monitor_east": -111.0, "monitor_west": -117.5,
+        })
+        assert await load_monitoring_area(conn) is None
--- a/tests/test_supervisor_publish_filter.py
+++ b/tests/test_supervisor_publish_filter.py
@ -0,0 +1,186 @@
+"""Supervisor publish-time monitoring-area filter (v0.10.2).
+
+Covers the wire-up between ``subject_for`` and ``_publish_event`` in
+``Supervisor._run_adapter_loop``: out-of-area drops, in-area publishes,
+null-geom passes, invalid-geom fails open, and the refresh-loop reload.
+"""
+import asyncio
+import logging
+from datetime import datetime, timezone
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from central import supervisor as sup_mod
+from central.config_models import AdapterConfig, EnrichmentConfig
+from central.models import Event, Geo
+from central.monitoring_area import MonitoringArea
+
+IDAHO = MonitoringArea(north=44.5, south=41.8, east=-111.0, west=-117.5)
+SPOKANE = (-117.4, 47.6)
+BOISE = (-114.0, 43.5)
+NYC = (-74.0, 40.7)
+
+
+def _ev(eid: str, geo: Geo) -> Event:
+    return Event(
+        id=eid, adapter="mock", category="mock.test",
+        time=datetime.now(timezone.utc), geo=geo, data={},
+    )
+
+
+class _MockAdapter:
+    requires_api_key = None
+    enrichment_locations = ()
+
+    def __init__(self, config, *_args) -> None:
+        self.config = config
+        self.cadence_s = config.cadence_s
+        self.events: list[Event] = []
+        self.published: set[str] = set()
+        self.done = asyncio.Event()
+        self._called = False
+
+    async def startup(self): ...
+    async def shutdown(self): ...
+    async def apply_config(self, c): ...
+
+    async def poll(self):
+        if not self._called:
+            self._called = True
+            for e in self.events:
+                yield e
+            self.done.set()
+
+    def is_published(self, eid): return eid in self.published
+    def mark_published(self, eid): self.published.add(eid)
+    def bump_last_seen(self, eid): ...
+    def sweep_old_ids(self): return 0
+    def subject_for(self, e): return f"central.test.{e.id}"
+
+
+@pytest.fixture
+def sup_factory():
+    """Build a supervisor with mocked NATS + ConfigStore; spy on _publish_event."""
+    def _build(area: MonitoringArea | None):
+        nc = AsyncMock(); js = AsyncMock(); js.publish = AsyncMock()
+        nc.jetstream = MagicMock(return_value=js)
+        store = MagicMock()
+        store.list_streams = AsyncMock(return_value=[])
+        store.get_stream = AsyncMock(return_value=None)
+        store.set_adapter_last_error = AsyncMock()
+        store.get_api_key = AsyncMock(return_value=None)
+        store.get_monitoring_area = AsyncMock(return_value=area)
+        config_source = MagicMock()
+        config_source.get_enrichment_config = AsyncMock(return_value=EnrichmentConfig())
+        sup = sup_mod.Supervisor(
+            config_source=config_source, config_store=store,
+            nats_url="nats://x:4222", cloudevents_config=None,
+            enrichment_config=EnrichmentConfig(),
+        )
+        sup._nc = nc; sup._js = nc.jetstream()
+        sup._monitoring_area = area
+        sup._publish_event = AsyncMock()
+        sup._publish_meta = AsyncMock()
+        sup._adapters["mock"] = _MockAdapter
+        return sup
+    return _build
+
+
+async def _drive(sup, events):
+    adapter = _MockAdapter(MagicMock(cadence_s=3600))
+    adapter.events = events
+    config = AdapterConfig(
+        name="mock", enabled=True, cadence_s=3600, settings={},
+        paused_at=None, updated_at=datetime.now(timezone.utc),
+    )
+    state = sup_mod.AdapterState(name="mock", config=config, adapter=adapter)
+    task = asyncio.create_task(sup._run_adapter_loop(state))
+    try:
+        await asyncio.wait_for(adapter.done.wait(), timeout=5.0)
+        await asyncio.sleep(0)
+    finally:
+        # Cancel ONLY this loop -- never touch sup._shutdown_event so callers
+        # can drive the same supervisor through multiple poll cycles.
+        task.cancel()
+        try: await task
+        except (asyncio.CancelledError, Exception): pass
+    return adapter
+
+
+# --- verdict matrix -----------------------------------------------------------
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("area,geo,should_publish,should_drop", [
+    pytest.param(None, Geo(centroid=NYC), True, False, id="no-area-publishes-out-of-bbox"),
+    pytest.param(IDAHO, Geo(centroid=BOISE), True, False, id="in-bounds-publishes"),
+    pytest.param(IDAHO, Geo(centroid=NYC), False, True, id="out-of-bounds-NY-drops"),
+    pytest.param(IDAHO, Geo(centroid=SPOKANE), False, True, id="out-of-bounds-Spokane-drops"),
+    pytest.param(IDAHO, Geo(), True, False, id="null-geom-publishes"),
+    pytest.param(IDAHO, Geo(geometry={"type": "Nonsense"}), True, False, id="invalid-geom-fail-open"),
+])
+async def test_verdict(sup_factory, area, geo, should_publish, should_drop):
+    sup = sup_factory(area)
+    a = await _drive(sup, [_ev("e1", geo)])
+    if should_publish:
+        assert sup._publish_event.await_count == 1
+        assert a.published == {"e1"}
+    else:
+        sup._publish_event.assert_not_called()
+        # CRITICAL forward-only: drops MUST NOT mark_published, else widening
+        # the bbox can't re-deliver -- see [[feedback_dedup_forward_only]].
+        assert a.published == set()
+    assert (sup._dropped_publish == {"mock": 1}) is should_drop
+
+
+@pytest.mark.asyncio
+async def test_mixed_batch_partitions_correctly(sup_factory):
+    sup = sup_factory(IDAHO)
+    a = await _drive(sup, [
+        _ev("in1", Geo(centroid=BOISE)),
+        _ev("out1", Geo(centroid=NYC)),
+        _ev("null1", Geo()),
+        _ev("out2", Geo(centroid=SPOKANE)),
+    ])
+    assert sup._publish_event.await_count == 2
+    assert a.published == {"in1", "null1"}
+    assert sup._dropped_publish == {"mock": 2}
+
+
+@pytest.mark.asyncio
+async def test_widening_area_re_publishes_previously_dropped(sup_factory):
+    """Forward-only invariant: drops are reversible -- never mark_published."""
+    sup = sup_factory(IDAHO)
+    spokane = _ev("spokane", Geo(centroid=SPOKANE))
+    # The same supervisor drives two polls so the second sees the same id
+    # AFTER the bbox widens. _drive builds a fresh _MockAdapter on each call
+    # because an adapter instance signals done only once.
+    a1 = await _drive(sup, [spokane])
+    sup._publish_event.assert_not_called()
+    assert a1.published == set()
+    sup._monitoring_area = MonitoringArea(
+        north=48.5, south=41.8, east=-111.0, west=-118.0
+    )
+    a2 = await _drive(sup, [spokane])
+    assert sup._publish_event.await_count == 1
+    assert a2.published == {"spokane"}
+
+
+@pytest.mark.asyncio
+async def test_refresh_loop_reloads_area_and_logs_summary(
+    sup_factory, caplog, monkeypatch
+):
+    sup = sup_factory(None)
+    sup._dropped_publish = {"mock": 7}
+    monkeypatch.setattr(sup_mod, "MONITORING_AREA_REFRESH_S", 0.05)
+    sup._config_store.get_monitoring_area = AsyncMock(return_value=IDAHO)
+    with caplog.at_level(logging.INFO):
+        task = asyncio.create_task(sup._refresh_monitoring_area_loop())
+        await asyncio.sleep(0.15)
+        sup._shutdown_event.set()
+        try: await asyncio.wait_for(task, timeout=1.0)
+        except (asyncio.TimeoutError, asyncio.CancelledError): task.cancel()
+    assert sup._monitoring_area == IDAHO
+    assert any(
+        "publish bbox filter drop summary" in r.message for r in caplog.records
+    )
--- a/tests/test_tomtom_flow.py
+++ b/tests/test_tomtom_flow.py
@ -17,7 +17,7 @@ import pytest

 from central.adapter import SourceAdapter
 from central.adapters.tomtom_flow import TomTomFlowAdapter
-from central.archive import _build_geom_sql
+from central.monitoring_area import build_geom_json
 from central.config_models import AdapterConfig
 from central.tomtom_flow_parse import (
    _local_to_lonlat,
@ -83,10 +83,10 @@ def test_subject_for():
 def test_archive_prefers_geo_geometry():
    line = {"type": "LineString", "coordinates": [[-116.2, 43.6], [-116.1, 43.7]]}
    # geometry present -> returned verbatim (not bbox/centroid)
-    out = _build_geom_sql({"geometry": line, "centroid": [-116.2, 43.6], "bbox": [-116.3, 43.5, -116.0, 43.8]})
+    out = build_geom_json({"geometry": line, "centroid": [-116.2, 43.6], "bbox": [-116.3, 43.5, -116.0, 43.8]})
    assert json.loads(out) == line
    # no geometry -> falls back to centroid Point (regression guard)
-    out2 = _build_geom_sql({"centroid": [-116.2, 43.6]})
+    out2 = build_geom_json({"centroid": [-116.2, 43.6]})
    assert json.loads(out2)["type"] == "Point"