v0.10.2: monitoring-area bbox enforced at supervisor publish (was archive-only) (#PR_NUMBER_PLACEHOLDER)

Closes #86

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
malice 2026-06-05 20:34:10 -06:00 committed by GitHub
commit 1bebf2570b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 590 additions and 145 deletions

View file

@ -1,55 +1,22 @@
"""Archive-level monitoring-area bbox filter (v0.9.12).
"""Archive-level monitoring-area bbox filter integration (v0.9.12).
Events whose geometry falls entirely outside the system monitoring area are
dropped at archive INSERT time; null-geom events and border-straddlers are kept.
The filter is fail-open: an unparseable geometry is archived (with a warning),
never dropped.
After v0.10.2 the ``MonitoringArea`` / ``classify_geom`` / ``build_geom_json``
primitives moved to ``central.monitoring_area`` -- their pure-unit tests live in
``test_monitoring_area.py``. This file keeps the end-to-end check that archive's
``_process_message`` still drops out-of-bounds events (ACK + counter, no INSERT)
and that null-geom / no-area paths still archive.
"""
import json
import pytest
from unittest.mock import AsyncMock, MagicMock
from central.archive import ArchiveConsumer, MonitoringArea, _classify_geom
from central.archive import ArchiveConsumer
from central.monitoring_area import MonitoringArea
# Monitoring-area bounding box shared by the tests in this module.
IDAHO = MonitoringArea(north=44.5, south=41.8, east=-111.0, west=-117.5)
def _pt(lon, lat):
return json.dumps({"type": "Point", "coordinates": [lon, lat]})
class TestClassifyGeom:
    """Verdict strings returned by ``_classify_geom`` for each input shape."""

    def test_null_geom_always_kept(self):
        verdict = _classify_geom(None, IDAHO)
        assert verdict == "null-geom"

    def test_no_area_keeps_everything(self):
        verdict = _classify_geom(_pt(-114.0, 43.5), None)
        assert verdict == "no-area"

    def test_in_bounds_kept(self):
        verdict = _classify_geom(_pt(-114.0, 43.5), IDAHO)
        assert verdict == "in-bounds"

    def test_out_of_bounds_dropped(self):
        verdict = _classify_geom(_pt(-74.0, 40.7), IDAHO)
        assert verdict == "out-of-bounds"

    def test_border_straddling_polygon_kept(self):
        # The ring crosses the western edge (west=-117.5): part of it lies
        # outside the area and part inside, so the event is kept.
        ring = [[-119, 42], [-116, 42], [-116, 43], [-119, 43], [-119, 42]]
        straddler = json.dumps({"type": "Polygon", "coordinates": [ring]})
        assert _classify_geom(straddler, IDAHO) == "in-bounds"

    def test_point_exactly_on_border_kept(self):
        on_west_edge = _pt(-117.5, 43.0)
        assert _classify_geom(on_west_edge, IDAHO) == "in-bounds"

    def test_unparseable_geom_kept(self):
        verdict = _classify_geom("{not valid json", IDAHO)
        assert verdict == "invalid-geom"

    def test_unknown_geom_type_kept(self):
        unknown = json.dumps({"type": "Nonsense"})
        assert _classify_geom(unknown, IDAHO) == "invalid-geom"
def _make_msg(envelope):
msg = MagicMock()
msg.data = json.dumps(envelope).encode()

View file

@ -14,7 +14,7 @@ from central.adapters.firms import (
SATELLITE_SHORT,
_pixel_polygon,
)
from central.archive import _build_geom_sql
from central.monitoring_area import build_geom_json
from central.config_models import AdapterConfig
from central.models import Event, Geo
@ -553,7 +553,7 @@ class TestPixelPolygon:
class TestGeoGeometryRoundTripsThroughArchive:
"""Regression guard: FIRMS geo.geometry must reach _build_geom_sql as Polygon."""
"""Regression guard: FIRMS geo.geometry must reach build_geom_json as Polygon."""
@pytest.mark.asyncio
async def test_geo_geometry_round_trips_through_archive_path(
@ -572,7 +572,7 @@ class TestGeoGeometryRoundTripsThroughArchive:
# Simulate what archive does: serialize geo to dict and run it through
# the same helper that produces the PostGIS geom clause.
geo_dict = event.geo.model_dump()
sql_clause = _build_geom_sql(geo_dict)
sql_clause = build_geom_json(geo_dict)
assert sql_clause is not None
decoded = json.loads(sql_clause)
assert decoded["type"] == "Polygon"
@ -600,6 +600,6 @@ class TestGeoGeometryRoundTripsThroughArchive:
assert event.geo.geometry is None
geo_dict = event.geo.model_dump()
sql_clause = _build_geom_sql(geo_dict)
sql_clause = build_geom_json(geo_dict)
assert sql_clause is not None
assert json.loads(sql_clause)["type"] == "Point"

View file

@ -0,0 +1,142 @@
"""Unit tests for the shared monitoring-area module (v0.10.2).
Covers the four exports:
- ``MonitoringArea.as_box`` (shapely box construction)
- ``build_geom_json`` (all five Geo shapes the archive sees)
- ``classify_geom`` (the five verdicts)
- ``load_monitoring_area`` (DB read; None on missing-row / NULL-column)
The classify_geom + as_box assertions are the v0.9.12 archive bbox tests
lifted out of ``test_archive_bbox_filter.py`` and expanded with edge cases.
"""
import json
import pytest
from unittest.mock import AsyncMock, MagicMock
from shapely.geometry import Polygon, box as shapely_box
from central.monitoring_area import (
MonitoringArea,
build_geom_json,
classify_geom,
load_monitoring_area,
)
# Monitoring-area bounding box shared by the tests in this module.
IDAHO = MonitoringArea(north=44.5, south=41.8, east=-111.0, west=-117.5)
def _pt(lon, lat):
return json.dumps({"type": "Point", "coordinates": [lon, lat]})
class TestMonitoringAreaAsBox:
    """``MonitoringArea.as_box`` produces the expected shapely geometry."""

    def test_as_box_returns_shapely_polygon(self):
        assert isinstance(IDAHO.as_box(), Polygon)

    def test_as_box_corners_match_west_south_east_north(self):
        # shapely's box() takes (minx, miny, maxx, maxy); for the monitoring
        # area that is (west, south, east, north).
        west, south, east, north = -117.5, 41.8, -111.0, 44.5
        reference = shapely_box(west, south, east, north)
        assert IDAHO.as_box().equals(reference)
class TestBuildGeomJson:
    """``build_geom_json`` output for each combination of geo fields."""

    def test_none_input_returns_none(self):
        result = build_geom_json(None)
        assert result is None

    def test_empty_dict_returns_none(self):
        result = build_geom_json({})
        assert result is None

    def test_full_geometry_wins_over_bbox(self):
        # A real geometry, when present, MUST be emitted verbatim (this is the
        # v0.9.8 wfigs/tomtom invariant -- the map needs the real shape).
        line = {"type": "LineString", "coordinates": [[-114, 43], [-115, 44]]}
        geo = {
            "geometry": line,
            "bbox": [-115, 43, -114, 44],
            "centroid": [-114.5, 43.5],
        }
        assert json.loads(build_geom_json(geo)) == line

    def test_bbox_rendered_as_closed_polygon(self):
        polygon = json.loads(build_geom_json({"bbox": [-117, 42, -111, 44]}))
        assert polygon["type"] == "Polygon"
        ring = polygon["coordinates"][0]
        # Four corners plus a closing duplicate of the first corner.
        assert len(ring) == 5
        assert ring[0] == ring[-1]
        assert ring[0] == [-117, 42]

    def test_centroid_rendered_as_point(self):
        rendered = json.loads(build_geom_json({"centroid": [-114.5, 43.5]}))
        assert rendered == {"type": "Point", "coordinates": [-114.5, 43.5]}

    def test_partial_bbox_falls_through(self):
        # A 3-element bbox is invalid and must not yield a 3-corner polygon;
        # with no centroid to fall back on, the result is None.
        truncated = {"bbox": [-117, 42, -111]}
        assert build_geom_json(truncated) is None

    def test_centroid_wins_when_bbox_invalid(self):
        geo = {"bbox": [-117, 42, -111], "centroid": [-114, 43]}
        rendered = json.loads(build_geom_json(geo))
        assert rendered == {"type": "Point", "coordinates": [-114, 43]}
class TestClassifyGeom:
    """The five verdict strings returned by ``classify_geom``."""

    def test_null_geom_always_kept(self):
        verdict = classify_geom(None, IDAHO)
        assert verdict == "null-geom"

    def test_null_geom_kept_even_without_area(self):
        verdict = classify_geom(None, None)
        assert verdict == "null-geom"

    def test_no_area_keeps_everything(self):
        verdict = classify_geom(_pt(-114.0, 43.5), None)
        assert verdict == "no-area"

    def test_in_bounds_kept(self):
        verdict = classify_geom(_pt(-114.0, 43.5), IDAHO)
        assert verdict == "in-bounds"

    def test_out_of_bounds_dropped(self):
        verdict = classify_geom(_pt(-74.0, 40.7), IDAHO)
        assert verdict == "out-of-bounds"

    def test_border_straddling_polygon_kept(self):
        # The ring crosses the western edge (west=-117.5): part of it lies
        # outside the area and part inside, so the event is kept.
        ring = [[-119, 42], [-116, 42], [-116, 43], [-119, 43], [-119, 42]]
        straddler = json.dumps({"type": "Polygon", "coordinates": [ring]})
        assert classify_geom(straddler, IDAHO) == "in-bounds"

    def test_point_exactly_on_border_kept(self):
        on_west_edge = _pt(-117.5, 43.0)
        assert classify_geom(on_west_edge, IDAHO) == "in-bounds"

    def test_unparseable_geom_keeps_failopen(self):
        verdict = classify_geom("{not valid json", IDAHO)
        assert verdict == "invalid-geom"

    def test_unknown_geom_type_keeps_failopen(self):
        unknown = json.dumps({"type": "Nonsense"})
        assert classify_geom(unknown, IDAHO) == "invalid-geom"
@pytest.mark.asyncio
class TestLoadMonitoringArea:
    """``load_monitoring_area`` results for the row shapes ``fetchrow`` can return."""

    @staticmethod
    def _conn_returning(row):
        # Mock connection whose awaited ``fetchrow`` resolves to ``row``.
        conn = MagicMock()
        conn.fetchrow = AsyncMock(return_value=row)
        return conn

    async def test_returns_area_when_all_columns_set(self):
        row = {
            "monitor_north": 44.5, "monitor_south": 41.8,
            "monitor_east": -111.0, "monitor_west": -117.5,
        }
        loaded = await load_monitoring_area(self._conn_returning(row))
        assert loaded == IDAHO

    async def test_returns_none_when_no_row(self):
        loaded = await load_monitoring_area(self._conn_returning(None))
        assert loaded is None

    async def test_returns_none_when_any_column_null(self):
        row = {
            "monitor_north": 44.5, "monitor_south": None,
            "monitor_east": -111.0, "monitor_west": -117.5,
        }
        loaded = await load_monitoring_area(self._conn_returning(row))
        assert loaded is None

View file

@ -0,0 +1,186 @@
"""Supervisor publish-time monitoring-area filter (v0.10.2).
Covers the wire-up between ``subject_for`` and ``_publish_event`` in
``Supervisor._run_adapter_loop``: out-of-area drops, in-area publishes,
null-geom passes, invalid-geom fails open, and the refresh-loop reload.
"""
import asyncio
import logging
from datetime import datetime, timezone
from unittest.mock import AsyncMock, MagicMock
import pytest
from central import supervisor as sup_mod
from central.config_models import AdapterConfig, EnrichmentConfig
from central.models import Event, Geo
from central.monitoring_area import MonitoringArea
# Monitoring-area bounding box used by the tests below.
IDAHO = MonitoringArea(north=44.5, south=41.8, east=-111.0, west=-117.5)
# Centroids passed to ``Geo(centroid=...)``. Compared against IDAHO's bounds the
# values read as (lon, lat) pairs -- TODO confirm against ``Geo.centroid``.
# Per the test ids below, BOISE is in-bounds; SPOKANE and NYC are out-of-bounds.
SPOKANE = (-117.4, 47.6)
BOISE = (-114.0, 43.5)
NYC = (-74.0, 40.7)
def _ev(eid: str, geo: Geo) -> Event:
    """Build a ``mock`` adapter event with the given id and geo, stamped with the current UTC time."""
    stamped_at = datetime.now(timezone.utc)
    return Event(
        id=eid,
        adapter="mock",
        category="mock.test",
        time=stamped_at,
        geo=geo,
        data={},
    )
class _MockAdapter:
requires_api_key = None
enrichment_locations = ()
def __init__(self, config, *_args) -> None:
self.config = config
self.cadence_s = config.cadence_s
self.events: list[Event] = []
self.published: set[str] = set()
self.done = asyncio.Event()
self._called = False
async def startup(self): ...
async def shutdown(self): ...
async def apply_config(self, c): ...
async def poll(self):
if not self._called:
self._called = True
for e in self.events:
yield e
self.done.set()
def is_published(self, eid): return eid in self.published
def mark_published(self, eid): self.published.add(eid)
def bump_last_seen(self, eid): ...
def sweep_old_ids(self): return 0
def subject_for(self, e): return f"central.test.{e.id}"
@pytest.fixture
def sup_factory():
    """Return a builder for supervisors with mocked NATS + ConfigStore and a spied ``_publish_event``."""

    def _build(area: MonitoringArea | None):
        # NATS client whose ``jetstream()`` hands back a mock JetStream context.
        jetstream = AsyncMock()
        jetstream.publish = AsyncMock()
        nats_client = AsyncMock()
        nats_client.jetstream = MagicMock(return_value=jetstream)

        # Config store: every async accessor is stubbed with a fixed value.
        config_store = MagicMock()
        config_store.list_streams = AsyncMock(return_value=[])
        config_store.get_stream = AsyncMock(return_value=None)
        config_store.set_adapter_last_error = AsyncMock()
        config_store.get_api_key = AsyncMock(return_value=None)
        config_store.get_monitoring_area = AsyncMock(return_value=area)

        config_source = MagicMock()
        config_source.get_enrichment_config = AsyncMock(
            return_value=EnrichmentConfig()
        )

        supervisor = sup_mod.Supervisor(
            config_source=config_source,
            config_store=config_store,
            nats_url="nats://x:4222",
            cloudevents_config=None,
            enrichment_config=EnrichmentConfig(),
        )
        supervisor._nc = nats_client
        supervisor._js = nats_client.jetstream()
        supervisor._monitoring_area = area
        # Spies: tests assert on these instead of real publishes.
        supervisor._publish_event = AsyncMock()
        supervisor._publish_meta = AsyncMock()
        supervisor._adapters["mock"] = _MockAdapter
        return supervisor

    return _build
async def _drive(sup, events):
    """Run ``sup._run_adapter_loop`` over ``events`` until the mock adapter signals done.

    The loop task is then cancelled and the adapter is returned so callers can
    inspect what was marked published.
    """
    mock = _MockAdapter(MagicMock(cadence_s=3600))
    mock.events = events
    state = sup_mod.AdapterState(
        name="mock",
        config=AdapterConfig(
            name="mock",
            enabled=True,
            cadence_s=3600,
            settings={},
            paused_at=None,
            updated_at=datetime.now(timezone.utc),
        ),
        adapter=mock,
    )
    loop_task = asyncio.create_task(sup._run_adapter_loop(state))
    try:
        await asyncio.wait_for(mock.done.wait(), timeout=5.0)
        # Yield to the event loop once more before cancelling.
        await asyncio.sleep(0)
    finally:
        # Cancel ONLY this loop -- never touch sup._shutdown_event so callers
        # can drive the same supervisor through multiple poll cycles.
        loop_task.cancel()
        try:
            await loop_task
        except (asyncio.CancelledError, Exception):
            pass
    return mock
# --- verdict matrix -----------------------------------------------------------
@pytest.mark.asyncio
@pytest.mark.parametrize(
    "area,geo,should_publish,should_drop",
    [
        pytest.param(
            None, Geo(centroid=NYC), True, False,
            id="no-area-publishes-out-of-bbox",
        ),
        pytest.param(
            IDAHO, Geo(centroid=BOISE), True, False,
            id="in-bounds-publishes",
        ),
        pytest.param(
            IDAHO, Geo(centroid=NYC), False, True,
            id="out-of-bounds-NY-drops",
        ),
        pytest.param(
            IDAHO, Geo(centroid=SPOKANE), False, True,
            id="out-of-bounds-Spokane-drops",
        ),
        pytest.param(
            IDAHO, Geo(), True, False,
            id="null-geom-publishes",
        ),
        pytest.param(
            IDAHO, Geo(geometry={"type": "Nonsense"}), True, False,
            id="invalid-geom-fail-open",
        ),
    ],
)
async def test_verdict(sup_factory, area, geo, should_publish, should_drop):
    """One event per case: check publish, mark_published, and the drop counter."""
    supervisor = sup_factory(area)
    adapter = await _drive(supervisor, [_ev("e1", geo)])
    if not should_publish:
        supervisor._publish_event.assert_not_called()
        # CRITICAL forward-only: drops MUST NOT mark_published, else widening
        # the bbox can't re-deliver -- see [[feedback_dedup_forward_only]].
        assert adapter.published == set()
    else:
        assert supervisor._publish_event.await_count == 1
        assert adapter.published == {"e1"}
    dropped_exactly_one = supervisor._dropped_publish == {"mock": 1}
    assert dropped_exactly_one is should_drop
@pytest.mark.asyncio
async def test_mixed_batch_partitions_correctly(sup_factory):
    """A batch mixing in-area, out-of-area and null-geom events is split per event."""
    supervisor = sup_factory(IDAHO)
    batch = [
        _ev("in1", Geo(centroid=BOISE)),
        _ev("out1", Geo(centroid=NYC)),
        _ev("null1", Geo()),
        _ev("out2", Geo(centroid=SPOKANE)),
    ]
    adapter = await _drive(supervisor, batch)
    # Two events go out (in-area + null-geom); the two out-of-area ones are counted as drops.
    assert supervisor._publish_event.await_count == 2
    assert adapter.published == {"in1", "null1"}
    assert supervisor._dropped_publish == {"mock": 2}
@pytest.mark.asyncio
async def test_widening_area_re_publishes_previously_dropped(sup_factory):
    """Forward-only invariant: drops are reversible -- never mark_published."""
    supervisor = sup_factory(IDAHO)
    event = _ev("spokane", Geo(centroid=SPOKANE))

    # One supervisor drives both polls, so the second poll sees the same id
    # AFTER the bbox widens. Each _drive call builds its own mock adapter.
    first_run = await _drive(supervisor, [event])
    supervisor._publish_event.assert_not_called()
    assert first_run.published == set()

    widened = MonitoringArea(north=48.5, south=41.8, east=-111.0, west=-118.0)
    supervisor._monitoring_area = widened

    second_run = await _drive(supervisor, [event])
    assert supervisor._publish_event.await_count == 1
    assert second_run.published == {"spokane"}
@pytest.mark.asyncio
async def test_refresh_loop_reloads_area_and_logs_summary(
    sup_factory, caplog, monkeypatch
):
    """The refresh loop picks up the stored area and logs the drop summary."""
    supervisor = sup_factory(None)
    supervisor._dropped_publish = {"mock": 7}
    # Shrink the refresh interval so the loop runs within the short sleep below.
    monkeypatch.setattr(sup_mod, "MONITORING_AREA_REFRESH_S", 0.05)
    supervisor._config_store.get_monitoring_area = AsyncMock(return_value=IDAHO)

    with caplog.at_level(logging.INFO):
        refresh_task = asyncio.create_task(
            supervisor._refresh_monitoring_area_loop()
        )
        await asyncio.sleep(0.15)
        supervisor._shutdown_event.set()
        try:
            await asyncio.wait_for(refresh_task, timeout=1.0)
        except (asyncio.TimeoutError, asyncio.CancelledError):
            refresh_task.cancel()

    assert supervisor._monitoring_area == IDAHO
    logged = [record.message for record in caplog.records]
    assert any("publish bbox filter drop summary" in text for text in logged)

View file

@ -17,7 +17,7 @@ import pytest
from central.adapter import SourceAdapter
from central.adapters.tomtom_flow import TomTomFlowAdapter
from central.archive import _build_geom_sql
from central.monitoring_area import build_geom_json
from central.config_models import AdapterConfig
from central.tomtom_flow_parse import (
_local_to_lonlat,
@ -83,10 +83,10 @@ def test_subject_for():
def test_archive_prefers_geo_geometry():
line = {"type": "LineString", "coordinates": [[-116.2, 43.6], [-116.1, 43.7]]}
# geometry present -> returned verbatim (not bbox/centroid)
out = _build_geom_sql({"geometry": line, "centroid": [-116.2, 43.6], "bbox": [-116.3, 43.5, -116.0, 43.8]})
out = build_geom_json({"geometry": line, "centroid": [-116.2, 43.6], "bbox": [-116.3, 43.5, -116.0, 43.8]})
assert json.loads(out) == line
# no geometry -> falls back to centroid Point (regression guard)
out2 = _build_geom_sql({"centroid": [-116.2, 43.6]})
out2 = build_geom_json({"centroid": [-116.2, 43.6]})
assert json.loads(out2)["type"] == "Point"