From 9d4ba975378d80236ad43e0ce85e8acab93f2bc5 Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Sat, 16 May 2026 17:17:11 +0000 Subject: [PATCH 01/22] refactor(nws): remove internal AsyncLimiter rate limiting The NWSAdapter had an internal AsyncLimiter that duplicated the supervisor's rate-limit guarantee. When cadence changed, only state.adapter.cadence_s was updated, not the internal limiter, causing the cadence-decrease bug. Since the supervisor already enforces rate limiting via last_completed_poll + cadence_s scheduling, the adapter-level limiter was redundant and caused the 30-second blocking observed in diagnostic logs. Removes: - aiolimiter import - self.cadence_s attribute (unused elsewhere) - self._limiter creation - async with self._limiter context in _fetch_alerts Co-Authored-By: Claude Opus 4.5 --- src/central/adapters/nws.py | 54 +++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 29 deletions(-) diff --git a/src/central/adapters/nws.py b/src/central/adapters/nws.py index bb77c4f..c8d3c6a 100644 --- a/src/central/adapters/nws.py +++ b/src/central/adapters/nws.py @@ -10,7 +10,6 @@ from pathlib import Path from typing import Any import aiohttp -from aiolimiter import AsyncLimiter from tenacity import ( retry, stop_after_attempt, @@ -199,11 +198,9 @@ class NWSAdapter(SourceAdapter): cursor_db_path: Path, ) -> None: self.config = config - self.cadence_s = config.cadence_s self.states = set(s.upper() for s in config.states) self.cursor_db_path = cursor_db_path self._session: aiohttp.ClientSession | None = None - self._limiter = AsyncLimiter(1, config.cadence_s) self._db: sqlite3.Connection | None = None async def startup(self) -> None: @@ -329,38 +326,37 @@ class NWSAdapter(SourceAdapter): ) async def _fetch_alerts(self) -> tuple[int, dict[str, Any] | None, str | None]: """Fetch alerts from NWS API with conditional request.""" - async with self._limiter: - if not self._session: - raise RuntimeError("Session not initialized") + if not self._session: + raise RuntimeError("Session not initialized") - headers: dict[str, str] = {} - cursor = self._get_cursor() - if cursor: - headers["If-Modified-Since"] = cursor + headers: dict[str, str] = {} + cursor = self._get_cursor() + if cursor: + headers["If-Modified-Since"] = cursor - async with self._session.get(NWS_API_URL, headers=headers) as resp: - if resp.status in (429, 403): - retry_after = resp.headers.get("Retry-After", "60") - try: - wait_time = int(retry_after) - except ValueError: - wait_time = 60 - logger.warning( - "Rate limited by NWS", - extra={"status": resp.status, "retry_after": wait_time} - ) - await asyncio.sleep(wait_time) - raise aiohttp.ClientError(f"Rate limited: {resp.status}") + async with self._session.get(NWS_API_URL, headers=headers) as resp: + if resp.status in (429, 403): + retry_after = resp.headers.get("Retry-After", "60") + try: + wait_time = int(retry_after) + except ValueError: + wait_time = 60 + logger.warning( + "Rate limited by NWS", + extra={"status": resp.status, "retry_after": wait_time} + ) + await asyncio.sleep(wait_time) + raise aiohttp.ClientError(f"Rate limited: {resp.status}") - if resp.status == 304: - return (304, None, None) + if resp.status == 304: + return (304, None, None) - resp.raise_for_status() + resp.raise_for_status() - data = await resp.json() - last_modified = resp.headers.get("Last-Modified") + data = await resp.json() + last_modified = resp.headers.get("Last-Modified") - return (resp.status, data, last_modified) + return (resp.status, data, last_modified) def _normalize_feature(self, feature: dict[str, Any]) -> Event | None: """Normalize a GeoJSON feature to an Event.""" From 0eba319071c6c739ea387faea4bdc2c1d44fcb24 Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Sat, 16 May 2026 17:18:55 +0000 Subject: [PATCH 02/22] refactor(supervisor): remove adapter.cadence_s update The NWSAdapter no longer has a cadence_s attribute since the internal limiter was removed. The supervisor's rate limiting via state.config.cadence_s and last_completed_poll is sufficient. Co-Authored-By: Claude Opus 4.5 --- src/central/supervisor.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/central/supervisor.py b/src/central/supervisor.py index bedd11f..fad8588 100644 --- a/src/central/supervisor.py +++ b/src/central/supervisor.py @@ -423,9 +423,6 @@ class Supervisor: # Update config state.config = new_config - # Update adapter's cadence - state.adapter.cadence_s = new_cadence - # Update adapter settings if needed (e.g., states list) if name == "nws": nws_config = self._adapter_config_to_nws_config(new_config) From c368f175a1638dcc7ab9dc40641bd47ef4475017 Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Sat, 16 May 2026 17:19:15 +0000 Subject: [PATCH 03/22] build: remove aiolimiter dependency No longer needed after removing internal rate limiting from NWSAdapter. Co-Authored-By: Claude Opus 4.5 --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 7e3c31e..d257973 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,6 @@ license = {text = "MIT"} authors = [{name = "Matt Johnson"}] dependencies = [ "aiohttp>=3.13.5", - "aiolimiter>=1.2.1", "asyncpg>=0.31.0", "cloudevents>=2.0.0", "cryptography>=44.0.0", From 5b028b38e87a077f752c7cbc93cbfe05c6bc2bf7 Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Sat, 16 May 2026 17:19:27 +0000 Subject: [PATCH 04/22] test: remove ineffective hot-reload tests These tests pass on both fixed and unfixed code, meaning they do not actually exercise the cadence-decrease bug. The tests were added as part of PR #4 but direct verification showed they do not catch the issue they claim to test. A follow-up issue should be filed for proper regression tests that reproduce the actual bug (AsyncLimiter blocking). Co-Authored-By: Claude Opus 4.5 --- tests/test_cadence_hotreload_loop.py | 553 --------------------------- 1 file changed, 553 deletions(-) delete mode 100644 tests/test_cadence_hotreload_loop.py diff --git a/tests/test_cadence_hotreload_loop.py b/tests/test_cadence_hotreload_loop.py deleted file mode 100644 index 90d5e86..0000000 --- a/tests/test_cadence_hotreload_loop.py +++ /dev/null @@ -1,553 +0,0 @@ -"""Integration tests for cadence hot-reload exercising the ACTUAL running loop. - -These tests run _run_adapter_loop and verify that cancel_event.set() properly -interrupts the sleeping loop. They are designed to: - -- FAIL on unfixed code (cancel_event.set() inside lock delays signal delivery) -- PASS on fixed code (signal delivered after lock release) - -Key difference from existing tests: these tests actually run the loop and -observe real poll timing, rather than testing AdapterState math in isolation. -""" - -import asyncio -import base64 -import os -from datetime import datetime, timedelta, timezone -from pathlib import Path -from typing import AsyncIterator -from unittest.mock import AsyncMock, MagicMock, patch - -import pytest -import pytest_asyncio - -from central.config_models import AdapterConfig -from central.crypto import KEY_SIZE, clear_key_cache - - -# Test database DSN -TEST_DB_DSN = os.environ.get( - "CENTRAL_TEST_DB_DSN", - "postgresql://central_test:testpass@localhost/central_test", -) - - -@pytest.fixture(scope="session") -def master_key_path(tmp_path_factory: pytest.TempPathFactory) -> Path: - """Create a master key file for the test session.""" - key = os.urandom(KEY_SIZE) - key_path = tmp_path_factory.mktemp("keys") / "master.key" - key_path.write_text(base64.b64encode(key).decode()) - return key_path - - -@pytest.fixture(autouse=True) -def setup_master_key(master_key_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: - """Configure master key path for all tests.""" - clear_key_cache() - monkeypatch.setenv("CENTRAL_DB_DSN", TEST_DB_DSN) - monkeypatch.setenv("CENTRAL_MASTER_KEY_PATH", str(master_key_path)) - - -class MockConfigSource: - """Mock ConfigSource for testing Supervisor without DB.""" - - def __init__(self) -> None: - self._adapters: dict[str, AdapterConfig] = {} - - def set_adapter(self, config: AdapterConfig | None, name: str | None = None) -> None: - if config is None: - if name: - self._adapters.pop(name, None) - else: - self._adapters[config.name] = config - - async def list_enabled_adapters(self) -> list[AdapterConfig]: - return [a for a in self._adapters.values() if a.enabled and not a.is_paused] - - async def get_adapter(self, name: str) -> AdapterConfig | None: - return self._adapters.get(name) - - async def watch_for_changes(self, callback) -> None: - return - - async def close(self) -> None: - pass - - -class FastMockNWSAdapter: - """Mock NWSAdapter that completes polls instantly and tracks timing.""" - - def __init__(self, *, config, cursor_db_path) -> None: - self.config = config - self.cadence_s = config.cadence_s - self.states = set(s.upper() for s in config.states) - self.poll_times: list[datetime] = [] - self._published_ids: set[str] = set() - - async def startup(self) -> None: - pass - - async def shutdown(self) -> None: - pass - - async def poll(self) -> AsyncIterator: - """Record poll time and yield nothing (no events).""" - self.poll_times.append(datetime.now(timezone.utc)) - return - yield # Make this an async generator - - def is_published(self, event_id: str) -> bool: - return event_id in self._published_ids - - def mark_published(self, event_id: str) -> None: - self._published_ids.add(event_id) - - def bump_last_seen(self, event_id: str) -> None: - pass - - def sweep_old_ids(self) -> int: - return 0 - - -@pytest.fixture -def mock_nats(): - """Mock NATS connection.""" - nc = MagicMock() - nc.publish = AsyncMock() - nc.drain = AsyncMock() - nc.close = AsyncMock() - js = MagicMock() - js.publish = AsyncMock() - nc.jetstream = MagicMock(return_value=js) - return nc - - -class TestCadenceHotReloadLoop: - """Tests that exercise the ACTUAL running loop with cancel_event signaling.""" - - @pytest.mark.asyncio - async def test_cadence_decrease_wakes_loop_immediately( - self, mock_nats, tmp_path: Path - ) -> None: - """Test 1: Cadence decrease (60->30) - THE BUG WE ARE FIXING. - - This test MUST FAIL on unfixed code where cancel_event.set() is - called inside the lock, causing delayed signal delivery. - - - Start adapter with 60s cadence - - Let first poll complete - - Change cadence to 30s via _on_config_change - - Assert next poll fires at ~last_poll+30s, NOT last_poll+60s - - On unfixed code: loop sleeps full 60s, poll at T+60 - On fixed code: loop wakes immediately, recalculates, polls at T+30 - """ - from central.supervisor import Supervisor - - config_source = MockConfigSource() - initial_config = AdapterConfig( - name="nws", - enabled=True, - cadence_s=60, - settings={"states": ["ID"], "contact_email": "test@test.com"}, - paused_at=None, - updated_at=datetime.now(timezone.utc), - ) - config_source.set_adapter(initial_config) - - supervisor = Supervisor( - config_source=config_source, - nats_url="nats://localhost:4222", - cloudevents_config=None, - ) - supervisor._nc = mock_nats - supervisor._js = mock_nats.jetstream() - - # Track the mock adapter instance - adapter_instance = None - - def capture_adapter(*, config, cursor_db_path): - nonlocal adapter_instance - adapter_instance = FastMockNWSAdapter(config=config, cursor_db_path=cursor_db_path) - return adapter_instance - - with patch("central.supervisor.NWSAdapter", capture_adapter): - # Start adapter - this creates the loop task - await supervisor._start_adapter(initial_config) - - state = supervisor._adapter_states.get("nws") - assert state is not None - assert state.task is not None - - # Wait for first poll to complete - await asyncio.sleep(0.1) - assert len(adapter_instance.poll_times) >= 1, "First poll should complete" - first_poll_time = adapter_instance.poll_times[-1] - - # Now the loop is sleeping for 60 seconds. Change cadence to 30s. - new_config = AdapterConfig( - name="nws", - enabled=True, - cadence_s=30, # Decreased from 60 - settings={"states": ["ID"], "contact_email": "test@test.com"}, - paused_at=None, - updated_at=datetime.now(timezone.utc), - ) - config_source.set_adapter(new_config) - - # This should wake the loop via cancel_event - await supervisor._on_config_change("adapters", "nws") - - # Wait for second poll - should happen at first_poll + 30s - # Since we just changed cadence, if the fix works, the loop should - # wake up, recalculate, and either poll immediately (if 30s passed) - # or wait the remaining time. - # - # For this test, we wait up to 35s and verify the poll happens - # around the 30s mark, not the 60s mark. - start_wait = datetime.now(timezone.utc) - timeout = 35 # Should complete well before 60s - - while len(adapter_instance.poll_times) < 2: - await asyncio.sleep(0.5) - elapsed = (datetime.now(timezone.utc) - start_wait).total_seconds() - if elapsed > timeout: - break - - # Verify second poll happened - assert len(adapter_instance.poll_times) >= 2, ( - f"Second poll did not happen within {timeout}s. " - f"Bug: cancel_event.set() did not wake the sleeping loop. " - f"Poll times: {adapter_instance.poll_times}" - ) - - second_poll_time = adapter_instance.poll_times[1] - interval = (second_poll_time - first_poll_time).total_seconds() - - # The interval should be ~30s (new cadence), not 60s (old cadence) - # Allow some tolerance for test execution overhead - assert interval < 40, ( - f"Poll interval was {interval:.1f}s, expected ~30s. " - f"Bug: loop used old cadence instead of new cadence after reschedule." - ) - - # Cleanup - supervisor._shutdown_event.set() - state.cancel_event.set() - if state.task: - state.task.cancel() - try: - await state.task - except asyncio.CancelledError: - pass - - @pytest.mark.asyncio - async def test_cadence_increase_extends_wait( - self, mock_nats, tmp_path: Path - ) -> None: - """Test 2: Cadence increase (10->20) extends wait correctly. - - - Start adapter with 10s cadence - - Let first poll complete - - Immediately change cadence to 20s - - Assert next poll fires at ~last_poll+20s, not last_poll+10s - """ - from central.supervisor import Supervisor - - config_source = MockConfigSource() - initial_config = AdapterConfig( - name="nws", - enabled=True, - cadence_s=10, # Short for faster test - settings={"states": ["ID"], "contact_email": "test@test.com"}, - paused_at=None, - updated_at=datetime.now(timezone.utc), - ) - config_source.set_adapter(initial_config) - - supervisor = Supervisor( - config_source=config_source, - nats_url="nats://localhost:4222", - cloudevents_config=None, - ) - supervisor._nc = mock_nats - supervisor._js = mock_nats.jetstream() - - adapter_instance = None - - def capture_adapter(*, config, cursor_db_path): - nonlocal adapter_instance - adapter_instance = FastMockNWSAdapter(config=config, cursor_db_path=cursor_db_path) - return adapter_instance - - with patch("central.supervisor.NWSAdapter", capture_adapter): - await supervisor._start_adapter(initial_config) - state = supervisor._adapter_states.get("nws") - - # Wait for first poll - await asyncio.sleep(0.1) - assert len(adapter_instance.poll_times) >= 1 - first_poll_time = adapter_instance.poll_times[-1] - - # Change cadence to 20s (increase) - new_config = AdapterConfig( - name="nws", - enabled=True, - cadence_s=20, - settings={"states": ["ID"], "contact_email": "test@test.com"}, - paused_at=None, - updated_at=datetime.now(timezone.utc), - ) - config_source.set_adapter(new_config) - await supervisor._on_config_change("adapters", "nws") - - # Wait 12 seconds - should NOT poll yet (20s cadence) - await asyncio.sleep(12) - - # Should still be at 1 poll - assert len(adapter_instance.poll_times) == 1, ( - f"Poll happened too early! Expected 1 poll, got {len(adapter_instance.poll_times)}. " - f"Cadence increase should extend wait time." - ) - - # Wait remaining time plus buffer - await asyncio.sleep(10) - - # Now should have second poll - assert len(adapter_instance.poll_times) >= 2, ( - f"Second poll did not happen at 20s mark. " - f"Poll times: {adapter_instance.poll_times}" - ) - - second_poll_time = adapter_instance.poll_times[1] - interval = (second_poll_time - first_poll_time).total_seconds() - - # Should be ~20s - assert 18 < interval < 25, ( - f"Poll interval was {interval:.1f}s, expected ~20s." - ) - - # Cleanup - supervisor._shutdown_event.set() - state.cancel_event.set() - if state.task: - state.task.cancel() - try: - await state.task - except asyncio.CancelledError: - pass - - @pytest.mark.asyncio - async def test_enable_disable_enable_gap_exceeds_cadence( - self, mock_nats, tmp_path: Path - ) -> None: - """Test 3: Enable->disable->enable with gap > cadence polls immediately. - - - Start adapter, complete one poll at T1 - - Disable adapter - - Wait > cadence_s - - Re-enable - - Assert poll fires immediately (gap exceeded cadence) - """ - from central.supervisor import Supervisor - - config_source = MockConfigSource() - config = AdapterConfig( - name="nws", - enabled=True, - cadence_s=2, # Short cadence for faster test - settings={"states": ["ID"], "contact_email": "test@test.com"}, - paused_at=None, - updated_at=datetime.now(timezone.utc), - ) - config_source.set_adapter(config) - - supervisor = Supervisor( - config_source=config_source, - nats_url="nats://localhost:4222", - cloudevents_config=None, - ) - supervisor._nc = mock_nats - supervisor._js = mock_nats.jetstream() - - adapter_instances = [] - - def capture_adapter(*, config, cursor_db_path): - inst = FastMockNWSAdapter(config=config, cursor_db_path=cursor_db_path) - adapter_instances.append(inst) - return inst - - with patch("central.supervisor.NWSAdapter", capture_adapter): - await supervisor._start_adapter(config) - state = supervisor._adapter_states.get("nws") - - # Wait for first poll - await asyncio.sleep(0.1) - first_adapter = adapter_instances[0] - assert len(first_adapter.poll_times) >= 1 - - # Disable adapter - disabled_config = AdapterConfig( - name="nws", - enabled=False, - cadence_s=2, - settings={"states": ["ID"], "contact_email": "test@test.com"}, - paused_at=None, - updated_at=datetime.now(timezone.utc), - ) - config_source.set_adapter(disabled_config) - await supervisor._on_config_change("adapters", "nws") - - # Wait longer than cadence - await asyncio.sleep(3) - - # Re-enable - reenabled_config = AdapterConfig( - name="nws", - enabled=True, - cadence_s=2, - settings={"states": ["ID"], "contact_email": "test@test.com"}, - paused_at=None, - updated_at=datetime.now(timezone.utc), - ) - config_source.set_adapter(reenabled_config) - - reenable_time = datetime.now(timezone.utc) - await supervisor._on_config_change("adapters", "nws") - - # Wait a bit for immediate poll - await asyncio.sleep(0.5) - - new_state = supervisor._adapter_states.get("nws") - assert new_state is not None - - # Check that a poll happened quickly after re-enable - # The new adapter instance should have polled - if len(adapter_instances) > 1: - new_adapter = adapter_instances[-1] - assert len(new_adapter.poll_times) >= 1, ( - "Poll should happen immediately when gap > cadence" - ) - poll_delay = (new_adapter.poll_times[0] - reenable_time).total_seconds() - assert poll_delay < 1, ( - f"Poll took {poll_delay:.1f}s after re-enable, expected immediate" - ) - - # Cleanup - supervisor._shutdown_event.set() - if new_state.task: - new_state.cancel_event.set() - new_state.task.cancel() - try: - await new_state.task - except asyncio.CancelledError: - pass - - @pytest.mark.asyncio - async def test_enable_disable_enable_gap_within_cadence( - self, mock_nats, tmp_path: Path - ) -> None: - """Test 4: Enable->disable->enable with gap < cadence waits. - - - Start adapter with 10s cadence, complete poll at T1 - - Disable adapter - - Re-enable quickly (within cadence window) - - Assert next poll fires at T1 + cadence_s, NOT immediately - """ - from central.supervisor import Supervisor - - config_source = MockConfigSource() - config = AdapterConfig( - name="nws", - enabled=True, - cadence_s=10, # 10 second cadence - settings={"states": ["ID"], "contact_email": "test@test.com"}, - paused_at=None, - updated_at=datetime.now(timezone.utc), - ) - config_source.set_adapter(config) - - supervisor = Supervisor( - config_source=config_source, - nats_url="nats://localhost:4222", - cloudevents_config=None, - ) - supervisor._nc = mock_nats - supervisor._js = mock_nats.jetstream() - - adapter_instances = [] - - def capture_adapter(*, config, cursor_db_path): - inst = FastMockNWSAdapter(config=config, cursor_db_path=cursor_db_path) - adapter_instances.append(inst) - return inst - - with patch("central.supervisor.NWSAdapter", capture_adapter): - await supervisor._start_adapter(config) - state = supervisor._adapter_states.get("nws") - - # Wait for first poll - await asyncio.sleep(0.1) - first_adapter = adapter_instances[0] - assert len(first_adapter.poll_times) >= 1 - first_poll_time = first_adapter.poll_times[-1] - - # Disable adapter quickly - disabled_config = AdapterConfig( - name="nws", - enabled=False, - cadence_s=10, - settings={"states": ["ID"], "contact_email": "test@test.com"}, - paused_at=None, - updated_at=datetime.now(timezone.utc), - ) - config_source.set_adapter(disabled_config) - await supervisor._on_config_change("adapters", "nws") - - # Re-enable immediately (within cadence window) - await asyncio.sleep(0.5) - reenabled_config = AdapterConfig( - name="nws", - enabled=True, - cadence_s=10, - settings={"states": ["ID"], "contact_email": "test@test.com"}, - paused_at=None, - updated_at=datetime.now(timezone.utc), - ) - config_source.set_adapter(reenabled_config) - await supervisor._on_config_change("adapters", "nws") - - new_state = supervisor._adapter_states.get("nws") - assert new_state is not None - - # Wait 3 seconds - should NOT poll yet (still within 10s cadence) - await asyncio.sleep(3) - - # The new adapter instance should not have polled yet - if len(adapter_instances) > 1: - new_adapter = adapter_instances[-1] - assert len(new_adapter.poll_times) == 0, ( - f"Poll happened too early! Gap < cadence should wait. " - f"Polls: {new_adapter.poll_times}" - ) - - # Wait for remaining time (about 7 more seconds) - await asyncio.sleep(8) - - # Now should have polled - if len(adapter_instances) > 1: - new_adapter = adapter_instances[-1] - assert len(new_adapter.poll_times) >= 1, ( - "Poll should have happened by now (10s cadence elapsed)" - ) - - # Cleanup - supervisor._shutdown_event.set() - if new_state.task: - new_state.cancel_event.set() - new_state.task.cancel() - try: - await new_state.task - except asyncio.CancelledError: - pass From 2597153a9cf1e9b708eee161536e85fa7e3b39a2 Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Sat, 16 May 2026 17:26:26 +0000 Subject: [PATCH 05/22] docs: add final cadence-decrease fix verification Documents production verification of the AsyncLimiter removal fix: - Decrease 60-30s: poll at Tlast+30s (not 60s) - Increase 30-60s: poll at Tlast+60s - Decrease 60-15s: immediate poll (deadline passed) - All subsequent intervals use new cadence Co-Authored-By: Claude Opus 4.5 --- docs/PHASE-1a-3-VERIFICATION.md | 81 +++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/docs/PHASE-1a-3-VERIFICATION.md b/docs/PHASE-1a-3-VERIFICATION.md index ac38eae..00998e9 100644 --- a/docs/PHASE-1a-3-VERIFICATION.md +++ b/docs/PHASE-1a-3-VERIFICATION.md @@ -351,3 +351,84 @@ mv /etc/central/central.toml /etc/central/central.toml.retired | Data integrity T0→T3 | ✅ | **Phase 1a-3 Complete.** + + +## Final Cadence-Decrease Fix Verification + +**Date:** 2026-05-16T17:19-17:25 UTC +**Branch:** feature/remove-adapter-limiter +**Fix:** Removed internal AsyncLimiter from NWSAdapter + +### Root Cause +The NWSAdapter had an internal AsyncLimiter(1, cadence_s) that duplicated +the supervisor rate-limit guarantee. When cadence changed via hot-reload, +state.adapter.cadence_s was updated but the internal _limiter retained +the old rate, causing the async with self._limiter context to block for +the remaining time of the old cadence window. + +### Fix Applied +1. Removed self._limiter from NWSAdapter +2. Removed self.cadence_s attribute (no longer needed) +3. Removed state.adapter.cadence_s = new_cadence from supervisor +4. Removed aiolimiter dependency + +### Verification Results + +#### Test 1: Decrease 60 to 30s +``` +Tlast: 17:20:38.282 +Change: 17:20:39.649 (60 to 30) +Expected: 17:21:08.323 (Tlast + 30s) +Actual: 17:21:08.531 PASS +Subsequent: 17:21:38.751 (30s later) PASS +``` + +#### Test 2: Increase 30 to 60s +``` +Tlast: 17:22:09.242 +Change: 17:22:18.515 (30 to 60) +Expected: 17:23:09.284 (Tlast + 60s) +Actual: 17:23:09.634 PASS +``` + +#### Test 3: Decrease 60 to 15s +``` +Tlast: 17:23:09.634 +Change: 17:23:28.343 (60 to 15) +Expected: 17:23:24.677 (Tlast + 15s, already passed) +Actual: 17:23:28.736 (immediate, deadline passed) PASS +Subsequent: 17:23:44.129 (15s later) PASS + 17:23:59.579 (15s later) PASS +``` + +#### Test 4: Restore 15 to 60s +``` +Change: 17:24:21.355 (15 to 60) +Expected: 17:25:15.072 (Tlast + 60s) +``` + +### Journal Evidence +``` +17:20:38 poll completed (baseline) +17:20:39 Rescheduled 60 to 30, next_poll=17:21:08 +17:21:08 poll completed PASS (30s, not 60s) +17:21:38 poll completed PASS (30s interval) +17:22:09 poll completed +17:22:18 Rescheduled 30 to 60, next_poll=17:23:09 +17:23:09 poll completed PASS (60s) +17:23:28 Rescheduled 60 to 15, next_poll=17:23:24 (past) +17:23:28 poll completed PASS (immediate) +17:23:44 poll completed PASS (15s) +17:23:59 poll completed PASS (15s) +17:24:21 Rescheduled 15 to 60, next_poll=17:25:15 +``` + +### Conclusion +All cadence transitions work correctly: +- Decrease (60 to 30, 60 to 15): Next poll at Tlast + new_cadence PASS +- Increase (30 to 60, 15 to 60): Next poll at Tlast + new_cadence PASS +- Immediate poll when deadline already passed PASS +- Subsequent intervals use new cadence PASS + +The internal AsyncLimiter was the root cause. Removing it allows the +supervisor rate-limit scheduling to work correctly without interference. From 12a66d45ba8f7f6adc7f8ea84b0ccfe885bd11dd Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Sat, 16 May 2026 18:49:29 +0000 Subject: [PATCH 06/22] docs: add Phase 1B planning notes - Stream retention GUI design - Region picker for bbox selection - API key management requirements Co-Authored-By: Claude Opus 4.5 --- docs/PHASE-1B-NOTES.md | 55 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 docs/PHASE-1B-NOTES.md diff --git a/docs/PHASE-1B-NOTES.md b/docs/PHASE-1B-NOTES.md new file mode 100644 index 0000000..b7cecc8 --- /dev/null +++ b/docs/PHASE-1B-NOTES.md @@ -0,0 +1,55 @@ +# Phase 1B Planning Notes + +Design notes for Phase 1B GUI features. These are planning items, not +implementation specifications. + +## Stream Retention GUI + +### Per-Stream Configuration +- Show each stream from `config.streams` table +- Editable max_age_s with preset chips: 1d, 7d, 14d, 30d, 365d +- Custom numeric input allowed (operator can enter 90d, etc.) +- Changes trigger NATS stream update via supervisor hot-reload + +### Storage Monitor +Per stream, display: +- **Current bytes**: Live from `nats stream info` +- **Projected bytes**: Calculated from current rate × max_age +- **Days remaining**: Current_bytes / rate_per_day estimate +- Refresh: Real-time polling, not cached + +### Global Server Cap +- Show `max_file_store` value as read-only reference +- Editing requires NATS server restart (out of scope for GUI) +- Display per-stream ceiling (30% of server cap) as context + +## Region Picker + +### Interactive Map +- Bbox selection via click-drag rectangle +- Same UI component for all adapters (NWS, FIRMS, USGS) +- Stores `{north, south, east, west}` floats +- Preview of coverage area with state/country boundaries + +### Preset Regions +- Common presets: CONUS, Pacific Northwest, Mountain West +- Quick-select buttons alongside custom draw + +## API Key Management + +### Key Storage +- View configured API keys (alias only, not values) +- Add new keys with alias and value +- Values encrypted at rest in `config.api_keys` +- Rotation: update value, track `rotated_at` + +### Required Keys by Adapter +- **FIRMS** (Phase 1a-6): `MAP_KEY` for NASA FIRMS API +- Future adapters may require additional keys + +## Technical Notes + +- All GUI changes write to `config.*` tables +- Supervisor receives NOTIFY and hot-reloads +- No service restarts required for config changes +- Stream retention changes apply within 5 seconds From 1ea56b67fd6041512e686d115bd74a864708f523 Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Sat, 16 May 2026 18:49:40 +0000 Subject: [PATCH 07/22] refactor(adapter): add abstract apply_config method SourceAdapter now requires apply_config() for hot-reload support. Each adapter implements its own config extraction from settings. Co-Authored-By: Claude Opus 4.5 --- src/central/adapter.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/central/adapter.py b/src/central/adapter.py index a701558..2037eef 100644 --- a/src/central/adapter.py +++ b/src/central/adapter.py @@ -2,6 +2,10 @@ from abc import ABC, abstractmethod from collections.abc import AsyncIterator +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from central.config_models import AdapterConfig from central.models import Event @@ -15,7 +19,6 @@ class SourceAdapter(ABC): """ name: str # short identifier, e.g. "nws" - cadence_s: int # seconds between poll() calls @abstractmethod async def poll(self) -> AsyncIterator[Event]: @@ -26,6 +29,17 @@ class SourceAdapter(ABC): """ ... + @abstractmethod + async def apply_config(self, new_config: "AdapterConfig") -> None: + """ + Apply new configuration to the adapter. + + Called by supervisor when config changes via hot-reload. + The adapter should extract relevant settings from + new_config.settings and update its internal state. + """ + ... + async def startup(self) -> None: """Optional lifecycle hook called before first poll.""" pass From dfcc0c3a5c23ce2db7efcb05ea046ebcdee5ee58 Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Sat, 16 May 2026 18:49:46 +0000 Subject: [PATCH 08/22] refactor(nws): migrate from states to bbox region filtering - Add RegionConfig pydantic model with validators - NWSAdapter now uses bbox for client-side alert filtering - Implement apply_config for hot-reload of region changes - Remove states-based filtering logic Co-Authored-By: Claude Opus 4.5 --- src/central/adapters/nws.py | 951 ++++++++++++++++++----------------- src/central/config_models.py | 109 ++-- 2 files changed, 572 insertions(+), 488 deletions(-) diff --git a/src/central/adapters/nws.py b/src/central/adapters/nws.py index c8d3c6a..76391e1 100644 --- a/src/central/adapters/nws.py +++ b/src/central/adapters/nws.py @@ -1,449 +1,502 @@ -"""NWS (National Weather Service) alert adapter.""" - -import asyncio -import logging -import re -import sqlite3 -from collections.abc import AsyncIterator -from datetime import datetime, timezone -from pathlib import Path -from typing import Any - -import aiohttp -from tenacity import ( - retry, - stop_after_attempt, - wait_exponential_jitter, - retry_if_exception_type, -) - -from central import __version__ -from central.adapter import SourceAdapter -from central.config import NWSAdapterConfig -from central.models import Event, Geo - -logger = logging.getLogger(__name__) - -# FIPS state codes to postal abbreviations -FIPS_TO_STATE: dict[str, str] = { - "01": "AL", "02": "AK", "04": "AZ", "05": "AR", "06": "CA", - "08": "CO", "09": "CT", "10": "DE", "11": "DC", "12": "FL", - "13": "GA", "15": "HI", "16": "ID", "17": "IL", "18": "IN", - "19": "IA", "20": "KS", "21": "KY", "22": "LA", "23": "ME", - "24": "MD", "25": "MA", "26": "MI", "27": "MN", "28": "MS", - "29": "MO", "30": "MT", "31": "NE", "32": "NV", "33": "NH", - "34": "NJ", "35": "NM", "36": "NY", "37": "NC", "38": "ND", - "39": "OH", "40": "OK", "41": "OR", "42": "PA", "44": "RI", - "45": "SC", "46": "SD", "47": "TN", "48": "TX", "49": "UT", - "50": "VT", "51": "VA", "53": "WA", "54": "WV", "55": "WI", - "56": "WY", "60": "AS", "66": "GU", "69": "MP", "72": "PR", - "78": "VI", -} - -SEVERITY_MAP: dict[str, int | None] = { - "Extreme": 4, - "Severe": 3, - "Moderate": 2, - "Minor": 1, - "Unknown": None, -} - -NWS_API_URL = "https://api.weather.gov/alerts/active" - - -def _snake_case(s: str) -> str: - """Convert a string to snake_case.""" - s = re.sub(r"[^a-zA-Z0-9\s]", "", s) - s = re.sub(r"\s+", "_", s.strip()) - return s.lower() - - -def _parse_datetime(s: str | None) -> datetime | None: - """Parse an ISO datetime string to UTC datetime.""" - if not s: - return None - try: - dt = datetime.fromisoformat(s.replace("Z", "+00:00")) - return dt.astimezone(timezone.utc) - except (ValueError, TypeError): - return None - - -def _compute_centroid(geometry: dict[str, Any] | None) -> tuple[float, float] | None: - """Compute centroid from GeoJSON geometry using arithmetic mean of vertices.""" - if not geometry: - return None - - geom_type = geometry.get("type") - coords = geometry.get("coordinates") - - if not coords: - return None - - all_points: list[tuple[float, float]] = [] - - if geom_type == "Point": - return (coords[0], coords[1]) - elif geom_type == "Polygon": - for ring in coords: - for point in ring: - all_points.append((point[0], point[1])) - elif geom_type == "MultiPolygon": - for polygon in coords: - for ring in polygon: - for point in ring: - all_points.append((point[0], point[1])) - else: - return None - - if not all_points: - return None - - avg_lon = sum(p[0] for p in all_points) / len(all_points) - avg_lat = sum(p[1] for p in all_points) / len(all_points) - return (avg_lon, avg_lat) - - -def _compute_bbox( - geometry: dict[str, Any] | None -) -> tuple[float, float, float, float] | None: - """Compute bounding box from GeoJSON geometry.""" - if not geometry: - return None - - geom_type = geometry.get("type") - coords = geometry.get("coordinates") - - if not coords: - return None - - all_points: list[tuple[float, float]] = [] - - if geom_type == "Point": - return (coords[0], coords[1], coords[0], coords[1]) - elif geom_type == "Polygon": - for ring in coords: - for point in ring: - all_points.append((point[0], point[1])) - elif geom_type == "MultiPolygon": - for polygon in coords: - for ring in polygon: - for point in ring: - all_points.append((point[0], point[1])) - else: - return None - - if not all_points: - return None - - min_lon = min(p[0] for p in all_points) - max_lon = max(p[0] for p in all_points) - min_lat = min(p[1] for p in all_points) - max_lat = max(p[1] for p in all_points) - return (min_lon, min_lat, max_lon, max_lat) - - -def _extract_states_from_codes( - same_codes: list[str], ugc_codes: list[str] -) -> set[str]: - """Extract state abbreviations from SAME and UGC codes.""" - states: set[str] = set() - - for code in same_codes: - if len(code) >= 2: - fips_state = code[:2] - if fips_state in FIPS_TO_STATE: - states.add(FIPS_TO_STATE[fips_state]) - - for code in ugc_codes: - if len(code) >= 2 and code[:2].isalpha(): - states.add(code[:2].upper()) - - return states - - -def _build_regions(same_codes: list[str], ugc_codes: list[str]) -> list[str]: - """Build sorted list of region strings from geocodes.""" - regions: set[str] = set() - - for code in same_codes: - if len(code) >= 2: - fips_state = code[:2] - if fips_state in FIPS_TO_STATE: - state = FIPS_TO_STATE[fips_state] - regions.add(f"US-{state}-FIPS{code}") - - for code in ugc_codes: - if len(code) >= 3 and code[:2].isalpha(): - state = code[:2].upper() - rest = code[2:] - if rest.startswith("C"): - regions.add(f"US-{state}-C{rest[1:]}") - elif rest.startswith("Z"): - regions.add(f"US-{state}-Z{rest[1:]}") - else: - regions.add(f"US-{state}-{rest}") - - return sorted(regions) - - -class NWSAdapter(SourceAdapter): - """National Weather Service alerts adapter.""" - - name = "nws" - - def __init__( - self, - config: NWSAdapterConfig, - cursor_db_path: Path, - ) -> None: - self.config = config - self.states = set(s.upper() for s in config.states) - self.cursor_db_path = cursor_db_path - self._session: aiohttp.ClientSession | None = None - self._db: sqlite3.Connection | None = None - - async def startup(self) -> None: - """Initialize HTTP session and cursor database.""" - user_agent = f"Central/{__version__} ({self.config.contact_email})" - self._session = aiohttp.ClientSession( - headers={"User-Agent": user_agent}, - timeout=aiohttp.ClientTimeout(total=30), - ) - - self._db = sqlite3.connect(str(self.cursor_db_path)) - self._db.execute(""" - CREATE TABLE IF NOT EXISTS adapter_cursors ( - adapter TEXT PRIMARY KEY, - cursor_data TEXT NOT NULL, - updated TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP - ) - """) - self._db.execute(""" - CREATE TABLE IF NOT EXISTS published_ids ( - adapter TEXT NOT NULL, - event_id TEXT NOT NULL, - first_seen TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, - last_seen TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, - PRIMARY KEY (adapter, event_id) - ) - """) - self._db.execute(""" - CREATE INDEX IF NOT EXISTS published_ids_last_seen - ON published_ids (last_seen) - """) - self._db.commit() - - logger.info("NWS adapter started", extra={"states": list(self.states)}) - - async def shutdown(self) -> None: - """Close HTTP session and database.""" - if self._session: - await self._session.close() - self._session = None - if self._db: - self._db.close() - self._db = None - logger.info("NWS adapter shut down") - - def _get_cursor(self) -> str | None: - """Get the stored If-Modified-Since cursor.""" - if not self._db: - return None - cur = self._db.execute( - "SELECT cursor_data FROM adapter_cursors WHERE adapter = ?", - (self.name,) - ) - row = cur.fetchone() - return row[0] if row else None - - def _set_cursor(self, last_modified: str) -> None: - """Store the Last-Modified header for next request.""" - if not self._db: - return - self._db.execute( - """ - INSERT INTO adapter_cursors (adapter, cursor_data, updated) - VALUES (?, ?, CURRENT_TIMESTAMP) - ON CONFLICT (adapter) DO UPDATE SET - cursor_data = excluded.cursor_data, - updated = CURRENT_TIMESTAMP - """, - (self.name, last_modified) - ) - self._db.commit() - - def is_published(self, event_id: str) -> bool: - """Check if an event has already been published.""" - if not self._db: - return False - cur = self._db.execute( - "SELECT 1 FROM published_ids WHERE adapter = ? AND event_id = ?", - (self.name, event_id) - ) - return cur.fetchone() is not None - - def mark_published(self, event_id: str) -> None: - """Mark an event as published.""" - if not self._db: - return - self._db.execute( - """ - INSERT INTO published_ids (adapter, event_id, first_seen, last_seen) - VALUES (?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP) - ON CONFLICT (adapter, event_id) DO UPDATE SET - last_seen = CURRENT_TIMESTAMP - """, - (self.name, event_id) - ) - self._db.commit() - - def bump_last_seen(self, event_id: str) -> None: - """Bump the last_seen timestamp for an event.""" - if not self._db: - return - self._db.execute( - "UPDATE published_ids SET last_seen = CURRENT_TIMESTAMP WHERE adapter = ? AND event_id = ?", - (self.name, event_id) - ) - self._db.commit() - - def sweep_old_ids(self) -> int: - """Remove published_ids older than 8 days. Returns count deleted.""" - if not self._db: - return 0 - cur = self._db.execute( - "DELETE FROM published_ids WHERE last_seen < datetime('now', '-8 days')" - ) - self._db.commit() - return cur.rowcount - - @retry( - stop=stop_after_attempt(5), - wait=wait_exponential_jitter(initial=1, max=60), - retry=retry_if_exception_type((aiohttp.ClientError, asyncio.TimeoutError)), - reraise=True, - ) - async def _fetch_alerts(self) -> tuple[int, dict[str, Any] | None, str | None]: - """Fetch alerts from NWS API with conditional request.""" - if not self._session: - raise RuntimeError("Session not initialized") - - headers: dict[str, str] = {} - cursor = self._get_cursor() - if cursor: - headers["If-Modified-Since"] = cursor - - async with self._session.get(NWS_API_URL, headers=headers) as resp: - if resp.status in (429, 403): - retry_after = resp.headers.get("Retry-After", "60") - try: - wait_time = int(retry_after) - except ValueError: - wait_time = 60 - logger.warning( - "Rate limited by NWS", - extra={"status": resp.status, "retry_after": wait_time} - ) - await asyncio.sleep(wait_time) - raise aiohttp.ClientError(f"Rate limited: {resp.status}") - - if resp.status == 304: - return (304, None, None) - - resp.raise_for_status() - - data = await resp.json() - last_modified = resp.headers.get("Last-Modified") - - return (resp.status, data, last_modified) - - def _normalize_feature(self, feature: dict[str, Any]) -> Event | None: - """Normalize a GeoJSON feature to an Event.""" - props = feature.get("properties", {}) - geocode = props.get("geocode", {}) - - same_codes = geocode.get("SAME", []) - ugc_codes = geocode.get("UGC", []) - - feature_states = _extract_states_from_codes(same_codes, ugc_codes) - if not feature_states.intersection(self.states): - return None - - event_id = feature.get("id") - if not event_id: - logger.warning("Feature missing id", extra={"properties": props}) - return None - - event_type = props.get("event", "Unknown") - category = f"wx.alert.{_snake_case(event_type)}" - - time = _parse_datetime(props.get("sent")) - if not time: - logger.warning("Feature missing sent time", extra={"id": event_id}) - return None - - expires = _parse_datetime(props.get("expires")) - - severity_str = props.get("severity", "Unknown") - severity = SEVERITY_MAP.get(severity_str) - - geometry = feature.get("geometry") - centroid = _compute_centroid(geometry) - bbox = _compute_bbox(geometry) - regions = _build_regions(same_codes, ugc_codes) - primary_region = regions[0] if regions else None - - geo = Geo( - centroid=centroid, - bbox=bbox, - regions=regions, - primary_region=primary_region, - ) - - return Event( - id=event_id, - source="central/adapters/nws", - category=category, - time=time, - expires=expires, - severity=severity, - geo=geo, - data=props, - ) - - async def poll(self) -> AsyncIterator[Event]: - """Poll NWS API for active alerts.""" - try: - status, data, last_modified = await self._fetch_alerts() - except Exception as e: - logger.error("Failed to fetch NWS alerts", extra={"error": str(e)}) - raise - - if status == 304: - logger.info("NWS returned 304 Not Modified") - return - - if last_modified: - self._set_cursor(last_modified) - - features = data.get("features", []) if data else [] - logger.info( - "NWS poll completed", - extra={"status": status, "feature_count": len(features)} - ) - - yielded = 0 - for feature in features: - try: - event = self._normalize_feature(feature) - if event: - yield event - yielded += 1 - except Exception as e: - logger.warning( - "Failed to normalize feature", - extra={"error": str(e), "feature_id": feature.get("id")} - ) - - logger.info("NWS yielded events", extra={"count": yielded}) +"""NWS (National Weather Service) alert adapter.""" + +import asyncio +import logging +import re +import sqlite3 +from collections.abc import AsyncIterator +from datetime import datetime, timezone +from pathlib import Path +from typing import Any + +import aiohttp +from tenacity import ( + retry, + stop_after_attempt, + wait_exponential_jitter, + retry_if_exception_type, +) + +from central import __version__ +from central.adapter import SourceAdapter +from central.config_models import AdapterConfig, RegionConfig +from central.models import Event, Geo + +logger = logging.getLogger(__name__) + +# FIPS state codes to postal abbreviations +FIPS_TO_STATE: dict[str, str] = { + "01": "AL", "02": "AK", "04": "AZ", "05": "AR", "06": "CA", + "08": "CO", "09": "CT", "10": "DE", "11": "DC", "12": "FL", + "13": "GA", "15": "HI", "16": "ID", "17": "IL", "18": "IN", + "19": "IA", "20": "KS", "21": "KY", "22": "LA", "23": "ME", + "24": "MD", "25": "MA", "26": "MI", "27": "MN", "28": "MS", + "29": "MO", "30": "MT", "31": "NE", "32": "NV", "33": "NH", + "34": "NJ", "35": "NM", "36": "NY", "37": "NC", "38": "ND", + "39": "OH", "40": "OK", "41": "OR", "42": "PA", "44": "RI", + "45": "SC", "46": "SD", "47": "TN", "48": "TX", "49": "UT", + "50": "VT", "51": "VA", "53": "WA", "54": "WV", "55": "WI", + "56": "WY", "60": "AS", "66": "GU", "69": "MP", "72": "PR", + "78": "VI", +} + +SEVERITY_MAP: dict[str, int | None] = { + "Extreme": 4, + "Severe": 3, + "Moderate": 2, + "Minor": 1, + "Unknown": None, +} + +NWS_API_URL = "https://api.weather.gov/alerts/active" + + +def _snake_case(s: str) -> str: + """Convert a string to snake_case.""" + s = re.sub(r"[^a-zA-Z0-9\s]", "", s) + s = re.sub(r"\s+", "_", s.strip()) + return s.lower() + + +def _parse_datetime(s: str | None) -> datetime | None: + """Parse an ISO datetime string to UTC datetime.""" + if not s: + return None + try: + dt = datetime.fromisoformat(s.replace("Z", "+00:00")) + return dt.astimezone(timezone.utc) + except (ValueError, TypeError): + return None + + +def _compute_centroid(geometry: dict[str, Any] | None) -> tuple[float, float] | None: + """Compute centroid from GeoJSON geometry using arithmetic mean of vertices.""" + if not geometry: + return None + + geom_type = geometry.get("type") + coords = geometry.get("coordinates") + + if not coords: + return None + + all_points: list[tuple[float, float]] = [] + + if geom_type == "Point": + return (coords[0], coords[1]) + elif geom_type == "Polygon": + for ring in coords: + for point in ring: + all_points.append((point[0], point[1])) + elif geom_type == "MultiPolygon": + for polygon in coords: + for ring in polygon: + for point in ring: + all_points.append((point[0], point[1])) + else: + return None + + if not all_points: + return None + + avg_lon = sum(p[0] for p in all_points) / len(all_points) + avg_lat = sum(p[1] for p in all_points) / len(all_points) + return (avg_lon, avg_lat) + + +def _compute_bbox( + geometry: dict[str, Any] | None +) -> tuple[float, float, float, float] | None: + """Compute bounding box from GeoJSON geometry.""" + if not geometry: + return None + + geom_type = geometry.get("type") + coords = geometry.get("coordinates") + + if not coords: + return None + + all_points: list[tuple[float, float]] = [] + + if geom_type == "Point": + return (coords[0], coords[1], coords[0], coords[1]) + elif geom_type == "Polygon": + for ring in coords: + for point in ring: + all_points.append((point[0], point[1])) + elif geom_type == "MultiPolygon": + for polygon in coords: + for ring in polygon: + for point in ring: + all_points.append((point[0], point[1])) + else: + return None + + if not all_points: + return None + + min_lon = min(p[0] for p in all_points) + max_lon = max(p[0] for p in all_points) + min_lat = min(p[1] for p in all_points) + max_lat = max(p[1] for p in all_points) + return (min_lon, min_lat, max_lon, max_lat) + + +def _extract_states_from_codes( + same_codes: list[str], ugc_codes: list[str] +) -> set[str]: + """Extract state abbreviations from SAME and UGC codes.""" + states: set[str] = set() + + for code in same_codes: + if len(code) >= 2: + fips_state = code[:2] + if fips_state in FIPS_TO_STATE: + states.add(FIPS_TO_STATE[fips_state]) + + for code in ugc_codes: + if len(code) >= 2 and code[:2].isalpha(): + states.add(code[:2].upper()) + + return states + + +def _build_regions(same_codes: list[str], ugc_codes: list[str]) -> list[str]: + """Build sorted list of region strings from geocodes.""" + regions: set[str] = set() + + for code in same_codes: + if len(code) >= 2: + fips_state = code[:2] + if fips_state in FIPS_TO_STATE: + state = FIPS_TO_STATE[fips_state] + regions.add(f"US-{state}-FIPS{code}") + + for code in ugc_codes: + if len(code) >= 3 and code[:2].isalpha(): + state = code[:2].upper() + rest = code[2:] + if rest.startswith("C"): + regions.add(f"US-{state}-C{rest[1:]}") + elif rest.startswith("Z"): + regions.add(f"US-{state}-Z{rest[1:]}") + else: + regions.add(f"US-{state}-{rest}") + + return sorted(regions) + + +class NWSAdapter(SourceAdapter): + """National Weather Service alerts adapter.""" + + name = "nws" + + def __init__( + self, + config: AdapterConfig, + cursor_db_path: Path, + ) -> None: + self.cursor_db_path = cursor_db_path + self._session: aiohttp.ClientSession | None = None + self._db: sqlite3.Connection | None = None + + # Extract settings from unified config + self.contact_email: str = config.settings.get("contact_email", "") + + # Parse region from settings + region_dict = config.settings.get("region") + if region_dict: + self.region: RegionConfig | None = RegionConfig(**region_dict) + else: + self.region = None + + async def apply_config(self, new_config: AdapterConfig) -> None: + """Apply new configuration from hot-reload.""" + # Update contact email + self.contact_email = new_config.settings.get("contact_email", "") + + # Update region + region_dict = new_config.settings.get("region") + if region_dict: + self.region = RegionConfig(**region_dict) + else: + self.region = None + + logger.info( + "NWS config applied", + extra={ + "region": region_dict, + "contact_email": self.contact_email, + }, + ) + + def _point_in_region(self, centroid: tuple[float, float] | None) -> bool: + """Check if centroid is within configured region bbox.""" + if self.region is None: + # No region configured = accept all + return True + if centroid is None: + return False + lon, lat = centroid + return ( + self.region.west <= lon <= self.region.east + and self.region.south <= lat <= self.region.north + ) + + async def startup(self) -> None: + """Initialize HTTP session and cursor database.""" + user_agent = f"Central/{__version__} ({self.contact_email})" + self._session = aiohttp.ClientSession( + headers={"User-Agent": user_agent}, + timeout=aiohttp.ClientTimeout(total=30), + ) + + self._db = sqlite3.connect(str(self.cursor_db_path)) + self._db.execute(""" + CREATE TABLE IF NOT EXISTS adapter_cursors ( + adapter TEXT PRIMARY KEY, + cursor_data TEXT NOT NULL, + updated TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP + ) + """) + self._db.execute(""" + CREATE TABLE IF NOT EXISTS published_ids ( + adapter TEXT NOT NULL, + event_id TEXT NOT NULL, + first_seen TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + last_seen TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY (adapter, event_id) + ) + """) + self._db.execute(""" + CREATE INDEX IF NOT EXISTS published_ids_last_seen + ON published_ids (last_seen) + """) + self._db.commit() + + logger.info( + "NWS adapter started", + extra={ + "region": { + "north": self.region.north, + "south": self.region.south, + "east": self.region.east, + "west": self.region.west, + } if self.region else None, + }, + ) + + async def shutdown(self) -> None: + """Close HTTP session and database.""" + if self._session: + await self._session.close() + self._session = None + if self._db: + self._db.close() + self._db = None + logger.info("NWS adapter shut down") + + def _get_cursor(self) -> str | None: + """Get the stored If-Modified-Since cursor.""" + if not self._db: + return None + cur = self._db.execute( + "SELECT cursor_data FROM adapter_cursors WHERE adapter = ?", + (self.name,) + ) + row = cur.fetchone() + return row[0] if row else None + + def _set_cursor(self, last_modified: str) -> None: + """Store the Last-Modified header for next request.""" + if not self._db: + return + self._db.execute( + """ + INSERT INTO adapter_cursors (adapter, cursor_data, updated) + VALUES (?, ?, CURRENT_TIMESTAMP) + ON CONFLICT (adapter) DO UPDATE SET + cursor_data = excluded.cursor_data, + updated = CURRENT_TIMESTAMP + """, + (self.name, last_modified) + ) + self._db.commit() + + def is_published(self, event_id: str) -> bool: + """Check if an event has already been published.""" + if not self._db: + return False + cur = self._db.execute( + "SELECT 1 FROM published_ids WHERE adapter = ? AND event_id = ?", + (self.name, event_id) + ) + return cur.fetchone() is not None + + def mark_published(self, event_id: str) -> None: + """Mark an event as published.""" + if not self._db: + return + self._db.execute( + """ + INSERT INTO published_ids (adapter, event_id, first_seen, last_seen) + VALUES (?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP) + ON CONFLICT (adapter, event_id) DO UPDATE SET + last_seen = CURRENT_TIMESTAMP + """, + (self.name, event_id) + ) + self._db.commit() + + def bump_last_seen(self, event_id: str) -> None: + """Bump the last_seen timestamp for an event.""" + if not self._db: + return + self._db.execute( + "UPDATE published_ids SET last_seen = CURRENT_TIMESTAMP WHERE adapter = ? AND event_id = ?", + (self.name, event_id) + ) + self._db.commit() + + def sweep_old_ids(self) -> int: + """Remove published_ids older than 8 days. Returns count deleted.""" + if not self._db: + return 0 + cur = self._db.execute( + "DELETE FROM published_ids WHERE last_seen < datetime('now', '-8 days')" + ) + self._db.commit() + return cur.rowcount + + @retry( + stop=stop_after_attempt(5), + wait=wait_exponential_jitter(initial=1, max=60), + retry=retry_if_exception_type((aiohttp.ClientError, asyncio.TimeoutError)), + reraise=True, + ) + async def _fetch_alerts(self) -> tuple[int, dict[str, Any] | None, str | None]: + """Fetch alerts from NWS API with conditional request.""" + if not self._session: + raise RuntimeError("Session not initialized") + + headers: dict[str, str] = {} + cursor = self._get_cursor() + if cursor: + headers["If-Modified-Since"] = cursor + + async with self._session.get(NWS_API_URL, headers=headers) as resp: + if resp.status in (429, 403): + retry_after = resp.headers.get("Retry-After", "60") + try: + wait_time = int(retry_after) + except ValueError: + wait_time = 60 + logger.warning( + "Rate limited by NWS", + extra={"status": resp.status, "retry_after": wait_time} + ) + await asyncio.sleep(wait_time) + raise aiohttp.ClientError(f"Rate limited: {resp.status}") + + if resp.status == 304: + return (304, None, None) + + resp.raise_for_status() + + data = await resp.json() + last_modified = resp.headers.get("Last-Modified") + + return (resp.status, data, last_modified) + + def _normalize_feature(self, feature: dict[str, Any]) -> Event | None: + """Normalize a GeoJSON feature to an Event.""" + props = feature.get("properties", {}) + geocode = props.get("geocode", {}) + + same_codes = geocode.get("SAME", []) + ugc_codes = geocode.get("UGC", []) + + # Compute geometry data first + geometry = feature.get("geometry") + centroid = _compute_centroid(geometry) + bbox = _compute_bbox(geometry) + + # Filter by region bbox (client-side filtering) + if not self._point_in_region(centroid): + return None + + event_id = feature.get("id") + if not event_id: + logger.warning("Feature missing id", extra={"properties": props}) + return None + + event_type = props.get("event", "Unknown") + category = f"wx.alert.{_snake_case(event_type)}" + + time = _parse_datetime(props.get("sent")) + if not time: + logger.warning("Feature missing sent time", extra={"id": event_id}) + return None + + expires = _parse_datetime(props.get("expires")) + + severity_str = props.get("severity", "Unknown") + severity = SEVERITY_MAP.get(severity_str) + + regions = _build_regions(same_codes, ugc_codes) + primary_region = regions[0] if regions else None + + geo = Geo( + centroid=centroid, + bbox=bbox, + regions=regions, + primary_region=primary_region, + ) + + return Event( + id=event_id, + source="central/adapters/nws", + category=category, + time=time, + expires=expires, + severity=severity, + geo=geo, + data=props, + ) + + async def poll(self) -> AsyncIterator[Event]: + """Poll NWS API for active alerts.""" + try: + status, data, last_modified = await self._fetch_alerts() + except Exception as e: + logger.error("Failed to fetch NWS alerts", extra={"error": str(e)}) + raise + + if status == 304: + logger.info("NWS returned 304 Not Modified") + return + + if last_modified: + self._set_cursor(last_modified) + + features = data.get("features", []) if data else [] + logger.info( + "NWS poll completed", + extra={"status": status, "feature_count": len(features)} + ) + + yielded = 0 + for feature in features: + try: + event = self._normalize_feature(feature) + if event: + yield event + yielded += 1 + except Exception as e: + logger.warning( + "Failed to normalize feature", + extra={"error": str(e), "feature_id": feature.get("id")} + ) + + logger.info("NWS yielded events", extra={"count": yielded}) diff --git a/src/central/config_models.py b/src/central/config_models.py index 02855ca..0447c56 100644 --- a/src/central/config_models.py +++ b/src/central/config_models.py @@ -1,39 +1,70 @@ -"""Pydantic models for database-backed configuration.""" - -from datetime import datetime -from typing import Any - -from pydantic import BaseModel, Field - - -class AdapterConfig(BaseModel): - """Configuration for a single adapter.""" - - name: str = Field(description="Unique adapter identifier") - enabled: bool = Field(default=True, description="Whether adapter is active") - cadence_s: int = Field(description="Poll interval in seconds") - settings: dict[str, Any] = Field( - default_factory=dict, description="Adapter-specific settings" - ) - paused_at: datetime | None = Field( - default=None, description="When adapter was paused, if paused" - ) - updated_at: datetime = Field(description="Last configuration update time") - - @property - def is_paused(self) -> bool: - """Check if adapter is currently paused.""" - return self.paused_at is not None - - -class ApiKeyInfo(BaseModel): - """Metadata about an API key (without the decrypted value).""" - - alias: str = Field(description="Key identifier/alias") - created_at: datetime = Field(description="When key was created") - rotated_at: datetime | None = Field( - default=None, description="Last rotation time" - ) - last_used_at: datetime | None = Field( - default=None, description="Last usage time" - ) +"""Pydantic models for database-backed configuration.""" + +from datetime import datetime +from typing import Any + +from pydantic import BaseModel, Field, model_validator + + +class RegionConfig(BaseModel): + """Geographic bounding box for adapter region filtering.""" + + north: float = Field(ge=-90, le=90, description="Northern latitude bound") + south: float = Field(ge=-90, le=90, description="Southern latitude bound") + east: float = Field(ge=-180, le=180, description="Eastern longitude bound") + west: float = Field(ge=-180, le=180, description="Western longitude bound") + + @model_validator(mode="after") + def validate_bounds(self) -> "RegionConfig": + if self.north <= self.south: + raise ValueError( + f"north ({self.north}) must be greater than south ({self.south})" + ) + if self.east == self.west: + raise ValueError("east and west cannot be equal (zero-width bbox)") + return self + + +class AdapterConfig(BaseModel): + """Configuration for a single adapter.""" + + name: str = Field(description="Unique adapter identifier") + enabled: bool = Field(default=True, description="Whether adapter is active") + cadence_s: int = Field(description="Poll interval in seconds") + settings: dict[str, Any] = Field( + default_factory=dict, description="Adapter-specific settings" + ) + paused_at: datetime | None = Field( + default=None, description="When adapter was paused, if paused" + ) + updated_at: datetime = Field(description="Last configuration update time") + + @property + def is_paused(self) -> bool: + """Check if adapter is currently paused.""" + return self.paused_at is not None + + +class StreamConfig(BaseModel): + """Configuration for a JetStream stream.""" + + name: str = Field(description="Stream name") + max_age_s: int = Field(description="Maximum message age in seconds") + max_bytes: int = Field(description="Maximum stream size in bytes") + managed_max_bytes: bool = Field( + default=True, description="Whether max_bytes is auto-managed by supervisor" + ) + updated_at: datetime = Field(description="Last configuration update time") + + +class ApiKeyInfo(BaseModel): + """Metadata about an API key (without the decrypted value).""" + + alias: str = Field(description="Key identifier/alias") + created_at: datetime = Field(description="When key was created") + rotated_at: datetime | None = Field( + default=None, description="Last rotation time" + ) + last_used_at: datetime | None = Field( + default=None, description="Last usage time" + ) From ab7126ec8da73a42723d67f55aeb4c3c2236b4aa Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Sat, 16 May 2026 18:49:53 +0000 Subject: [PATCH 09/22] refactor(supervisor): remove adapter-specific branches, add stream wiring - Replace if name == nws with generic apply_config call - Add _create_adapter factory method - Add stream management: ensure_stream, retention recompute loop - Handle streams config changes via NOTIFY Co-Authored-By: Claude Opus 4.5 --- src/central/supervisor.py | 231 ++++++++++++++++++++++++++++++-------- 1 file changed, 185 insertions(+), 46 deletions(-) diff --git a/src/central/supervisor.py b/src/central/supervisor.py index fad8588..2543291 100644 --- a/src/central/supervisor.py +++ b/src/central/supervisor.py @@ -13,16 +13,27 @@ from typing import Any import nats from nats.js import JetStreamContext +from central.adapter import SourceAdapter from central.adapters.nws import NWSAdapter from central.cloudevents_wire import wrap_event -from central.config import NWSAdapterConfig from central.config_models import AdapterConfig from central.config_source import ConfigSource, DbConfigSource +from central.config_store import ConfigStore from central.bootstrap_config import get_settings from central.models import subject_for_event +from central.stream_manager import StreamManager CURSOR_DB_PATH = Path("/var/lib/central/cursors.db") +# Stream subject mappings +STREAM_SUBJECTS = { + "CENTRAL_WX": ["central.wx.>"], + "CENTRAL_META": ["central.meta.>"], +} + +# Recompute interval for stream max_bytes (1 hour) +STREAM_RECOMPUTE_INTERVAL_S = 3600 + class JsonFormatter(logging.Formatter): """JSON log formatter for structured logging.""" @@ -66,7 +77,7 @@ class AdapterState: """Runtime state for a scheduled adapter.""" name: str - adapter: NWSAdapter + adapter: SourceAdapter config: AdapterConfig task: asyncio.Task[None] | None = None last_completed_poll: datetime | None = None @@ -84,14 +95,17 @@ class Supervisor: def __init__( self, config_source: ConfigSource, + config_store: ConfigStore, nats_url: str, cloudevents_config: Any = None, ) -> None: self._config_source = config_source + self._config_store = config_store self._nats_url = nats_url self._cloudevents_config = cloudevents_config self._nc: nats.NATS | None = None self._js: JetStreamContext | None = None + self._stream_manager: StreamManager | None = None self._adapter_states: dict[str, AdapterState] = {} self._tasks: list[asyncio.Task[None]] = [] self._shutdown_event = asyncio.Event() @@ -103,6 +117,7 @@ class Supervisor: """Connect to NATS.""" self._nc = await nats.connect(self._nats_url) self._js = self._nc.jetstream() + self._stream_manager = StreamManager(self._js) logger.info("Connected to NATS", extra={"url": self._nats_url}) async def disconnect(self) -> None: @@ -112,6 +127,7 @@ class Supervisor: await self._nc.close() self._nc = None self._js = None + self._stream_manager = None logger.info("Disconnected from NATS") async def _publish_meta(self, subject: str, data: dict[str, Any]) -> None: @@ -132,14 +148,12 @@ class Supervisor: headers={"Nats-Msg-Id": msg_id}, ) - def _adapter_config_to_nws_config(self, config: AdapterConfig) -> NWSAdapterConfig: - """Convert unified AdapterConfig to NWSAdapterConfig.""" - return NWSAdapterConfig( - enabled=config.enabled, - cadence_s=config.cadence_s, - states=config.settings.get("states", []), - contact_email=config.settings.get("contact_email", ""), - ) + def _create_adapter(self, config: AdapterConfig) -> SourceAdapter: + """Create an adapter instance based on config name.""" + if config.name == "nws": + return NWSAdapter(config=config, cursor_db_path=CURSOR_DB_PATH) + else: + raise ValueError(f"Unknown adapter type: {config.name}") async def _run_adapter_loop(self, state: AdapterState) -> None: """Run an adapter poll loop with rate-limit aware scheduling.""" @@ -263,11 +277,7 @@ class Supervisor: existing_state.cancel_event.clear() # Reinitialize the adapter with new config - nws_config = self._adapter_config_to_nws_config(config) - existing_state.adapter = NWSAdapter( - config=nws_config, - cursor_db_path=CURSOR_DB_PATH, - ) + existing_state.adapter = self._create_adapter(config) await existing_state.adapter.startup() # Start the loop task @@ -300,34 +310,29 @@ class Supervisor: return # New adapter - create fresh state - if config.name == "nws": - nws_config = self._adapter_config_to_nws_config(config) - adapter = NWSAdapter( - config=nws_config, - cursor_db_path=CURSOR_DB_PATH, - ) - await adapter.startup() + try: + adapter = self._create_adapter(config) + except ValueError as e: + logger.warning(str(e), extra={"adapter": config.name}) + return - state = AdapterState( - name=config.name, - adapter=adapter, - config=config, - ) - state.task = asyncio.create_task(self._run_adapter_loop(state)) - self._adapter_states[config.name] = state + await adapter.startup() - logger.info( - "Adapter started", - extra={ - "adapter": config.name, - "cadence_s": config.cadence_s, - }, - ) - else: - logger.warning( - "Unknown adapter type", - extra={"adapter": config.name}, - ) + state = AdapterState( + name=config.name, + adapter=adapter, + config=config, + ) + state.task = asyncio.create_task(self._run_adapter_loop(state)) + self._adapter_states[config.name] = state + + logger.info( + "Adapter started", + extra={ + "adapter": config.name, + "cadence_s": config.cadence_s, + }, + ) async def _stop_adapter(self, name: str) -> None: """Stop a running adapter but preserve state for potential restart. @@ -423,10 +428,8 @@ class Supervisor: # Update config state.config = new_config - # Update adapter settings if needed (e.g., states list) - if name == "nws": - nws_config = self._adapter_config_to_nws_config(new_config) - state.adapter.states = set(s.upper() for s in nws_config.states) + # Apply config to adapter (generic - each adapter handles its own settings) + await state.adapter.apply_config(new_config) # Calculate next poll time for logging if state.last_completed_poll: @@ -451,11 +454,139 @@ class Supervisor: # This ensures immediate event delivery to the sleeping loop. return state + async def _ensure_streams(self) -> None: + """Ensure all configured streams exist with correct settings.""" + if not self._stream_manager: + return + + streams = await self._config_store.list_streams() + for stream_config in streams: + subjects = STREAM_SUBJECTS.get(stream_config.name, []) + if not subjects: + logger.warning( + "No subjects configured for stream", + extra={"stream": stream_config.name}, + ) + continue + + try: + await self._stream_manager.ensure_stream( + stream_config.name, + subjects, + stream_config, + ) + except Exception as e: + logger.error( + "Failed to ensure stream", + extra={"stream": stream_config.name, "error": str(e)}, + ) + + async def _handle_stream_change(self, stream_name: str) -> None: + """Handle a stream configuration change.""" + if not self._stream_manager: + return + + stream_config = await self._config_store.get_stream(stream_name) + if stream_config is None: + logger.warning( + "Stream config not found", + extra={"stream": stream_name}, + ) + return + + try: + # Apply retention settings + await self._stream_manager.apply_retention(stream_name, stream_config) + + # Immediate recompute of max_bytes + new_max_bytes = await self._stream_manager.recompute_max_bytes( + stream_name, stream_config.max_age_s + ) + + # Update database (won't trigger NOTIFY due to column filter) + await self._config_store.update_stream_max_bytes(stream_name, new_max_bytes) + + logger.info( + "Stream retention updated", + extra={ + "stream": stream_name, + "max_age_s": stream_config.max_age_s, + "new_max_bytes": new_max_bytes, + }, + ) + except Exception as e: + logger.error( + "Failed to handle stream change", + extra={"stream": stream_name, "error": str(e)}, + ) + + async def _stream_retention_recompute_loop(self) -> None: + """Periodically recompute max_bytes for all streams.""" + while not self._shutdown_event.is_set(): + try: + await asyncio.wait_for( + self._shutdown_event.wait(), + timeout=STREAM_RECOMPUTE_INTERVAL_S, + ) + # Shutdown requested + break + except asyncio.TimeoutError: + pass + + # Recompute for all streams + if not self._stream_manager: + continue + + streams = await self._config_store.list_streams() + for stream_config in streams: + if not stream_config.managed_max_bytes: + continue + + try: + new_max_bytes = await self._stream_manager.recompute_max_bytes( + stream_config.name, stream_config.max_age_s + ) + + # Only update if change > 10% + change_ratio = abs(new_max_bytes - stream_config.max_bytes) / max(stream_config.max_bytes, 1) + if change_ratio > 0.10: + await self._config_store.update_stream_max_bytes( + stream_config.name, new_max_bytes + ) + await self._stream_manager.apply_retention( + stream_config.name, + await self._config_store.get_stream(stream_config.name), + ) + logger.info( + "Recomputed stream max_bytes", + extra={ + "stream": stream_config.name, + "old_max_bytes": stream_config.max_bytes, + "new_max_bytes": new_max_bytes, + "change_ratio": change_ratio, + }, + ) + except Exception as e: + logger.error( + "Failed to recompute stream max_bytes", + extra={"stream": stream_config.name, "error": str(e)}, + ) + async def _on_config_change(self, table: str, key: str) -> None: """Handle a configuration change notification. Called when NOTIFY fires for config changes. """ + # Handle stream changes + if table == "streams": + stream_name = key + logger.info( + "Stream config change received", + extra={"stream": stream_name}, + ) + await self._handle_stream_change(stream_name) + return + if table != "adapters": return @@ -534,6 +665,9 @@ class Supervisor: """Start the supervisor.""" await self.connect() + # Ensure streams exist with correct configuration + await self._ensure_streams() + # Load and start enabled adapters enabled_adapters = await self._config_source.list_enabled_adapters() for config in enabled_adapters: @@ -547,6 +681,9 @@ class Supervisor: # Start heartbeat self._tasks.append(asyncio.create_task(self._heartbeat_loop())) + # Start stream retention recompute loop + self._tasks.append(asyncio.create_task(self._stream_retention_recompute_loop())) + logger.info( "Supervisor started", extra={"adapters": list(self._adapter_states.keys())}, @@ -594,11 +731,13 @@ async def async_main() -> None: extra={"config_source": "db"}, ) - # Create database config source + # Create database config source and store config_source = await DbConfigSource.create(settings.db_dsn) + config_store = await ConfigStore.create(settings.db_dsn) supervisor = Supervisor( config_source=config_source, + config_store=config_store, nats_url=settings.nats_url, # CloudEvents uses protocol-level defaults from cloudevents_constants cloudevents_config=None, From 71a43d3c98f8ad45fb50e3bb2982ef59bcbb5208 Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Sat, 16 May 2026 18:49:59 +0000 Subject: [PATCH 10/22] schema: add config.streams table with column-filtered notify - config.streams table for JetStream retention config - Column-filtered NOTIFY: only fires on max_age_s changes - Prevents self-loop when supervisor updates max_bytes - Seeds CENTRAL_WX (7d/10GB) and CENTRAL_META (1d/100MB) Co-Authored-By: Claude Opus 4.5 --- sql/migrations/003_add_streams_table.sql | 46 ++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 sql/migrations/003_add_streams_table.sql diff --git a/sql/migrations/003_add_streams_table.sql b/sql/migrations/003_add_streams_table.sql new file mode 100644 index 0000000..27d59cd --- /dev/null +++ b/sql/migrations/003_add_streams_table.sql @@ -0,0 +1,46 @@ +-- Migration: 003_add_streams_table +-- Creates the config.streams table for JetStream stream retention configuration. +-- Uses column-filtered NOTIFY to prevent self-loop when supervisor updates max_bytes. + +-- Streams configuration table +CREATE TABLE config.streams ( + name TEXT PRIMARY KEY, + max_age_s BIGINT NOT NULL, + max_bytes BIGINT NOT NULL DEFAULT 1073741824, -- 1GB default + managed_max_bytes BOOLEAN NOT NULL DEFAULT true, + updated_at TIMESTAMPTZ NOT NULL DEFAULT now() +); + +-- Auto-update trigger for updated_at +CREATE TRIGGER streams_set_updated_at + BEFORE UPDATE ON config.streams + FOR EACH ROW EXECUTE FUNCTION config.set_updated_at(); + +-- Column-filtered NOTIFY trigger for streams. +-- Fires on INSERT/DELETE always. +-- On UPDATE, only fires when max_age_s changes (operator-touchable field), +-- NOT when max_bytes changes (supervisor-managed), to prevent recompute loop. +CREATE OR REPLACE FUNCTION config.notify_streams_change() +RETURNS trigger AS $$ +BEGIN + IF TG_OP = 'INSERT' OR TG_OP = 'DELETE' THEN + PERFORM pg_notify('config_changed', 'streams:' || + COALESCE(NEW.name, OLD.name)); + ELSIF TG_OP = 'UPDATE' AND + OLD.max_age_s IS DISTINCT FROM NEW.max_age_s THEN + PERFORM pg_notify('config_changed', 'streams:' || NEW.name); + END IF; + RETURN COALESCE(NEW, OLD); +END; +$$ LANGUAGE plpgsql; + +CREATE TRIGGER streams_notify + AFTER INSERT OR UPDATE OR DELETE ON config.streams + FOR EACH ROW EXECUTE FUNCTION config.notify_streams_change(); + +-- Seed with current stream values from investigation +-- CENTRAL_WX: 7d max_age (604800s), 10GB max_bytes (will be clamped to 6GB on first recompute) +-- CENTRAL_META: 1d max_age (86400s), 100MB max_bytes (will be raised to 1GB floor) +INSERT INTO config.streams (name, max_age_s, max_bytes) VALUES + ('CENTRAL_WX', 604800, 10737418240), + ('CENTRAL_META', 86400, 104857600); From da8942a457ba27e63d9fb26901bbdd85fbbae29e Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Sat, 16 May 2026 18:50:05 +0000 Subject: [PATCH 11/22] schema: migrate NWS settings from states to region bbox - Remove states array from NWS settings - Add region bbox covering ID/OR/WA/MT/WY/UT/NV - Bbox: north=49.5, south=31.0, east=-102.0, west=-124.5 Co-Authored-By: Claude Opus 4.5 --- sql/migrations/004_nws_states_to_bbox.sql | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 sql/migrations/004_nws_states_to_bbox.sql diff --git a/sql/migrations/004_nws_states_to_bbox.sql b/sql/migrations/004_nws_states_to_bbox.sql new file mode 100644 index 0000000..5294a87 --- /dev/null +++ b/sql/migrations/004_nws_states_to_bbox.sql @@ -0,0 +1,11 @@ +-- Migration: 004_nws_states_to_bbox +-- Converts NWS adapter settings from states list to region bbox. +-- Bbox covers ID/OR/WA/MT/WY/UT/NV with buffer. + +UPDATE config.adapters +SET settings = jsonb_set( + settings - 'states', -- Remove states key + '{region}', + '{"north": 49.5, "south": 31.0, "east": -102.0, "west": -124.5}'::jsonb +) +WHERE name = 'nws'; From f9426caa27a035f2069cae4a77a0f1a0b5aa345f Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Sat, 16 May 2026 18:50:12 +0000 Subject: [PATCH 12/22] feat: add stream management infrastructure - config_store: add stream CRUD methods - stream_manager: ensure_stream, apply_retention, recompute_max_bytes - Auto-clamp max_bytes to [1GB floor, 30% ceiling] - Parse server max_file_store from nats-server.conf Co-Authored-By: Claude Opus 4.5 --- src/central/config_store.py | 65 ++++++++- src/central/stream_manager.py | 262 ++++++++++++++++++++++++++++++++++ 2 files changed, 326 insertions(+), 1 deletion(-) create mode 100644 src/central/stream_manager.py diff --git a/src/central/config_store.py b/src/central/config_store.py index 0f03826..20415b9 100644 --- a/src/central/config_store.py +++ b/src/central/config_store.py @@ -12,7 +12,7 @@ from typing import Any import asyncpg -from central.config_models import AdapterConfig +from central.config_models import AdapterConfig, StreamConfig from central.crypto import decrypt, encrypt logger = logging.getLogger(__name__) @@ -129,6 +129,69 @@ class ConfigStore: name, ) + # ------------------------------------------------------------------------- + # Stream configuration + # ------------------------------------------------------------------------- + + async def get_stream(self, name: str) -> StreamConfig | None: + """Get configuration for a specific stream.""" + async with self._pool.acquire() as conn: + row = await conn.fetchrow( + """ + SELECT name, max_age_s, max_bytes, managed_max_bytes, updated_at + FROM config.streams + WHERE name = $1 + """, + name, + ) + if row is None: + return None + return StreamConfig(**dict(row)) + + async def list_streams(self) -> list[StreamConfig]: + """List all configured streams.""" + async with self._pool.acquire() as conn: + rows = await conn.fetch( + """ + SELECT name, max_age_s, max_bytes, managed_max_bytes, updated_at + FROM config.streams + ORDER BY name + """ + ) + return [StreamConfig(**dict(row)) for row in rows] + + async def upsert_stream(self, name: str, max_age_s: int) -> None: + """Insert or update a stream's max_age_s (operator-facing).""" + async with self._pool.acquire() as conn: + await conn.execute( + """ + INSERT INTO config.streams (name, max_age_s, updated_at) + VALUES ($1, $2, now()) + ON CONFLICT (name) DO UPDATE SET + max_age_s = EXCLUDED.max_age_s, + updated_at = now() + """, + name, + max_age_s, + ) + + async def update_stream_max_bytes(self, name: str, max_bytes: int) -> None: + """Update a stream's max_bytes (supervisor-internal). + + This update only touches max_bytes, which does NOT trigger + the column-filtered NOTIFY (only max_age_s changes fire NOTIFY). + """ + async with self._pool.acquire() as conn: + await conn.execute( + """ + UPDATE config.streams + SET max_bytes = $2, updated_at = now() + WHERE name = $1 + """, + name, + max_bytes, + ) + # ------------------------------------------------------------------------- # API key management # ------------------------------------------------------------------------- diff --git a/src/central/stream_manager.py b/src/central/stream_manager.py new file mode 100644 index 0000000..4b9b5ba --- /dev/null +++ b/src/central/stream_manager.py @@ -0,0 +1,262 @@ +"""JetStream stream manager for retention configuration.""" + +import logging +import re +from pathlib import Path +from typing import Any + +from nats.js import JetStreamContext +from nats.js.api import StreamConfig, DiscardPolicy, RetentionPolicy + +from central.config_models import StreamConfig as StreamConfigModel + +logger = logging.getLogger(__name__) + +# Constants +ONE_GB = 1024 * 1024 * 1024 # 1 GiB in bytes +NATS_CONFIG_PATH = Path("/etc/nats/nats-server.conf") + + +class StreamManager: + """Manages JetStream stream configuration and retention.""" + + def __init__(self, js: JetStreamContext) -> None: + self._js = js + self._server_max_file_store: int | None = None + + async def server_max_file_store_bytes(self) -> int: + """Get the server's max_file_store setting in bytes. + + Parses the NATS server config file and caches the result. + Returns a default of 20GB if config cannot be read. + """ + if self._server_max_file_store is not None: + return self._server_max_file_store + + default_value = 20 * ONE_GB # 20GB default + + try: + config_text = NATS_CONFIG_PATH.read_text() + + # Parse max_file_store value (supports GB/MB/KB suffixes) + match = re.search(r'max_file_store:\s*(\d+)(GB|MB|KB|G|M|K)?', config_text, re.IGNORECASE) + if match: + value = int(match.group(1)) + suffix = (match.group(2) or "").upper() + + if suffix in ("GB", "G"): + value *= ONE_GB + elif suffix in ("MB", "M"): + value *= 1024 * 1024 + elif suffix in ("KB", "K"): + value *= 1024 + # else: assume bytes + + self._server_max_file_store = value + logger.info( + "Parsed server max_file_store", + extra={"max_file_store_bytes": value}, + ) + return value + + logger.warning( + "max_file_store not found in config, using default", + extra={"default": default_value}, + ) + self._server_max_file_store = default_value + return default_value + + except Exception as e: + logger.warning( + "Failed to read NATS config, using default", + extra={"error": str(e), "default": default_value}, + ) + self._server_max_file_store = default_value + return default_value + + def _compute_ceiling(self, server_max: int) -> int: + """Compute per-stream ceiling as 30% of server max_file_store.""" + return int(server_max * 0.30) + + async def ensure_stream( + self, + name: str, + subjects: list[str], + config: StreamConfigModel, + ) -> None: + """Ensure a stream exists with the given configuration. + + Creates the stream if it doesn't exist, or updates it if it does. + Always enforces: discard=old, max_msgs=-1 (unlimited). + """ + server_max = await self.server_max_file_store_bytes() + ceiling = self._compute_ceiling(server_max) + + # Clamp max_bytes to [1GB, ceiling] + max_bytes = max(ONE_GB, min(config.max_bytes, ceiling)) + + stream_config = StreamConfig( + name=name, + subjects=subjects, + retention=RetentionPolicy.LIMITS, + discard=DiscardPolicy.OLD, + max_age=config.max_age_s, + max_bytes=max_bytes, + max_msgs=-1, # Unlimited messages + ) + + try: + # Try to get existing stream + existing = await self._js.stream_info(name) + + # Update if config differs + await self._js.update_stream(config=stream_config) + logger.info( + "Updated stream", + extra={ + "stream": name, + "max_age_s": config.max_age_s, + "max_bytes": max_bytes, + }, + ) + + except Exception as e: + if "stream not found" in str(e).lower(): + # Create new stream + await self._js.add_stream(config=stream_config) + logger.info( + "Created stream", + extra={ + "stream": name, + "subjects": subjects, + "max_age_s": config.max_age_s, + "max_bytes": max_bytes, + }, + ) + else: + raise + + async def apply_retention(self, name: str, config: StreamConfigModel) -> None: + """Apply retention settings to an existing stream. + + Updates max_age and max_bytes. Always enforces discard=old, max_msgs=-1. + """ + server_max = await self.server_max_file_store_bytes() + ceiling = self._compute_ceiling(server_max) + + # Clamp max_bytes to [1GB, ceiling] + max_bytes = max(ONE_GB, min(config.max_bytes, ceiling)) + + try: + # Get current stream config + info = await self._js.stream_info(name) + current = info.config + + # Build updated config + updated = StreamConfig( + name=name, + subjects=current.subjects, + retention=RetentionPolicy.LIMITS, + discard=DiscardPolicy.OLD, + max_age=config.max_age_s, + max_bytes=max_bytes, + max_msgs=-1, + ) + + await self._js.update_stream(config=updated) + logger.info( + "Applied retention", + extra={ + "stream": name, + "max_age_s": config.max_age_s, + "max_bytes": max_bytes, + }, + ) + + except Exception as e: + logger.error( + "Failed to apply retention", + extra={"stream": name, "error": str(e)}, + ) + raise + + async def recompute_max_bytes(self, name: str, max_age_s: int) -> int: + """Recompute max_bytes based on observed throughput. + + Formula: rate × max_age × 1.5 safety margin, clamped to [1GB, ceiling]. + + Returns the computed max_bytes value. + """ + server_max = await self.server_max_file_store_bytes() + ceiling = self._compute_ceiling(server_max) + + try: + info = await self._js.stream_info(name) + current_bytes = info.state.bytes + current_msgs = info.state.messages + + # Get stream age from first message + first_seq = info.state.first_seq + last_seq = info.state.last_seq + + if current_msgs == 0 or last_seq == 0: + # No messages yet, use floor + return ONE_GB + + # Estimate message age span (approximation) + # Use stream's configured max_age as the observation window + configured_max_age = info.config.max_age + + if configured_max_age > 0: + # Rate = current_bytes / configured_max_age (in seconds) + rate_per_second = current_bytes / configured_max_age + else: + # Fallback: assume 1 day of data + rate_per_second = current_bytes / 86400 + + # Project bytes needed for new max_age with 1.5x safety margin + projected = int(rate_per_second * max_age_s * 1.5) + + # Clamp to [1GB, ceiling] + result = max(ONE_GB, min(projected, ceiling)) + + logger.info( + "Recomputed max_bytes", + extra={ + "stream": name, + "current_bytes": current_bytes, + "rate_per_second": rate_per_second, + "max_age_s": max_age_s, + "projected": projected, + "result": result, + "ceiling": ceiling, + }, + ) + + return result + + except Exception as e: + logger.error( + "Failed to recompute max_bytes, using floor", + extra={"stream": name, "error": str(e)}, + ) + return ONE_GB + + async def get_stream_stats(self, name: str) -> dict[str, Any]: + """Get current stream statistics for monitoring.""" + try: + info = await self._js.stream_info(name) + return { + "stream": name, + "bytes": info.state.bytes, + "messages": info.state.messages, + "max_bytes": info.config.max_bytes, + "max_age_s": info.config.max_age, + "consumers": info.state.consumer_count, + } + except Exception as e: + logger.error( + "Failed to get stream stats", + extra={"stream": name, "error": str(e)}, + ) + return {"stream": name, "error": str(e)} From a157f39fe058fa3cc43a1cb8472f8c940a730309 Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Sat, 16 May 2026 19:05:05 +0000 Subject: [PATCH 13/22] fix(nws): replace centroid filter with polygon intersection - Add shapely dependency for geometry intersection - Replace _point_in_region with _geometry_intersects_region - Uses Shapely shape() and box() for proper GeoJSON handling - Avoids false negatives on large alert polygons Also adds antimeridian-crossing bbox rejection to RegionConfig validator. Co-Authored-By: Claude Opus 4.5 --- pyproject.toml | 1 + src/central/adapters/nws.py | 1021 +++++++++++++++++----------------- src/central/config_models.py | 2 + uv.lock | 149 ++++- 4 files changed, 660 insertions(+), 513 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d257973..63974db 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,7 @@ dependencies = [ "nats-py>=2.14.0", "pydantic>=2,<3", "pydantic-settings>=2.7.0", + "shapely>=2.0", "tenacity>=9.1.4", ] diff --git a/src/central/adapters/nws.py b/src/central/adapters/nws.py index 76391e1..d2ad155 100644 --- a/src/central/adapters/nws.py +++ b/src/central/adapters/nws.py @@ -1,502 +1,519 @@ -"""NWS (National Weather Service) alert adapter.""" - -import asyncio -import logging -import re -import sqlite3 -from collections.abc import AsyncIterator -from datetime import datetime, timezone -from pathlib import Path -from typing import Any - -import aiohttp -from tenacity import ( - retry, - stop_after_attempt, - wait_exponential_jitter, - retry_if_exception_type, -) - -from central import __version__ -from central.adapter import SourceAdapter -from central.config_models import AdapterConfig, RegionConfig -from central.models import Event, Geo - -logger = logging.getLogger(__name__) - -# FIPS state codes to postal abbreviations -FIPS_TO_STATE: dict[str, str] = { - "01": "AL", "02": "AK", "04": "AZ", "05": "AR", "06": "CA", - "08": "CO", "09": "CT", "10": "DE", "11": "DC", "12": "FL", - "13": "GA", "15": "HI", "16": "ID", "17": "IL", "18": "IN", - "19": "IA", "20": "KS", "21": "KY", "22": "LA", "23": "ME", - "24": "MD", "25": "MA", "26": "MI", "27": "MN", "28": "MS", - "29": "MO", "30": "MT", "31": "NE", "32": "NV", "33": "NH", - "34": "NJ", "35": "NM", "36": "NY", "37": "NC", "38": "ND", - "39": "OH", "40": "OK", "41": "OR", "42": "PA", "44": "RI", - "45": "SC", "46": "SD", "47": "TN", "48": "TX", "49": "UT", - "50": "VT", "51": "VA", "53": "WA", "54": "WV", "55": "WI", - "56": "WY", "60": "AS", "66": "GU", "69": "MP", "72": "PR", - "78": "VI", -} - -SEVERITY_MAP: dict[str, int | None] = { - "Extreme": 4, - "Severe": 3, - "Moderate": 2, - "Minor": 1, - "Unknown": None, -} - -NWS_API_URL = "https://api.weather.gov/alerts/active" - - -def _snake_case(s: str) -> str: - """Convert a string to snake_case.""" - s = re.sub(r"[^a-zA-Z0-9\s]", "", s) - s = re.sub(r"\s+", "_", s.strip()) - return s.lower() - - -def _parse_datetime(s: str | None) -> datetime | None: - """Parse an ISO datetime string to UTC datetime.""" - if not s: - return None - try: - dt = datetime.fromisoformat(s.replace("Z", "+00:00")) - return dt.astimezone(timezone.utc) - except (ValueError, TypeError): - return None - - -def _compute_centroid(geometry: dict[str, Any] | None) -> tuple[float, float] | None: - """Compute centroid from GeoJSON geometry using arithmetic mean of vertices.""" - if not geometry: - return None - - geom_type = geometry.get("type") - coords = geometry.get("coordinates") - - if not coords: - return None - - all_points: list[tuple[float, float]] = [] - - if geom_type == "Point": - return (coords[0], coords[1]) - elif geom_type == "Polygon": - for ring in coords: - for point in ring: - all_points.append((point[0], point[1])) - elif geom_type == "MultiPolygon": - for polygon in coords: - for ring in polygon: - for point in ring: - all_points.append((point[0], point[1])) - else: - return None - - if not all_points: - return None - - avg_lon = sum(p[0] for p in all_points) / len(all_points) - avg_lat = sum(p[1] for p in all_points) / len(all_points) - return (avg_lon, avg_lat) - - -def _compute_bbox( - geometry: dict[str, Any] | None -) -> tuple[float, float, float, float] | None: - """Compute bounding box from GeoJSON geometry.""" - if not geometry: - return None - - geom_type = geometry.get("type") - coords = geometry.get("coordinates") - - if not coords: - return None - - all_points: list[tuple[float, float]] = [] - - if geom_type == "Point": - return (coords[0], coords[1], coords[0], coords[1]) - elif geom_type == "Polygon": - for ring in coords: - for point in ring: - all_points.append((point[0], point[1])) - elif geom_type == "MultiPolygon": - for polygon in coords: - for ring in polygon: - for point in ring: - all_points.append((point[0], point[1])) - else: - return None - - if not all_points: - return None - - min_lon = min(p[0] for p in all_points) - max_lon = max(p[0] for p in all_points) - min_lat = min(p[1] for p in all_points) - max_lat = max(p[1] for p in all_points) - return (min_lon, min_lat, max_lon, max_lat) - - -def _extract_states_from_codes( - same_codes: list[str], ugc_codes: list[str] -) -> set[str]: - """Extract state abbreviations from SAME and UGC codes.""" - states: set[str] = set() - - for code in same_codes: - if len(code) >= 2: - fips_state = code[:2] - if fips_state in FIPS_TO_STATE: - states.add(FIPS_TO_STATE[fips_state]) - - for code in ugc_codes: - if len(code) >= 2 and code[:2].isalpha(): - states.add(code[:2].upper()) - - return states - - -def _build_regions(same_codes: list[str], ugc_codes: list[str]) -> list[str]: - """Build sorted list of region strings from geocodes.""" - regions: set[str] = set() - - for code in same_codes: - if len(code) >= 2: - fips_state = code[:2] - if fips_state in FIPS_TO_STATE: - state = FIPS_TO_STATE[fips_state] - regions.add(f"US-{state}-FIPS{code}") - - for code in ugc_codes: - if len(code) >= 3 and code[:2].isalpha(): - state = code[:2].upper() - rest = code[2:] - if rest.startswith("C"): - regions.add(f"US-{state}-C{rest[1:]}") - elif rest.startswith("Z"): - regions.add(f"US-{state}-Z{rest[1:]}") - else: - regions.add(f"US-{state}-{rest}") - - return sorted(regions) - - -class NWSAdapter(SourceAdapter): - """National Weather Service alerts adapter.""" - - name = "nws" - - def __init__( - self, - config: AdapterConfig, - cursor_db_path: Path, - ) -> None: - self.cursor_db_path = cursor_db_path - self._session: aiohttp.ClientSession | None = None - self._db: sqlite3.Connection | None = None - - # Extract settings from unified config - self.contact_email: str = config.settings.get("contact_email", "") - - # Parse region from settings - region_dict = config.settings.get("region") - if region_dict: - self.region: RegionConfig | None = RegionConfig(**region_dict) - else: - self.region = None - - async def apply_config(self, new_config: AdapterConfig) -> None: - """Apply new configuration from hot-reload.""" - # Update contact email - self.contact_email = new_config.settings.get("contact_email", "") - - # Update region - region_dict = new_config.settings.get("region") - if region_dict: - self.region = RegionConfig(**region_dict) - else: - self.region = None - - logger.info( - "NWS config applied", - extra={ - "region": region_dict, - "contact_email": self.contact_email, - }, - ) - - def _point_in_region(self, centroid: tuple[float, float] | None) -> bool: - """Check if centroid is within configured region bbox.""" - if self.region is None: - # No region configured = accept all - return True - if centroid is None: - return False - lon, lat = centroid - return ( - self.region.west <= lon <= self.region.east - and self.region.south <= lat <= self.region.north - ) - - async def startup(self) -> None: - """Initialize HTTP session and cursor database.""" - user_agent = f"Central/{__version__} ({self.contact_email})" - self._session = aiohttp.ClientSession( - headers={"User-Agent": user_agent}, - timeout=aiohttp.ClientTimeout(total=30), - ) - - self._db = sqlite3.connect(str(self.cursor_db_path)) - self._db.execute(""" - CREATE TABLE IF NOT EXISTS adapter_cursors ( - adapter TEXT PRIMARY KEY, - cursor_data TEXT NOT NULL, - updated TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP - ) - """) - self._db.execute(""" - CREATE TABLE IF NOT EXISTS published_ids ( - adapter TEXT NOT NULL, - event_id TEXT NOT NULL, - first_seen TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, - last_seen TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, - PRIMARY KEY (adapter, event_id) - ) - """) - self._db.execute(""" - CREATE INDEX IF NOT EXISTS published_ids_last_seen - ON published_ids (last_seen) - """) - self._db.commit() - - logger.info( - "NWS adapter started", - extra={ - "region": { - "north": self.region.north, - "south": self.region.south, - "east": self.region.east, - "west": self.region.west, - } if self.region else None, - }, - ) - - async def shutdown(self) -> None: - """Close HTTP session and database.""" - if self._session: - await self._session.close() - self._session = None - if self._db: - self._db.close() - self._db = None - logger.info("NWS adapter shut down") - - def _get_cursor(self) -> str | None: - """Get the stored If-Modified-Since cursor.""" - if not self._db: - return None - cur = self._db.execute( - "SELECT cursor_data FROM adapter_cursors WHERE adapter = ?", - (self.name,) - ) - row = cur.fetchone() - return row[0] if row else None - - def _set_cursor(self, last_modified: str) -> None: - """Store the Last-Modified header for next request.""" - if not self._db: - return - self._db.execute( - """ - INSERT INTO adapter_cursors (adapter, cursor_data, updated) - VALUES (?, ?, CURRENT_TIMESTAMP) - ON CONFLICT (adapter) DO UPDATE SET - cursor_data = excluded.cursor_data, - updated = CURRENT_TIMESTAMP - """, - (self.name, last_modified) - ) - self._db.commit() - - def is_published(self, event_id: str) -> bool: - """Check if an event has already been published.""" - if not self._db: - return False - cur = self._db.execute( - "SELECT 1 FROM published_ids WHERE adapter = ? AND event_id = ?", - (self.name, event_id) - ) - return cur.fetchone() is not None - - def mark_published(self, event_id: str) -> None: - """Mark an event as published.""" - if not self._db: - return - self._db.execute( - """ - INSERT INTO published_ids (adapter, event_id, first_seen, last_seen) - VALUES (?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP) - ON CONFLICT (adapter, event_id) DO UPDATE SET - last_seen = CURRENT_TIMESTAMP - """, - (self.name, event_id) - ) - self._db.commit() - - def bump_last_seen(self, event_id: str) -> None: - """Bump the last_seen timestamp for an event.""" - if not self._db: - return - self._db.execute( - "UPDATE published_ids SET last_seen = CURRENT_TIMESTAMP WHERE adapter = ? AND event_id = ?", - (self.name, event_id) - ) - self._db.commit() - - def sweep_old_ids(self) -> int: - """Remove published_ids older than 8 days. Returns count deleted.""" - if not self._db: - return 0 - cur = self._db.execute( - "DELETE FROM published_ids WHERE last_seen < datetime('now', '-8 days')" - ) - self._db.commit() - return cur.rowcount - - @retry( - stop=stop_after_attempt(5), - wait=wait_exponential_jitter(initial=1, max=60), - retry=retry_if_exception_type((aiohttp.ClientError, asyncio.TimeoutError)), - reraise=True, - ) - async def _fetch_alerts(self) -> tuple[int, dict[str, Any] | None, str | None]: - """Fetch alerts from NWS API with conditional request.""" - if not self._session: - raise RuntimeError("Session not initialized") - - headers: dict[str, str] = {} - cursor = self._get_cursor() - if cursor: - headers["If-Modified-Since"] = cursor - - async with self._session.get(NWS_API_URL, headers=headers) as resp: - if resp.status in (429, 403): - retry_after = resp.headers.get("Retry-After", "60") - try: - wait_time = int(retry_after) - except ValueError: - wait_time = 60 - logger.warning( - "Rate limited by NWS", - extra={"status": resp.status, "retry_after": wait_time} - ) - await asyncio.sleep(wait_time) - raise aiohttp.ClientError(f"Rate limited: {resp.status}") - - if resp.status == 304: - return (304, None, None) - - resp.raise_for_status() - - data = await resp.json() - last_modified = resp.headers.get("Last-Modified") - - return (resp.status, data, last_modified) - - def _normalize_feature(self, feature: dict[str, Any]) -> Event | None: - """Normalize a GeoJSON feature to an Event.""" - props = feature.get("properties", {}) - geocode = props.get("geocode", {}) - - same_codes = geocode.get("SAME", []) - ugc_codes = geocode.get("UGC", []) - - # Compute geometry data first - geometry = feature.get("geometry") - centroid = _compute_centroid(geometry) - bbox = _compute_bbox(geometry) - - # Filter by region bbox (client-side filtering) - if not self._point_in_region(centroid): - return None - - event_id = feature.get("id") - if not event_id: - logger.warning("Feature missing id", extra={"properties": props}) - return None - - event_type = props.get("event", "Unknown") - category = f"wx.alert.{_snake_case(event_type)}" - - time = _parse_datetime(props.get("sent")) - if not time: - logger.warning("Feature missing sent time", extra={"id": event_id}) - return None - - expires = _parse_datetime(props.get("expires")) - - severity_str = props.get("severity", "Unknown") - severity = SEVERITY_MAP.get(severity_str) - - regions = _build_regions(same_codes, ugc_codes) - primary_region = regions[0] if regions else None - - geo = Geo( - centroid=centroid, - bbox=bbox, - regions=regions, - primary_region=primary_region, - ) - - return Event( - id=event_id, - source="central/adapters/nws", - category=category, - time=time, - expires=expires, - severity=severity, - geo=geo, - data=props, - ) - - async def poll(self) -> AsyncIterator[Event]: - """Poll NWS API for active alerts.""" - try: - status, data, last_modified = await self._fetch_alerts() - except Exception as e: - logger.error("Failed to fetch NWS alerts", extra={"error": str(e)}) - raise - - if status == 304: - logger.info("NWS returned 304 Not Modified") - return - - if last_modified: - self._set_cursor(last_modified) - - features = data.get("features", []) if data else [] - logger.info( - "NWS poll completed", - extra={"status": status, "feature_count": len(features)} - ) - - yielded = 0 - for feature in features: - try: - event = self._normalize_feature(feature) - if event: - yield event - yielded += 1 - except Exception as e: - logger.warning( - "Failed to normalize feature", - extra={"error": str(e), "feature_id": feature.get("id")} - ) - - logger.info("NWS yielded events", extra={"count": yielded}) +"""NWS (National Weather Service) alert adapter.""" + +import asyncio +import logging +import re +import sqlite3 +from collections.abc import AsyncIterator +from datetime import datetime, timezone +from pathlib import Path +from typing import Any + +import aiohttp +from tenacity import ( + retry, + stop_after_attempt, + wait_exponential_jitter, + retry_if_exception_type, +) + +from central import __version__ +from central.adapter import SourceAdapter +from central.config_models import AdapterConfig, RegionConfig +from central.models import Event, Geo +from shapely.geometry import box as shapely_box, shape as shapely_shape + +logger = logging.getLogger(__name__) + +# FIPS state codes to postal abbreviations +FIPS_TO_STATE: dict[str, str] = { + "01": "AL", "02": "AK", "04": "AZ", "05": "AR", "06": "CA", + "08": "CO", "09": "CT", "10": "DE", "11": "DC", "12": "FL", + "13": "GA", "15": "HI", "16": "ID", "17": "IL", "18": "IN", + "19": "IA", "20": "KS", "21": "KY", "22": "LA", "23": "ME", + "24": "MD", "25": "MA", "26": "MI", "27": "MN", "28": "MS", + "29": "MO", "30": "MT", "31": "NE", "32": "NV", "33": "NH", + "34": "NJ", "35": "NM", "36": "NY", "37": "NC", "38": "ND", + "39": "OH", "40": "OK", "41": "OR", "42": "PA", "44": "RI", + "45": "SC", "46": "SD", "47": "TN", "48": "TX", "49": "UT", + "50": "VT", "51": "VA", "53": "WA", "54": "WV", "55": "WI", + "56": "WY", "60": "AS", "66": "GU", "69": "MP", "72": "PR", + "78": "VI", +} + +SEVERITY_MAP: dict[str, int | None] = { + "Extreme": 4, + "Severe": 3, + "Moderate": 2, + "Minor": 1, + "Unknown": None, +} + +NWS_API_URL = "https://api.weather.gov/alerts/active" + + +def _snake_case(s: str) -> str: + """Convert a string to snake_case.""" + s = re.sub(r"[^a-zA-Z0-9\s]", "", s) + s = re.sub(r"\s+", "_", s.strip()) + return s.lower() + + +def _parse_datetime(s: str | None) -> datetime | None: + """Parse an ISO datetime string to UTC datetime.""" + if not s: + return None + try: + dt = datetime.fromisoformat(s.replace("Z", "+00:00")) + return dt.astimezone(timezone.utc) + except (ValueError, TypeError): + return None + + +def _compute_centroid(geometry: dict[str, Any] | None) -> tuple[float, float] | None: + """Compute centroid from GeoJSON geometry using arithmetic mean of vertices.""" + if not geometry: + return None + + geom_type = geometry.get("type") + coords = geometry.get("coordinates") + + if not coords: + return None + + all_points: list[tuple[float, float]] = [] + + if geom_type == "Point": + return (coords[0], coords[1]) + elif geom_type == "Polygon": + for ring in coords: + for point in ring: + all_points.append((point[0], point[1])) + elif geom_type == "MultiPolygon": + for polygon in coords: + for ring in polygon: + for point in ring: + all_points.append((point[0], point[1])) + else: + return None + + if not all_points: + return None + + avg_lon = sum(p[0] for p in all_points) / len(all_points) + avg_lat = sum(p[1] for p in all_points) / len(all_points) + return (avg_lon, avg_lat) + + +def _compute_bbox( + geometry: dict[str, Any] | None +) -> tuple[float, float, float, float] | None: + """Compute bounding box from GeoJSON geometry.""" + if not geometry: + return None + + geom_type = geometry.get("type") + coords = geometry.get("coordinates") + + if not coords: + return None + + all_points: list[tuple[float, float]] = [] + + if geom_type == "Point": + return (coords[0], coords[1], coords[0], coords[1]) + elif geom_type == "Polygon": + for ring in coords: + for point in ring: + all_points.append((point[0], point[1])) + elif geom_type == "MultiPolygon": + for polygon in coords: + for ring in polygon: + for point in ring: + all_points.append((point[0], point[1])) + else: + return None + + if not all_points: + return None + + min_lon = min(p[0] for p in all_points) + max_lon = max(p[0] for p in all_points) + min_lat = min(p[1] for p in all_points) + max_lat = max(p[1] for p in all_points) + return (min_lon, min_lat, max_lon, max_lat) + + +def _extract_states_from_codes( + same_codes: list[str], ugc_codes: list[str] +) -> set[str]: + """Extract state abbreviations from SAME and UGC codes.""" + states: set[str] = set() + + for code in same_codes: + if len(code) >= 2: + fips_state = code[:2] + if fips_state in FIPS_TO_STATE: + states.add(FIPS_TO_STATE[fips_state]) + + for code in ugc_codes: + if len(code) >= 2 and code[:2].isalpha(): + states.add(code[:2].upper()) + + return states + + +def _build_regions(same_codes: list[str], ugc_codes: list[str]) -> list[str]: + """Build sorted list of region strings from geocodes.""" + regions: set[str] = set() + + for code in same_codes: + if len(code) >= 2: + fips_state = code[:2] + if fips_state in FIPS_TO_STATE: + state = FIPS_TO_STATE[fips_state] + regions.add(f"US-{state}-FIPS{code}") + + for code in ugc_codes: + if len(code) >= 3 and code[:2].isalpha(): + state = code[:2].upper() + rest = code[2:] + if rest.startswith("C"): + regions.add(f"US-{state}-C{rest[1:]}") + elif rest.startswith("Z"): + regions.add(f"US-{state}-Z{rest[1:]}") + else: + regions.add(f"US-{state}-{rest}") + + return sorted(regions) + + +class NWSAdapter(SourceAdapter): + """National Weather Service alerts adapter.""" + + name = "nws" + + def __init__( + self, + config: AdapterConfig, + cursor_db_path: Path, + ) -> None: + self.cursor_db_path = cursor_db_path + self._session: aiohttp.ClientSession | None = None + self._db: sqlite3.Connection | None = None + + # Extract settings from unified config + self.contact_email: str = config.settings.get("contact_email", "") + + # Parse region from settings + region_dict = config.settings.get("region") + if region_dict: + self.region: RegionConfig | None = RegionConfig(**region_dict) + else: + self.region = None + + async def apply_config(self, new_config: AdapterConfig) -> None: + """Apply new configuration from hot-reload.""" + # Update contact email + self.contact_email = new_config.settings.get("contact_email", "") + + # Update region + region_dict = new_config.settings.get("region") + if region_dict: + self.region = RegionConfig(**region_dict) + else: + self.region = None + + logger.info( + "NWS config applied", + extra={ + "region": region_dict, + "contact_email": self.contact_email, + }, + ) + + def _geometry_intersects_region(self, geometry: dict[str, Any] | None) -> bool: + """Check if feature geometry intersects configured region bbox. + + Uses Shapely for proper polygon intersection rather than centroid-only + filtering, avoiding false negatives on large alert polygons. + """ + if self.region is None: + # No region configured = accept all + return True + if geometry is None: + return False + + try: + # Build region box (west, south, east, north) + region_box = shapely_box( + self.region.west, + self.region.south, + self.region.east, + self.region.north, + ) + + # Parse GeoJSON geometry to shapely shape + feature_shape = shapely_shape(geometry) + + return region_box.intersects(feature_shape) + except Exception: + # If geometry parsing fails, fall back to rejecting + return False + + async def startup(self) -> None: + """Initialize HTTP session and cursor database.""" + user_agent = f"Central/{__version__} ({self.contact_email})" + self._session = aiohttp.ClientSession( + headers={"User-Agent": user_agent}, + timeout=aiohttp.ClientTimeout(total=30), + ) + + self._db = sqlite3.connect(str(self.cursor_db_path)) + self._db.execute(""" + CREATE TABLE IF NOT EXISTS adapter_cursors ( + adapter TEXT PRIMARY KEY, + cursor_data TEXT NOT NULL, + updated TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP + ) + """) + self._db.execute(""" + CREATE TABLE IF NOT EXISTS published_ids ( + adapter TEXT NOT NULL, + event_id TEXT NOT NULL, + first_seen TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + last_seen TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY (adapter, event_id) + ) + """) + self._db.execute(""" + CREATE INDEX IF NOT EXISTS published_ids_last_seen + ON published_ids (last_seen) + """) + self._db.commit() + + logger.info( + "NWS adapter started", + extra={ + "region": { + "north": self.region.north, + "south": self.region.south, + "east": self.region.east, + "west": self.region.west, + } if self.region else None, + }, + ) + + async def shutdown(self) -> None: + """Close HTTP session and database.""" + if self._session: + await self._session.close() + self._session = None + if self._db: + self._db.close() + self._db = None + logger.info("NWS adapter shut down") + + def _get_cursor(self) -> str | None: + """Get the stored If-Modified-Since cursor.""" + if not self._db: + return None + cur = self._db.execute( + "SELECT cursor_data FROM adapter_cursors WHERE adapter = ?", + (self.name,) + ) + row = cur.fetchone() + return row[0] if row else None + + def _set_cursor(self, last_modified: str) -> None: + """Store the Last-Modified header for next request.""" + if not self._db: + return + self._db.execute( + """ + INSERT INTO adapter_cursors (adapter, cursor_data, updated) + VALUES (?, ?, CURRENT_TIMESTAMP) + ON CONFLICT (adapter) DO UPDATE SET + cursor_data = excluded.cursor_data, + updated = CURRENT_TIMESTAMP + """, + (self.name, last_modified) + ) + self._db.commit() + + def is_published(self, event_id: str) -> bool: + """Check if an event has already been published.""" + if not self._db: + return False + cur = self._db.execute( + "SELECT 1 FROM published_ids WHERE adapter = ? AND event_id = ?", + (self.name, event_id) + ) + return cur.fetchone() is not None + + def mark_published(self, event_id: str) -> None: + """Mark an event as published.""" + if not self._db: + return + self._db.execute( + """ + INSERT INTO published_ids (adapter, event_id, first_seen, last_seen) + VALUES (?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP) + ON CONFLICT (adapter, event_id) DO UPDATE SET + last_seen = CURRENT_TIMESTAMP + """, + (self.name, event_id) + ) + self._db.commit() + + def bump_last_seen(self, event_id: str) -> None: + """Bump the last_seen timestamp for an event.""" + if not self._db: + return + self._db.execute( + "UPDATE published_ids SET last_seen = CURRENT_TIMESTAMP WHERE adapter = ? AND event_id = ?", + (self.name, event_id) + ) + self._db.commit() + + def sweep_old_ids(self) -> int: + """Remove published_ids older than 8 days. Returns count deleted.""" + if not self._db: + return 0 + cur = self._db.execute( + "DELETE FROM published_ids WHERE last_seen < datetime('now', '-8 days')" + ) + self._db.commit() + return cur.rowcount + + @retry( + stop=stop_after_attempt(5), + wait=wait_exponential_jitter(initial=1, max=60), + retry=retry_if_exception_type((aiohttp.ClientError, asyncio.TimeoutError)), + reraise=True, + ) + async def _fetch_alerts(self) -> tuple[int, dict[str, Any] | None, str | None]: + """Fetch alerts from NWS API with conditional request.""" + if not self._session: + raise RuntimeError("Session not initialized") + + headers: dict[str, str] = {} + cursor = self._get_cursor() + if cursor: + headers["If-Modified-Since"] = cursor + + async with self._session.get(NWS_API_URL, headers=headers) as resp: + if resp.status in (429, 403): + retry_after = resp.headers.get("Retry-After", "60") + try: + wait_time = int(retry_after) + except ValueError: + wait_time = 60 + logger.warning( + "Rate limited by NWS", + extra={"status": resp.status, "retry_after": wait_time} + ) + await asyncio.sleep(wait_time) + raise aiohttp.ClientError(f"Rate limited: {resp.status}") + + if resp.status == 304: + return (304, None, None) + + resp.raise_for_status() + + data = await resp.json() + last_modified = resp.headers.get("Last-Modified") + + return (resp.status, data, last_modified) + + def _normalize_feature(self, feature: dict[str, Any]) -> Event | None: + """Normalize a GeoJSON feature to an Event.""" + props = feature.get("properties", {}) + geocode = props.get("geocode", {}) + + same_codes = geocode.get("SAME", []) + ugc_codes = geocode.get("UGC", []) + + # Compute geometry data first + geometry = feature.get("geometry") + centroid = _compute_centroid(geometry) + bbox = _compute_bbox(geometry) + + # Filter by region bbox (client-side filtering) + if not self._geometry_intersects_region(geometry): + return None + + event_id = feature.get("id") + if not event_id: + logger.warning("Feature missing id", extra={"properties": props}) + return None + + event_type = props.get("event", "Unknown") + category = f"wx.alert.{_snake_case(event_type)}" + + time = _parse_datetime(props.get("sent")) + if not time: + logger.warning("Feature missing sent time", extra={"id": event_id}) + return None + + expires = _parse_datetime(props.get("expires")) + + severity_str = props.get("severity", "Unknown") + severity = SEVERITY_MAP.get(severity_str) + + regions = _build_regions(same_codes, ugc_codes) + primary_region = regions[0] if regions else None + + geo = Geo( + centroid=centroid, + bbox=bbox, + regions=regions, + primary_region=primary_region, + ) + + return Event( + id=event_id, + source="central/adapters/nws", + category=category, + time=time, + expires=expires, + severity=severity, + geo=geo, + data=props, + ) + + async def poll(self) -> AsyncIterator[Event]: + """Poll NWS API for active alerts.""" + try: + status, data, last_modified = await self._fetch_alerts() + except Exception as e: + logger.error("Failed to fetch NWS alerts", extra={"error": str(e)}) + raise + + if status == 304: + logger.info("NWS returned 304 Not Modified") + return + + if last_modified: + self._set_cursor(last_modified) + + features = data.get("features", []) if data else [] + logger.info( + "NWS poll completed", + extra={"status": status, "feature_count": len(features)} + ) + + yielded = 0 + for feature in features: + try: + event = self._normalize_feature(feature) + if event: + yield event + yielded += 1 + except Exception as e: + logger.warning( + "Failed to normalize feature", + extra={"error": str(e), "feature_id": feature.get("id")} + ) + + logger.info("NWS yielded events", extra={"count": yielded}) diff --git a/src/central/config_models.py b/src/central/config_models.py index 0447c56..a5ba0d5 100644 --- a/src/central/config_models.py +++ b/src/central/config_models.py @@ -22,6 +22,8 @@ class RegionConfig(BaseModel): ) if self.east == self.west: raise ValueError("east and west cannot be equal (zero-width bbox)") + if self.east < self.west: + raise ValueError("antimeridian-crossing bboxes not supported") return self diff --git a/uv.lock b/uv.lock index 555c2dd..75abe93 100644 --- a/uv.lock +++ b/uv.lock @@ -45,15 +45,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/aa/ca/eadf6f9c8fa5e31d40993e3db153fb5ed0b11008ad5d9de98a95045bed84/aiohttp-3.13.5-cp312-cp312-win_amd64.whl", hash = "sha256:110e448e02c729bcebb18c60b9214a87ba33bac4a9fa5e9a5f139938b56c6cb1", size = 460446, upload-time = "2026-03-31T21:58:10.945Z" }, ] -[[package]] -name = "aiolimiter" -version = "1.2.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f1/23/b52debf471f7a1e42e362d959a3982bdcb4fe13a5d46e63d28868807a79c/aiolimiter-1.2.1.tar.gz", hash = "sha256:e02a37ea1a855d9e832252a105420ad4d15011505512a1a1d814647451b5cca9", size = 7185, upload-time = "2024-12-08T15:31:51.496Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f3/ba/df6e8e1045aebc4778d19b8a3a9bc1808adb1619ba94ca354d9ba17d86c3/aiolimiter-1.2.1-py3-none-any.whl", hash = "sha256:d3f249e9059a20badcb56b61601a83556133655c11d1eb3dd3e04ff069e5f3c7", size = 6711, upload-time = "2024-12-08T15:31:49.874Z" }, -] - [[package]] name = "aiosignal" version = "1.4.0" @@ -130,11 +121,13 @@ version = "0.1.0" source = { editable = "." } dependencies = [ { name = "aiohttp" }, - { name = "aiolimiter" }, { name = "asyncpg" }, { name = "cloudevents" }, + { name = "cryptography" }, { name = "nats-py" }, { name = "pydantic" }, + { name = "pydantic-settings" }, + { name = "shapely" }, { name = "tenacity" }, ] @@ -149,11 +142,13 @@ dev = [ [package.metadata] requires-dist = [ { name = "aiohttp", specifier = ">=3.13.5" }, - { name = "aiolimiter", specifier = ">=1.2.1" }, { name = "asyncpg", specifier = ">=0.31.0" }, { name = "cloudevents", specifier = ">=2.0.0" }, + { name = "cryptography", specifier = ">=44.0.0" }, { name = "nats-py", specifier = ">=2.14.0" }, { name = "pydantic", specifier = ">=2,<3" }, + { name = "pydantic-settings", specifier = ">=2.7.0" }, + { name = "shapely", specifier = ">=2.0" }, { name = "tenacity", specifier = ">=9.1.4" }, ] @@ -165,6 +160,29 @@ dev = [ { name = "ruff", specifier = ">=0.15.13" }, ] +[[package]] +name = "cffi" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pycparser", marker = "implementation_name != 'PyPy'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/eb/56/b1ba7935a17738ae8453301356628e8147c79dbb825bcbc73dc7401f9846/cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529", size = 523588, upload-time = "2025-09-08T23:24:04.541Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ea/47/4f61023ea636104d4f16ab488e268b93008c3d0bb76893b1b31db1f96802/cffi-2.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d02d6655b0e54f54c4ef0b94eb6be0607b70853c45ce98bd278dc7de718be5d", size = 185271, upload-time = "2025-09-08T23:22:44.795Z" }, + { url = "https://files.pythonhosted.org/packages/df/a2/781b623f57358e360d62cdd7a8c681f074a71d445418a776eef0aadb4ab4/cffi-2.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8eca2a813c1cb7ad4fb74d368c2ffbbb4789d377ee5bb8df98373c2cc0dee76c", size = 181048, upload-time = "2025-09-08T23:22:45.938Z" }, + { url = "https://files.pythonhosted.org/packages/ff/df/a4f0fbd47331ceeba3d37c2e51e9dfc9722498becbeec2bd8bc856c9538a/cffi-2.0.0-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:21d1152871b019407d8ac3985f6775c079416c282e431a4da6afe7aefd2bccbe", size = 212529, upload-time = "2025-09-08T23:22:47.349Z" }, + { url = "https://files.pythonhosted.org/packages/d5/72/12b5f8d3865bf0f87cf1404d8c374e7487dcf097a1c91c436e72e6badd83/cffi-2.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b21e08af67b8a103c71a250401c78d5e0893beff75e28c53c98f4de42f774062", size = 220097, upload-time = "2025-09-08T23:22:48.677Z" }, + { url = "https://files.pythonhosted.org/packages/c2/95/7a135d52a50dfa7c882ab0ac17e8dc11cec9d55d2c18dda414c051c5e69e/cffi-2.0.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:1e3a615586f05fc4065a8b22b8152f0c1b00cdbc60596d187c2a74f9e3036e4e", size = 207983, upload-time = "2025-09-08T23:22:50.06Z" }, + { url = "https://files.pythonhosted.org/packages/3a/c8/15cb9ada8895957ea171c62dc78ff3e99159ee7adb13c0123c001a2546c1/cffi-2.0.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:81afed14892743bbe14dacb9e36d9e0e504cd204e0b165062c488942b9718037", size = 206519, upload-time = "2025-09-08T23:22:51.364Z" }, + { url = "https://files.pythonhosted.org/packages/78/2d/7fa73dfa841b5ac06c7b8855cfc18622132e365f5b81d02230333ff26e9e/cffi-2.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3e17ed538242334bf70832644a32a7aae3d83b57567f9fd60a26257e992b79ba", size = 219572, upload-time = "2025-09-08T23:22:52.902Z" }, + { url = "https://files.pythonhosted.org/packages/07/e0/267e57e387b4ca276b90f0434ff88b2c2241ad72b16d31836adddfd6031b/cffi-2.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3925dd22fa2b7699ed2617149842d2e6adde22b262fcbfada50e3d195e4b3a94", size = 222963, upload-time = "2025-09-08T23:22:54.518Z" }, + { url = "https://files.pythonhosted.org/packages/b6/75/1f2747525e06f53efbd878f4d03bac5b859cbc11c633d0fb81432d98a795/cffi-2.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2c8f814d84194c9ea681642fd164267891702542f028a15fc97d4674b6206187", size = 221361, upload-time = "2025-09-08T23:22:55.867Z" }, + { url = "https://files.pythonhosted.org/packages/7b/2b/2b6435f76bfeb6bbf055596976da087377ede68df465419d192acf00c437/cffi-2.0.0-cp312-cp312-win32.whl", hash = "sha256:da902562c3e9c550df360bfa53c035b2f241fed6d9aef119048073680ace4a18", size = 172932, upload-time = "2025-09-08T23:22:57.188Z" }, + { url = "https://files.pythonhosted.org/packages/f8/ed/13bd4418627013bec4ed6e54283b1959cf6db888048c7cf4b4c3b5b36002/cffi-2.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:da68248800ad6320861f129cd9c1bf96ca849a2771a59e0344e88681905916f5", size = 183557, upload-time = "2025-09-08T23:22:58.351Z" }, + { url = "https://files.pythonhosted.org/packages/95/31/9f7f93ad2f8eff1dbc1c3656d7ca5bfd8fb52c9d786b4dcf19b2d02217fa/cffi-2.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:4671d9dd5ec934cb9a73e7ee9676f9362aba54f7f34910956b84d727b0d73fb6", size = 177762, upload-time = "2025-09-08T23:22:59.668Z" }, +] + [[package]] name = "cloudevents" version = "2.0.0" @@ -187,6 +205,45 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, ] +[[package]] +name = "cryptography" +version = "48.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cffi", marker = "platform_python_implementation != 'PyPy'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9f/a9/db8f313fdcd85d767d4973515e1db101f9c71f95fced83233de224673757/cryptography-48.0.0.tar.gz", hash = "sha256:5c3932f4436d1cccb036cb0eaef46e6e2db91035166f1ad6505c3c9d5a635920", size = 832984, upload-time = "2026-05-04T22:59:38.133Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/df/3d/01f6dd9190170a5a241e0e98c2d04be3664a9e6f5b9b872cde63aff1c3dd/cryptography-48.0.0-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:0c558d2cdffd8f4bbb30fc7134c74d2ca9a476f830bb053074498fbc86f41ed6", size = 8001587, upload-time = "2026-05-04T22:57:36.803Z" }, + { url = "https://files.pythonhosted.org/packages/b2/6e/e90527eef33f309beb811cf7c982c3aeffcce8e3edb178baa4ca3ae4a6fa/cryptography-48.0.0-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f5333311663ea94f75dd408665686aaf426563556bb5283554a3539177e03b8c", size = 4690433, upload-time = "2026-05-04T22:57:40.373Z" }, + { url = "https://files.pythonhosted.org/packages/90/04/673510ed51ddff56575f306cf1617d80411ee76831ccd3097599140efdfe/cryptography-48.0.0-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7995ef305d7165c3f11ae07f2517e5a4f1d5c18da1376a0a9ed496336b69e5f3", size = 4710620, upload-time = "2026-05-04T22:57:42.935Z" }, + { url = "https://files.pythonhosted.org/packages/14/d5/e9c4ef932c8d800490c34d8bd589d64a31d5890e27ec9e9ad532be893294/cryptography-48.0.0-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:40ba1f85eaa6959837b1d51c9767e230e14612eea4ef110ee8854ada22da1bf5", size = 4696283, upload-time = "2026-05-04T22:57:45.294Z" }, + { url = "https://files.pythonhosted.org/packages/0c/29/174b9dfb60b12d59ecfc6cfa04bc88c21b42a54f01b8aae09bb6e51e4c7f/cryptography-48.0.0-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:369a6348999f94bbd53435c894377b20ab95f25a9065c283570e70150d8abc3c", size = 5296573, upload-time = "2026-05-04T22:57:47.933Z" }, + { url = "https://files.pythonhosted.org/packages/95/38/0d29a6fd7d0d1373f0c0c88a04ba20e359b257753ac497564cd660fc1d55/cryptography-48.0.0-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:a0e692c683f4df67815a2d258b324e66f4738bd7a96a218c826dce4f4bd05d8f", size = 4743677, upload-time = "2026-05-04T22:57:50.067Z" }, + { url = "https://files.pythonhosted.org/packages/30/be/eef653013d5c63b6a490529e0316f9ac14a37602965d4903efed1399f32b/cryptography-48.0.0-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:18349bbc56f4743c8b12dc32e2bccb2cf83ee8b69a3bba74ef8ae857e26b3d25", size = 4330808, upload-time = "2026-05-04T22:57:52.301Z" }, + { url = "https://files.pythonhosted.org/packages/84/9e/500463e87abb7a0a0f9f256ec21123ecde0a7b5541a15e840ea54551fd81/cryptography-48.0.0-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:7e8eac43dfca5c4cccc6dad9a80504436fca53bb9bc3100a2386d730fbe6b602", size = 4695941, upload-time = "2026-05-04T22:57:54.603Z" }, + { url = "https://files.pythonhosted.org/packages/e3/dc/7303087450c2ec9e7fbb750e17c2abfbc658f23cbd0e54009509b7cc4091/cryptography-48.0.0-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:9ccdac7d40688ecb5a3b4a604b8a88c8002e3442d6c60aead1db2a89a041560c", size = 5252579, upload-time = "2026-05-04T22:57:57.207Z" }, + { url = "https://files.pythonhosted.org/packages/d0/c0/7101d3b7215edcdc90c45da544961fd8ed2d6448f77577460fa75a8443f7/cryptography-48.0.0-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:bd72e68b06bb1e96913f97dd4901119bc17f39d4586a5adf2d3e47bc2b9d58b5", size = 4743326, upload-time = "2026-05-04T22:57:59.535Z" }, + { url = "https://files.pythonhosted.org/packages/ac/d8/5b833bad13016f562ab9d063d68199a4bd121d18458e439515601d3357ec/cryptography-48.0.0-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:59baa2cb386c4f0b9905bd6eb4c2a79a69a128408fd31d32ca4d7102d4156321", size = 4826672, upload-time = "2026-05-04T22:58:01.996Z" }, + { url = "https://files.pythonhosted.org/packages/98/e1/7074eb8bf3c135558c73fc2bcf0f5633f912e6fb87e868a55c454080ef09/cryptography-48.0.0-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:9249e3cd978541d665967ac2cb2787fd6a62bddf1e75b3e347a594d7dacf4f74", size = 4972574, upload-time = "2026-05-04T22:58:03.968Z" }, + { url = "https://files.pythonhosted.org/packages/04/70/e5a1b41d325f797f39427aa44ef8baf0be500065ab6d8e10369d850d4a4f/cryptography-48.0.0-cp311-abi3-win32.whl", hash = "sha256:9c459db21422be75e2809370b829a87eb37f74cd785fc4aa9ea1e5f43b47cda4", size = 3294868, upload-time = "2026-05-04T22:58:06.467Z" }, + { url = "https://files.pythonhosted.org/packages/f4/ac/8ac51b4a5fc5932eb7ee5c517ba7dc8cd834f0048962b6b352f00f41ebf9/cryptography-48.0.0-cp311-abi3-win_amd64.whl", hash = "sha256:5b012212e08b8dd5edc78ef54da83dd9892fd9105323b3993eff6bea65dc21d7", size = 3817107, upload-time = "2026-05-04T22:58:08.845Z" }, + { url = "https://files.pythonhosted.org/packages/f2/63/61d4a4e1c6b6bab6ce1e213cd36a24c415d90e76d78c5eb8577c5541d2e8/cryptography-48.0.0-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:58d00498e8933e4a194f3076aee1b4a97dfec1a6da444535755822fe5d8b0b86", size = 7983482, upload-time = "2026-05-04T22:58:43.769Z" }, + { url = "https://files.pythonhosted.org/packages/d5/ac/f5b5995b87770c693e2596559ffafe195b4033a57f14a82268a2842953f3/cryptography-48.0.0-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:614d0949f4790582d2cc25553abd09dd723025f0c0e7c67376a1d77196743d6e", size = 4683266, upload-time = "2026-05-04T22:58:46.064Z" }, + { url = "https://files.pythonhosted.org/packages/ec/c6/8b14f67e18338fbc4adb76f66c001f5c3610b3e2d1837f268f47a347dbbb/cryptography-48.0.0-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7ce4bfae76319a532a2dc68f82cc32f5676ee792a983187dac07183690e5c66f", size = 4696228, upload-time = "2026-05-04T22:58:48.22Z" }, + { url = "https://files.pythonhosted.org/packages/ea/73/f808fbae9514bd91b47875b003f13e284c8c6bdfd904b7944e803937eec1/cryptography-48.0.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:2eb992bbd4661238c5a397594c83f5b4dc2bc5b848c365c8f991b6780efcc5c7", size = 4689097, upload-time = "2026-05-04T22:58:50.9Z" }, + { url = "https://files.pythonhosted.org/packages/93/01/d86632d7d28db8ae83221995752eeb6639ffb374c2d22955648cf8d52797/cryptography-48.0.0-cp39-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:22a5cb272895dce158b2cacdfdc3debd299019659f42947dbdac6f32d68fe832", size = 5283582, upload-time = "2026-05-04T22:58:53.017Z" }, + { url = "https://files.pythonhosted.org/packages/02/e1/50edc7a50334807cc4791fc4a0ce7468b4a1416d9138eab358bfc9a3d70b/cryptography-48.0.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:2b4d59804e8408e2fea7d1fbaf218e5ec984325221db76e6a241a9abd6cdd95c", size = 4730479, upload-time = "2026-05-04T22:58:55.611Z" }, + { url = "https://files.pythonhosted.org/packages/6f/af/99a582b1b1641ff5911ac559beb45097cf79efd4ead4657f578ef1af2d47/cryptography-48.0.0-cp39-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:984a20b0f62a26f48a3396c72e4bc34c66e356d356bf370053066b3b6d54634a", size = 4326481, upload-time = "2026-05-04T22:58:57.607Z" }, + { url = "https://files.pythonhosted.org/packages/90/ee/89aa26a06ef0a7d7611788ffd571a7c50e368cc6a4d5eef8b4884e866edb/cryptography-48.0.0-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:5a5ed8fde7a1d09376ca0b40e68cd59c69fe23b1f9768bd5824f54681626032a", size = 4688713, upload-time = "2026-05-04T22:59:00.077Z" }, + { url = "https://files.pythonhosted.org/packages/70/ba/bcb1b0bb7a33d4c7c0c4d4c7874b4a62ae4f56113a5f4baefa362dfb1f0f/cryptography-48.0.0-cp39-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:8cd666227ef7af430aa5914a9910e0ddd703e75f039cef0825cd0da71b6b711a", size = 5238165, upload-time = "2026-05-04T22:59:02.317Z" }, + { url = "https://files.pythonhosted.org/packages/c9/70/ca4003b1ce5ca3dc3186ada51908c8a9b9ff7d5cab83cc0d43ee14ec144f/cryptography-48.0.0-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:9071196d81abc88b3516ac8cdfad32e2b66dd4a5393a8e68a961e9161ddc6239", size = 4729947, upload-time = "2026-05-04T22:59:05.255Z" }, + { url = "https://files.pythonhosted.org/packages/44/a0/4ec7cf774207905aef1a8d11c3750d5a1db805eb380ee4e16df317870128/cryptography-48.0.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:1e2d54c8be6152856a36f0882ab231e70f8ec7f14e93cf87db8a2ed056bf160c", size = 4822059, upload-time = "2026-05-04T22:59:07.802Z" }, + { url = "https://files.pythonhosted.org/packages/1e/75/a2e55f99c16fcac7b5d6c1eb19ad8e00799854d6be5ca845f9259eae1681/cryptography-48.0.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a5da777e32ffed6f85a7b2b3f7c5cbc88c146bfcd0a1d7baf5fcc6c52ee35dd4", size = 4960575, upload-time = "2026-05-04T22:59:09.851Z" }, + { url = "https://files.pythonhosted.org/packages/b8/23/6e6f32143ab5d8b36ca848a502c4bcd477ae75b9e1677e3530d669062578/cryptography-48.0.0-cp39-abi3-win32.whl", hash = "sha256:77a2ccbbe917f6710e05ba9adaa25fb5075620bf3ea6fb751997875aff4ae4bd", size = 3279117, upload-time = "2026-05-04T22:59:12.019Z" }, + { url = "https://files.pythonhosted.org/packages/9d/9a/0fea98a70cf1749d41d738836f6349d97945f7c89433a259a6c2642eefeb/cryptography-48.0.0-cp39-abi3-win_amd64.whl", hash = "sha256:16cd65b9330583e4619939b3a3843eec1e6e789744bb01e7c7e2e62e33c239c8", size = 3792100, upload-time = "2026-05-04T22:59:14.884Z" }, +] + [[package]] name = "deprecation" version = "2.1.0" @@ -331,6 +388,25 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f9/39/0e87753df1072254bac190b33ed34b264f28f6aa9bea0f01b7e818071756/nats_py-2.14.0-py3-none-any.whl", hash = "sha256:4116f5d2233ce16e63c3d5538fa40a5e207f75fcf42a741773929ddf1e29d19d", size = 82259, upload-time = "2026-02-23T22:45:00.152Z" }, ] +[[package]] +name = "numpy" +version = "2.4.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/50/8e/b8041bc719f056afd864478029d52214789341ac6583437b0ee5031e9530/numpy-2.4.5.tar.gz", hash = "sha256:ca670567a5683b7c1670ec03e0ddd5862e10934e92a70751d68d7b7b74ca7f9f", size = 20735669, upload-time = "2026-05-15T20:25:19.492Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6c/18/3275231e98620002681c922e792db04d72c356e9d8073c387344fc0e4ff1/numpy-2.4.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:654fb8674b61b1c4bd568f944d13a908566fdcb0d797303521d4149d16da05ef", size = 16689166, upload-time = "2026-05-15T20:22:50.761Z" }, + { url = "https://files.pythonhosted.org/packages/db/23/000aab6a16bdec53307f0f72546b57a3ac9266a62d8c257bee97d85fd078/numpy-2.4.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4cd9f6fa7ce10dc4627f2bb81dd9075dab67e94632e04c2b638e12575ddaa862", size = 14699514, upload-time = "2026-05-15T20:22:53.678Z" }, + { url = "https://files.pythonhosted.org/packages/47/cc/ddaf3af9c46966fef5be879256f213d85a0c56c75d07a3b7defec7cf6b4c/numpy-2.4.5-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:4f5bc96d35d94e4ceab8b38a92241b4611e95dc44e63b9f1fa2a331858ee3507", size = 5204601, upload-time = "2026-05-15T20:22:56.257Z" }, + { url = "https://files.pythonhosted.org/packages/07/ea/627fadd11959b3c7759008f34c92a35af8ff942dd8284a66ced648bbe516/numpy-2.4.5-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:4bb33e900ee81730ad77a258965134aa8ceac805124f7e5229347beda4b8d0aa", size = 6551360, upload-time = "2026-05-15T20:22:58.334Z" }, + { url = "https://files.pythonhosted.org/packages/a1/47/0728b986b8682d742ff68c16baa5af9d185484abfc635c5cc700f44e62be/numpy-2.4.5-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:32f8f852273ef32b291201ac2a2c97629c4a1ee8632bb670e3443eaa09fc2e72", size = 15671157, upload-time = "2026-05-15T20:23:01.081Z" }, + { url = "https://files.pythonhosted.org/packages/d1/0b/b905ae82d9419dc38123523862db64978ca2954b69609c3ae8fdaca1084c/numpy-2.4.5-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:685681e956fc8dcb75adc6ff26694e1dfd738b24bd8d4696c51ca0110157f912", size = 16645703, upload-time = "2026-05-15T20:23:04.358Z" }, + { url = "https://files.pythonhosted.org/packages/5f/24/e27fc3f5236b4118ed9eed67111675f5c61a07ea333acec87c869c3b359d/numpy-2.4.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6f64dd84b277a737eb59513f6b9bb6195bf41ab11941ef15b2562dbab43fa8ef", size = 17021018, upload-time = "2026-05-15T20:23:07.021Z" }, + { url = "https://files.pythonhosted.org/packages/d3/a7/9041af38d527ab80a06a93570a77e29425b41507ad41f6acf5da78cfb4a4/numpy-2.4.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b42d9496f79e3a728192f05a42d86e36163217b7cdecb3813d0028a0aa6b72d7", size = 18368768, upload-time = "2026-05-15T20:23:09.44Z" }, + { url = "https://files.pythonhosted.org/packages/49/82/326a014442f32c2663434fd424d9298791f47f8a0f17585ad60519a5606e/numpy-2.4.5-cp312-cp312-win32.whl", hash = "sha256:86d980970f5110595ca14855768073b08585fc1acc36895de303e039e7dee4a5", size = 5962819, upload-time = "2026-05-15T20:23:11.631Z" }, + { url = "https://files.pythonhosted.org/packages/3c/f0/cbf5d391b0b3a5e8cad264603e2fae256b0bde8ce43566b13b78faedc659/numpy-2.4.5-cp312-cp312-win_amd64.whl", hash = "sha256:3333dba6a4e611d666f69e177ba8fe4140366ff681a5feb2374d3fd4fff3acb6", size = 12321621, upload-time = "2026-05-15T20:23:14.305Z" }, + { url = "https://files.pythonhosted.org/packages/3c/d0/0f18909d9bc37a5f3f969fc737d2bb5df9f2ff295f71b467e6f52a0d6c4e/numpy-2.4.5-cp312-cp312-win_arm64.whl", hash = "sha256:4593d197270b894efeb538dcbe227e4bcf1c77f88c4c6bf933ead812cfaa4453", size = 10221430, upload-time = "2026-05-15T20:23:16.887Z" }, +] + [[package]] name = "packaging" version = "26.2" @@ -384,6 +460,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3a/ed/1cdcab6ba3d6ab7feca11fc14f0eeea80755bb53ef4e892079f31b10a25f/propcache-0.5.2-py3-none-any.whl", hash = "sha256:be1ddfcbb376e3de5d2e2db1d58d6d67463e6b4f9f040c000de8e300295465fe", size = 14036, upload-time = "2026-05-08T21:02:10.673Z" }, ] +[[package]] +name = "pycparser" +version = "3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1b/7d/92392ff7815c21062bea51aa7b87d45576f649f16458d78b7cf94b9ab2e6/pycparser-3.0.tar.gz", hash = "sha256:600f49d217304a5902ac3c37e1281c9fe94e4d0489de643a9504c5cdfdfc6b29", size = 103492, upload-time = "2026-01-21T14:26:51.89Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0c/c3/44f3fbbfa403ea2a7c779186dc20772604442dde72947e7d01069cbe98e3/pycparser-3.0-py3-none-any.whl", hash = "sha256:b727414169a36b7d524c1c3e31839a521725078d7b2ff038656844266160a992", size = 48172, upload-time = "2026-01-21T14:26:50.693Z" }, +] + [[package]] name = "pydantic" version = "2.13.4" @@ -429,6 +514,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fa/c3/7c8b240552251faf6b3a957db200fcfbbcec36763c050428b601e0c9b83b/pydantic_core-2.46.4-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00c603d540afdd6b80eb39f078f33ebd46211f02f33e34a32d9f053bba711de0", size = 2147590, upload-time = "2026-05-06T13:39:29.883Z" }, ] +[[package]] +name = "pydantic-settings" +version = "2.14.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, + { name = "python-dotenv" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/07/60/1d1e59c9c90d54591469ada7d268251f71c24bdb765f1a8a832cee8c6653/pydantic_settings-2.14.1.tar.gz", hash = "sha256:e874d3bec7e787b0c9958277956ed9b4dd5de6a80e162188fdaff7c5e26fd5fa", size = 235551, upload-time = "2026-05-08T13:40:06.542Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ae/8d/f1af3832f5e6eb13ba94ee809e72b8ecb5eef226d27ee0bef7d963d943c7/pydantic_settings-2.14.1-py3-none-any.whl", hash = "sha256:6e3c7edfd8277687cdc598f56e5cff0e9bfff0910a3749deaa8d4401c3a2b9de", size = 60964, upload-time = "2026-05-08T13:40:04.958Z" }, +] + [[package]] name = "pygments" version = "2.20.0" @@ -479,6 +578,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" }, ] +[[package]] +name = "python-dotenv" +version = "1.2.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/82/ed/0301aeeac3e5353ef3d94b6ec08bbcabd04a72018415dcb29e588514bba8/python_dotenv-1.2.2.tar.gz", hash = "sha256:2c371a91fbd7ba082c2c1dc1f8bf89ca22564a087c2c287cd9b662adde799cf3", size = 50135, upload-time = "2026-03-01T16:00:26.196Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0b/d7/1959b9648791274998a9c3526f6d0ec8fd2233e4d4acce81bbae76b44b2a/python_dotenv-1.2.2-py3-none-any.whl", hash = "sha256:1d8214789a24de455a8b8bd8ae6fe3c6b69a5e3d64aa8a8e5d68e694bbcb285a", size = 22101, upload-time = "2026-03-01T16:00:25.09Z" }, +] + [[package]] name = "ruff" version = "0.15.13" @@ -504,6 +612,25 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9b/36/9c015cd052fca743dae8cb2aeb16b551444787467db42ceab0fc968865af/ruff-0.15.13-py3-none-win_arm64.whl", hash = "sha256:2471da9bd1068c8c064b5fd9c0c4b6dddffd6369cb1cd68b29993b1709ff1b21", size = 11179336, upload-time = "2026-05-14T13:44:33.026Z" }, ] +[[package]] +name = "shapely" +version = "2.1.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/4d/bc/0989043118a27cccb4e906a46b7565ce36ca7b57f5a18b78f4f1b0f72d9d/shapely-2.1.2.tar.gz", hash = "sha256:2ed4ecb28320a433db18a5bf029986aa8afcfd740745e78847e330d5d94922a9", size = 315489, upload-time = "2025-09-24T13:51:41.432Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/24/c0/f3b6453cf2dfa99adc0ba6675f9aaff9e526d2224cbd7ff9c1a879238693/shapely-2.1.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fe2533caae6a91a543dec62e8360fe86ffcdc42a7c55f9dfd0128a977a896b94", size = 1833550, upload-time = "2025-09-24T13:50:30.019Z" }, + { url = "https://files.pythonhosted.org/packages/86/07/59dee0bc4b913b7ab59ab1086225baca5b8f19865e6101db9ebb7243e132/shapely-2.1.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ba4d1333cc0bc94381d6d4308d2e4e008e0bd128bdcff5573199742ee3634359", size = 1643556, upload-time = "2025-09-24T13:50:32.291Z" }, + { url = "https://files.pythonhosted.org/packages/26/29/a5397e75b435b9895cd53e165083faed5d12fd9626eadec15a83a2411f0f/shapely-2.1.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0bd308103340030feef6c111d3eb98d50dc13feea33affc8a6f9fa549e9458a3", size = 2988308, upload-time = "2025-09-24T13:50:33.862Z" }, + { url = "https://files.pythonhosted.org/packages/b9/37/e781683abac55dde9771e086b790e554811a71ed0b2b8a1e789b7430dd44/shapely-2.1.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1e7d4d7ad262a48bb44277ca12c7c78cb1b0f56b32c10734ec9a1d30c0b0c54b", size = 3099844, upload-time = "2025-09-24T13:50:35.459Z" }, + { url = "https://files.pythonhosted.org/packages/d8/f3/9876b64d4a5a321b9dc482c92bb6f061f2fa42131cba643c699f39317cb9/shapely-2.1.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e9eddfe513096a71896441a7c37db72da0687b34752c4e193577a145c71736fc", size = 3988842, upload-time = "2025-09-24T13:50:37.478Z" }, + { url = "https://files.pythonhosted.org/packages/d1/a0/704c7292f7014c7e74ec84eddb7b109e1fbae74a16deae9c1504b1d15565/shapely-2.1.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:980c777c612514c0cf99bc8a9de6d286f5e186dcaf9091252fcd444e5638193d", size = 4152714, upload-time = "2025-09-24T13:50:39.9Z" }, + { url = "https://files.pythonhosted.org/packages/53/46/319c9dc788884ad0785242543cdffac0e6530e4d0deb6c4862bc4143dcf3/shapely-2.1.2-cp312-cp312-win32.whl", hash = "sha256:9111274b88e4d7b54a95218e243282709b330ef52b7b86bc6aaf4f805306f454", size = 1542745, upload-time = "2025-09-24T13:50:41.414Z" }, + { url = "https://files.pythonhosted.org/packages/ec/bf/cb6c1c505cb31e818e900b9312d514f381fbfa5c4363edfce0fcc4f8c1a4/shapely-2.1.2-cp312-cp312-win_amd64.whl", hash = "sha256:743044b4cfb34f9a67205cee9279feaf60ba7d02e69febc2afc609047cb49179", size = 1722861, upload-time = "2025-09-24T13:50:43.35Z" }, +] + [[package]] name = "six" version = "1.17.0" From f7a55c3cc4ddff03b7b2d14602852ce7b6a300b7 Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Sat, 16 May 2026 19:08:00 +0000 Subject: [PATCH 14/22] docs: add Phase 1a-5 verification report Documents test results for: - Gate 5: max_bytes self-loop prevention (PASS) - Gate 6: bbox hot-reload (PASS) Co-Authored-By: Claude Opus 4.5 --- docs/PHASE-1A-5-VERIFICATION.md | 83 +++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 docs/PHASE-1A-5-VERIFICATION.md diff --git a/docs/PHASE-1A-5-VERIFICATION.md b/docs/PHASE-1A-5-VERIFICATION.md new file mode 100644 index 0000000..c9867e6 --- /dev/null +++ b/docs/PHASE-1A-5-VERIFICATION.md @@ -0,0 +1,83 @@ +# Phase 1a-5 Verification Report + +## Gate 5: max_bytes Self-Loop Prevention + +**Objective**: Verify that `max_bytes` updates to `config.streams` do NOT trigger +a NOTIFY (preventing infinite supervisor recompute loops). + +**Test Procedure**: +1. Started supervisor with stream retention enabled +2. Directly updated `max_bytes` via SQL: + ```sql + UPDATE config.streams SET max_bytes = 209715200 WHERE name = 'CENTRAL_META'; + ``` +3. Monitored supervisor logs for any stream change handling + +**Result**: PASS + +No NOTIFY was triggered. The column-filtered trigger in `003_add_streams_table.sql` +correctly fires only on `max_age_s` changes: + +```sql +ELSIF TG_OP = 'UPDATE' AND OLD.max_age_s IS DISTINCT FROM NEW.max_age_s THEN + PERFORM pg_notify('config_changed', 'streams:' || NEW.name); +``` + +This prevents the supervisor's `recompute_max_bytes()` from creating a feedback loop. + +--- + +## Gate 6: bbox Hot-Reload + +**Objective**: Verify that changes to the NWS adapter's `region` bbox are +picked up via hot-reload without supervisor restart. + +**Test Procedure**: +1. Updated the NWS adapter's region via SQL: + ```sql + UPDATE config.adapters SET settings = jsonb_set( + settings, '{region}', + '{"north": 48.0, "south": 45.0, "east": -115.0, "west": -125.0}'::jsonb + ) WHERE name = 'nws'; + ``` +2. Observed supervisor logs for config reload + +**Result**: PASS + +Supervisor log showed immediate config application: +```json +{"msg": "NWS config applied", "region": {"east": -115.0, "west": -125.0, "north": 48.0, "south": 45.0}} +``` + +The NOTIFY trigger on `config.adapters` fired and the supervisor's +`_handle_adapter_change()` correctly invoked `adapter.apply_config()`. + +--- + +## Additional Verification + +### Polygon Intersection Filter + +The `_geometry_intersects_region()` method was tested with: +- Shapely 2.1.2 installed via `uv sync` +- GeoJSON geometry parsing via `shapely.geometry.shape()` +- Region box creation via `shapely.geometry.box()` +- Intersection test via `region_box.intersects(feature_shape)` + +### Antimeridian Rejection + +The `RegionConfig` validator was tested: +```python +>>> RegionConfig(north=49.0, south=31.0, east=-170.0, west=170.0) +ValidationError: antimeridian-crossing bboxes not supported +``` + +--- + +## Verification Date + +2026-05-16T19:06:00Z + +## Verified By + +Claude Code (automated verification) From b42589c69c0f9bcc4dc62045204defe163ab3525 Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Sat, 16 May 2026 19:58:20 +0000 Subject: [PATCH 15/22] feat(schema): add FIRMS adapter and CENTRAL_FIRE stream Migration 005 seeds: - config.adapters row for firms (300s cadence, PNW bbox) - config.streams row for CENTRAL_FIRE (7d retention) Co-Authored-By: Claude Opus 4.5 --- sql/migrations/005_add_firms_adapter.sql | 27 ++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 sql/migrations/005_add_firms_adapter.sql diff --git a/sql/migrations/005_add_firms_adapter.sql b/sql/migrations/005_add_firms_adapter.sql new file mode 100644 index 0000000..65b8791 --- /dev/null +++ b/sql/migrations/005_add_firms_adapter.sql @@ -0,0 +1,27 @@ +-- Migration: 005_add_firms_adapter +-- Seeds FIRMS adapter configuration and CENTRAL_FIRE stream. + +-- Seed FIRMS adapter row +INSERT INTO config.adapters (name, enabled, cadence_s, settings) +VALUES ( + 'firms', + true, + 300, + jsonb_build_object( + 'region', jsonb_build_object( + 'north', 49.5, + 'south', 31.0, + 'east', -102.0, + 'west', -124.5 + ), + 'api_key_alias', 'firms', + 'satellites', jsonb_build_array( + 'VIIRS_SNPP_NRT', + 'VIIRS_NOAA20_NRT' + ) + ) +); + +-- Seed CENTRAL_FIRE stream row +INSERT INTO config.streams (name, max_age_s, max_bytes) +VALUES ('CENTRAL_FIRE', 604800, 1073741824); From 0097163edf57f7fc0b627419f0799dd440710821 Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Sat, 16 May 2026 19:58:31 +0000 Subject: [PATCH 16/22] feat(adapters): add FIRMS fire hotspot adapter NASA FIRMS adapter for VIIRS satellite fire detections: - Polls VIIRS_SNPP_NRT and VIIRS_NOAA20_NRT satellites - Deduplication via stable ID (satellite:date:time:lat:lon) - Hot-reload support for region, satellites, and API key - Confidence mapping: l/n/h -> low/nominal/high - Severity: high=3, nominal=2, low=1 Includes comprehensive unit tests for: - CSV parsing and event generation - Deduplication logic - URL building and config application Co-Authored-By: Claude Opus 4.5 --- src/central/adapters/firms.py | 430 ++++++++++++++++++++++++++++++++++ tests/test_firms.py | 410 ++++++++++++++++++++++++++++++++ 2 files changed, 840 insertions(+) create mode 100644 src/central/adapters/firms.py create mode 100644 tests/test_firms.py diff --git a/src/central/adapters/firms.py b/src/central/adapters/firms.py new file mode 100644 index 0000000..de3603c --- /dev/null +++ b/src/central/adapters/firms.py @@ -0,0 +1,430 @@ +"""FIRMS (Fire Information for Resource Management System) adapter.""" + +import csv +import logging +import sqlite3 +from collections.abc import AsyncIterator +from datetime import datetime, timezone +from io import StringIO +from pathlib import Path +from typing import Any + +import aiohttp +from tenacity import ( + retry, + stop_after_attempt, + wait_exponential_jitter, + retry_if_exception_type, +) + +from central.adapter import SourceAdapter +from central.config_models import AdapterConfig, RegionConfig +from central.config_store import ConfigStore +from central.models import Event, Geo + +logger = logging.getLogger(__name__) + +# FIRMS API base URL +FIRMS_API_BASE = "https://firms.modaps.eosdis.nasa.gov/api/area/csv" + +# Satellite name mapping +SATELLITE_SHORT = { + "VIIRS_SNPP_NRT": "viirs_snpp", + "VIIRS_NOAA20_NRT": "viirs_noaa20", + "VIIRS_NOAA21_NRT": "viirs_noaa21", +} + +# Confidence mapping +CONFIDENCE_MAP = { + "l": "low", + "n": "nominal", + "h": "high", +} + +# Severity mapping (confidence -> severity level) +SEVERITY_MAP = { + "high": 3, + "nominal": 2, + "low": 1, +} + + +class FIRMSAdapter(SourceAdapter): + """NASA FIRMS fire hotspot adapter.""" + + name = "firms" + + def __init__( + self, + config: AdapterConfig, + config_store: ConfigStore, + cursor_db_path: Path, + ) -> None: + self._config_store = config_store + self._cursor_db_path = cursor_db_path + self._session: aiohttp.ClientSession | None = None + self._db: sqlite3.Connection | None = None + self._api_key: str | None = None + + # Extract settings from config + self._api_key_alias: str = config.settings.get("api_key_alias", "firms") + self._satellites: list[str] = config.settings.get( + "satellites", ["VIIRS_SNPP_NRT", "VIIRS_NOAA20_NRT"] + ) + + # Parse region from settings + region_dict = config.settings.get("region") + if region_dict: + self.region: RegionConfig | None = RegionConfig(**region_dict) + else: + self.region = None + + async def apply_config(self, new_config: AdapterConfig) -> None: + """Apply new configuration from hot-reload.""" + old_alias = self._api_key_alias + + # Update settings + self._api_key_alias = new_config.settings.get("api_key_alias", "firms") + self._satellites = new_config.settings.get( + "satellites", ["VIIRS_SNPP_NRT", "VIIRS_NOAA20_NRT"] + ) + + # Update region + region_dict = new_config.settings.get("region") + if region_dict: + self.region = RegionConfig(**region_dict) + else: + self.region = None + + # If API key alias changed, re-fetch the key + if self._api_key_alias != old_alias: + self._api_key = await self._config_store.get_api_key(self._api_key_alias) + if self._api_key: + logger.info("FIRMS API key reloaded", extra={"alias": self._api_key_alias}) + else: + logger.warning( + "FIRMS API key not found after alias change", + extra={"alias": self._api_key_alias}, + ) + + logger.info( + "FIRMS config applied", + extra={ + "region": region_dict, + "satellites": self._satellites, + "api_key_alias": self._api_key_alias, + }, + ) + + async def startup(self) -> None: + """Initialize HTTP session, dedup tracker, and fetch API key.""" + # Fetch API key + self._api_key = await self._config_store.get_api_key(self._api_key_alias) + if not self._api_key: + logger.error( + "FIRMS API key not found - polling will be skipped until key is set", + extra={"alias": self._api_key_alias}, + ) + + # Initialize HTTP session + self._session = aiohttp.ClientSession( + timeout=aiohttp.ClientTimeout(total=60), + ) + + # Initialize dedup tracker (shared sqlite DB with NWS) + self._db = sqlite3.connect(str(self._cursor_db_path)) + self._db.execute(""" + CREATE TABLE IF NOT EXISTS published_ids ( + adapter TEXT NOT NULL, + event_id TEXT NOT NULL, + first_seen TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + last_seen TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY (adapter, event_id) + ) + """) + self._db.execute(""" + CREATE INDEX IF NOT EXISTS published_ids_last_seen + ON published_ids (last_seen) + """) + self._db.commit() + + # Sweep old entries on startup (48h for FIRMS) + self._sweep_old_ids() + + logger.info( + "FIRMS adapter started", + extra={ + "region": { + "north": self.region.north, + "south": self.region.south, + "east": self.region.east, + "west": self.region.west, + } if self.region else None, + "satellites": self._satellites, + "api_key_present": self._api_key is not None, + }, + ) + + async def shutdown(self) -> None: + """Close HTTP session and database.""" + if self._session: + await self._session.close() + self._session = None + if self._db: + self._db.close() + self._db = None + logger.info("FIRMS adapter shut down") + + def _is_published(self, stable_id: str) -> bool: + """Check if an event has already been published.""" + if not self._db: + return False + cur = self._db.execute( + "SELECT 1 FROM published_ids WHERE adapter = ? AND event_id = ?", + (self.name, stable_id), + ) + return cur.fetchone() is not None + + def _mark_published(self, stable_id: str) -> None: + """Mark an event as published.""" + if not self._db: + return + self._db.execute( + """ + INSERT INTO published_ids (adapter, event_id, first_seen, last_seen) + VALUES (?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP) + ON CONFLICT (adapter, event_id) DO UPDATE SET + last_seen = CURRENT_TIMESTAMP + """, + (self.name, stable_id), + ) + self._db.commit() + + def _sweep_old_ids(self) -> int: + """Remove published_ids older than 48 hours. Returns count deleted.""" + if not self._db: + return 0 + cur = self._db.execute( + "DELETE FROM published_ids WHERE adapter = ? AND last_seen < datetime('now', '-48 hours')", + (self.name,), + ) + self._db.commit() + count = cur.rowcount + if count > 0: + logger.info("FIRMS swept old dedup entries", extra={"count": count}) + return count + + def _build_stable_id( + self, satellite: str, acq_date: str, acq_time: str, lat: float, lon: float + ) -> str: + """Build stable ID for deduplication.""" + # Round lat/lon to 0.001 degrees to handle floating-point comparison + lat_rounded = round(lat, 3) + lon_rounded = round(lon, 3) + return f"{satellite}:{acq_date}:{acq_time}:{lat_rounded}:{lon_rounded}" + + def _build_url(self, satellite: str) -> str | None: + """Build FIRMS API URL for a satellite.""" + if not self._api_key or not self.region: + return None + + # Area format: west,south,east,north + area = f"{self.region.west},{self.region.south},{self.region.east},{self.region.north}" + return f"{FIRMS_API_BASE}/{self._api_key}/{satellite}/{area}/1" + + @retry( + stop=stop_after_attempt(3), + wait=wait_exponential_jitter(initial=2, max=30), + retry=retry_if_exception_type((aiohttp.ClientError,)), + reraise=True, + ) + async def _fetch_csv(self, url: str) -> str: + """Fetch CSV data from FIRMS API.""" + if not self._session: + raise RuntimeError("Session not initialized") + + async with self._session.get(url) as resp: + # Check for error responses + content_type = resp.headers.get("Content-Type", "") + if "text/html" in content_type: + text = await resp.text() + logger.error( + "FIRMS returned HTML (likely auth error)", + extra={"status": resp.status, "preview": text[:200]}, + ) + raise ValueError("FIRMS returned HTML instead of CSV") + + resp.raise_for_status() + return await resp.text() + + def _parse_csv(self, csv_text: str, satellite: str) -> list[dict[str, Any]]: + """Parse FIRMS CSV response into list of dicts.""" + rows = [] + reader = csv.DictReader(StringIO(csv_text)) + + for row in reader: + try: + # Parse required fields + lat = float(row["latitude"]) + lon = float(row["longitude"]) + acq_date = row["acq_date"] + acq_time = row["acq_time"] + confidence_raw = row.get("confidence", "n").lower() + confidence = CONFIDENCE_MAP.get(confidence_raw, "nominal") + + rows.append({ + "latitude": lat, + "longitude": lon, + "bright_ti4": float(row.get("bright_ti4", 0)) if row.get("bright_ti4") else None, + "bright_ti5": float(row.get("bright_ti5", 0)) if row.get("bright_ti5") else None, + "scan": float(row.get("scan", 0)) if row.get("scan") else None, + "track": float(row.get("track", 0)) if row.get("track") else None, + "acq_date": acq_date, + "acq_time": acq_time, + "satellite": row.get("satellite", satellite), + "instrument": row.get("instrument", "VIIRS"), + "confidence": confidence, + "confidence_raw": confidence_raw, + "version": row.get("version", ""), + "frp": float(row.get("frp", 0)) if row.get("frp") else None, + "daynight": row.get("daynight", ""), + }) + except (KeyError, ValueError) as e: + logger.warning( + "Failed to parse FIRMS row", + extra={"error": str(e), "row": dict(row)}, + ) + continue + + return rows + + def _row_to_event(self, row: dict[str, Any], satellite: str) -> Event: + """Convert a parsed CSV row to an Event.""" + satellite_short = SATELLITE_SHORT.get(satellite, satellite.lower().replace("_nrt", "")) + confidence = row["confidence"] + severity = SEVERITY_MAP.get(confidence, 1) + + # Parse acquisition time + acq_date = row["acq_date"] + acq_time = row["acq_time"] + # acq_time is HHMM format + try: + time = datetime.strptime( + f"{acq_date} {acq_time}", "%Y-%m-%d %H%M" + ).replace(tzinfo=timezone.utc) + except ValueError: + time = datetime.now(timezone.utc) + + lat = row["latitude"] + lon = row["longitude"] + + # Build stable ID + stable_id = self._build_stable_id(satellite, acq_date, acq_time, lat, lon) + + geo = Geo( + centroid=(lon, lat), # GeoJSON order: lon, lat + bbox=(lon, lat, lon, lat), # Point bbox + regions=[], + primary_region=None, + ) + + return Event( + id=stable_id, + source="central/adapters/firms", + category=f"fire.hotspot.{satellite_short}.{confidence}", + time=time, + expires=None, + severity=severity, + geo=geo, + data=row, + ) + + async def poll(self) -> AsyncIterator[Event]: + """Poll FIRMS API for fire hotspots.""" + # Check API key + if not self._api_key: + # Try to fetch again in case it was added + self._api_key = await self._config_store.get_api_key(self._api_key_alias) + if not self._api_key: + logger.warning( + "FIRMS API key still not available, skipping poll", + extra={"alias": self._api_key_alias}, + ) + return + + if not self.region: + logger.warning("FIRMS region not configured, skipping poll") + return + + # Sweep old dedup entries periodically + self._sweep_old_ids() + + total_features = 0 + total_new = 0 + + for satellite in self._satellites: + url = self._build_url(satellite) + if not url: + continue + + try: + csv_text = await self._fetch_csv(url) + rows = self._parse_csv(csv_text, satellite) + feature_count = len(rows) + total_features += feature_count + + new_count = 0 + for row in rows: + stable_id = self._build_stable_id( + satellite, + row["acq_date"], + row["acq_time"], + row["latitude"], + row["longitude"], + ) + + if self._is_published(stable_id): + continue + + event = self._row_to_event(row, satellite) + yield event + self._mark_published(stable_id) + new_count += 1 + + total_new += new_count + logger.info( + "FIRMS satellite poll completed", + extra={ + "satellite": satellite, + "feature_count": feature_count, + "new_count": new_count, + }, + ) + + except Exception as e: + logger.error( + "FIRMS poll failed for satellite", + extra={"satellite": satellite, "error": str(e)}, + ) + continue + + logger.info( + "FIRMS poll completed", + extra={ + "total_features": total_features, + "total_new": total_new, + "satellites": self._satellites, + }, + ) + + +def subject_for_fire_hotspot(ev: Event) -> str: + """Compute the NATS subject for a fire hotspot event. + + Subject format: central.fire.hotspot.. + + The category already contains the satellite and confidence info, + so we just prefix with 'central.'. + """ + # category is "fire.hotspot.." + return f"central.{ev.category}" diff --git a/tests/test_firms.py b/tests/test_firms.py new file mode 100644 index 0000000..fb42251 --- /dev/null +++ b/tests/test_firms.py @@ -0,0 +1,410 @@ +"""Tests for FIRMS adapter.""" + +import pytest +from datetime import datetime, timezone +from unittest.mock import AsyncMock, MagicMock, patch +from pathlib import Path +import tempfile + +from central.adapters.firms import ( + FIRMSAdapter, + CONFIDENCE_MAP, + SATELLITE_SHORT, + subject_for_fire_hotspot, +) +from central.config_models import AdapterConfig, RegionConfig +from central.models import Event, Geo + + +# Sample FIRMS CSV response +SAMPLE_CSV = """latitude,longitude,bright_ti4,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_ti5,frp,daynight +45.123,-116.456,320.5,0.39,0.36,2026-05-16,1430,N,VIIRS,h,2.0NRT,290.2,15.3,D +46.789,-117.012,305.2,0.41,0.38,2026-05-16,1430,N,VIIRS,n,2.0NRT,285.1,8.7,D +45.123,-116.456,318.9,0.40,0.37,2026-05-16,1430,N,VIIRS,l,2.0NRT,288.5,12.1,D +""" + +# Sample CSV with duplicate (same location, date, time) +SAMPLE_CSV_WITH_DUPE = """latitude,longitude,bright_ti4,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_ti5,frp,daynight +45.123,-116.456,320.5,0.39,0.36,2026-05-16,1430,N,VIIRS,h,2.0NRT,290.2,15.3,D +45.123,-116.456,320.5,0.39,0.36,2026-05-16,1430,N,VIIRS,h,2.0NRT,290.2,15.3,D +""" + + +def make_adapter_config( + region: dict | None = None, + satellites: list[str] | None = None, +) -> AdapterConfig: + """Create an AdapterConfig for testing.""" + settings = { + "api_key_alias": "firms", + "satellites": satellites or ["VIIRS_SNPP_NRT", "VIIRS_NOAA20_NRT"], + } + if region: + settings["region"] = region + else: + settings["region"] = { + "north": 49.5, + "south": 31.0, + "east": -102.0, + "west": -124.5, + } + + return AdapterConfig( + name="firms", + enabled=True, + cadence_s=300, + settings=settings, + updated_at=datetime.now(timezone.utc), + ) + + +@pytest.fixture +def temp_db_path(): + """Create a temporary database path for testing.""" + with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f: + yield Path(f.name) + + +@pytest.fixture +def mock_config_store(): + """Create a mock ConfigStore.""" + store = MagicMock() + store.get_api_key = AsyncMock(return_value="test_api_key") + return store + + +class TestConfidenceMapping: + """Test confidence value mapping.""" + + def test_low_confidence(self): + assert CONFIDENCE_MAP["l"] == "low" + + def test_nominal_confidence(self): + assert CONFIDENCE_MAP["n"] == "nominal" + + def test_high_confidence(self): + assert CONFIDENCE_MAP["h"] == "high" + + +class TestSatelliteShortNames: + """Test satellite short name mapping.""" + + def test_snpp_short_name(self): + assert SATELLITE_SHORT["VIIRS_SNPP_NRT"] == "viirs_snpp" + + def test_noaa20_short_name(self): + assert SATELLITE_SHORT["VIIRS_NOAA20_NRT"] == "viirs_noaa20" + + def test_noaa21_short_name(self): + assert SATELLITE_SHORT["VIIRS_NOAA21_NRT"] == "viirs_noaa21" + + +class TestStableIdGeneration: + """Test stable ID generation for deduplication.""" + + @pytest.mark.asyncio + async def test_stable_id_format(self, temp_db_path, mock_config_store): + config = make_adapter_config() + adapter = FIRMSAdapter( + config=config, + config_store=mock_config_store, + cursor_db_path=temp_db_path, + ) + + stable_id = adapter._build_stable_id( + satellite="VIIRS_SNPP_NRT", + acq_date="2026-05-16", + acq_time="1430", + lat=45.1234567, + lon=-116.4567890, + ) + + # Should be rounded to 3 decimal places + assert stable_id == "VIIRS_SNPP_NRT:2026-05-16:1430:45.123:-116.457" + + @pytest.mark.asyncio + async def test_stable_id_rounding(self, temp_db_path, mock_config_store): + """Test that small lat/lon differences within 0.001 round to same ID.""" + config = make_adapter_config() + adapter = FIRMSAdapter( + config=config, + config_store=mock_config_store, + cursor_db_path=temp_db_path, + ) + + # Values that differ by less than 0.0005 should round to same value + id1 = adapter._build_stable_id("SAT", "2026-05-16", "1430", 45.1234, -116.4564) + id2 = adapter._build_stable_id("SAT", "2026-05-16", "1430", 45.1232, -116.4562) + + # Both should round to 45.124, -116.457 + assert id1 == id2 + + +class TestCsvParsing: + """Test CSV parsing.""" + + @pytest.mark.asyncio + async def test_parse_csv_rows(self, temp_db_path, mock_config_store): + config = make_adapter_config() + adapter = FIRMSAdapter( + config=config, + config_store=mock_config_store, + cursor_db_path=temp_db_path, + ) + + rows = adapter._parse_csv(SAMPLE_CSV, "VIIRS_SNPP_NRT") + + assert len(rows) == 3 + assert rows[0]["latitude"] == 45.123 + assert rows[0]["longitude"] == -116.456 + assert rows[0]["confidence"] == "high" + assert rows[1]["confidence"] == "nominal" + assert rows[2]["confidence"] == "low" + + @pytest.mark.asyncio + async def test_parse_csv_brightness(self, temp_db_path, mock_config_store): + config = make_adapter_config() + adapter = FIRMSAdapter( + config=config, + config_store=mock_config_store, + cursor_db_path=temp_db_path, + ) + + rows = adapter._parse_csv(SAMPLE_CSV, "VIIRS_SNPP_NRT") + + assert rows[0]["bright_ti4"] == 320.5 + assert rows[0]["bright_ti5"] == 290.2 + assert rows[0]["frp"] == 15.3 + + +class TestEventGeneration: + """Test Event generation from CSV rows.""" + + @pytest.mark.asyncio + async def test_event_category(self, temp_db_path, mock_config_store): + config = make_adapter_config() + adapter = FIRMSAdapter( + config=config, + config_store=mock_config_store, + cursor_db_path=temp_db_path, + ) + + rows = adapter._parse_csv(SAMPLE_CSV, "VIIRS_SNPP_NRT") + event = adapter._row_to_event(rows[0], "VIIRS_SNPP_NRT") + + assert event.category == "fire.hotspot.viirs_snpp.high" + + @pytest.mark.asyncio + async def test_event_severity(self, temp_db_path, mock_config_store): + config = make_adapter_config() + adapter = FIRMSAdapter( + config=config, + config_store=mock_config_store, + cursor_db_path=temp_db_path, + ) + + rows = adapter._parse_csv(SAMPLE_CSV, "VIIRS_SNPP_NRT") + + high_event = adapter._row_to_event(rows[0], "VIIRS_SNPP_NRT") + nominal_event = adapter._row_to_event(rows[1], "VIIRS_SNPP_NRT") + low_event = adapter._row_to_event(rows[2], "VIIRS_SNPP_NRT") + + assert high_event.severity == 3 + assert nominal_event.severity == 2 + assert low_event.severity == 1 + + @pytest.mark.asyncio + async def test_event_geo(self, temp_db_path, mock_config_store): + config = make_adapter_config() + adapter = FIRMSAdapter( + config=config, + config_store=mock_config_store, + cursor_db_path=temp_db_path, + ) + + rows = adapter._parse_csv(SAMPLE_CSV, "VIIRS_SNPP_NRT") + event = adapter._row_to_event(rows[0], "VIIRS_SNPP_NRT") + + # GeoJSON order: lon, lat + assert event.geo.centroid == (-116.456, 45.123) + assert event.geo.bbox == (-116.456, 45.123, -116.456, 45.123) + + +class TestDeduplication: + """Test deduplication logic.""" + + @pytest.mark.asyncio + async def test_dedup_marks_published(self, temp_db_path, mock_config_store): + config = make_adapter_config() + adapter = FIRMSAdapter( + config=config, + config_store=mock_config_store, + cursor_db_path=temp_db_path, + ) + await adapter.startup() + + stable_id = "VIIRS_SNPP_NRT:2026-05-16:1430:45.123:-116.456" + + # Not published initially + assert not adapter._is_published(stable_id) + + # Mark as published + adapter._mark_published(stable_id) + + # Now should be published + assert adapter._is_published(stable_id) + + await adapter.shutdown() + + @pytest.mark.asyncio + async def test_dedup_prevents_duplicates(self, temp_db_path, mock_config_store): + """Test that duplicate rows don't produce duplicate events.""" + # Use only one satellite to simplify the test + config = make_adapter_config(satellites=["VIIRS_SNPP_NRT"]) + adapter = FIRMSAdapter( + config=config, + config_store=mock_config_store, + cursor_db_path=temp_db_path, + ) + await adapter.startup() + + # Mock the fetch to return CSV with duplicates + with patch.object(adapter, "_fetch_csv", new_callable=AsyncMock) as mock_fetch: + mock_fetch.return_value = SAMPLE_CSV_WITH_DUPE + + events = [] + async for event in adapter.poll(): + events.append(event) + + # Should only get one event despite two identical rows + assert len(events) == 1 + + await adapter.shutdown() + + +class TestSubjectGeneration: + """Test subject generation for fire hotspots.""" + + def test_subject_format(self): + event = Event( + id="test", + source="central/adapters/firms", + category="fire.hotspot.viirs_snpp.high", + time=datetime.now(timezone.utc), + severity=3, + geo=Geo(centroid=(-116.0, 45.0)), + data={}, + ) + + subject = subject_for_fire_hotspot(event) + assert subject == "central.fire.hotspot.viirs_snpp.high" + + def test_subject_nominal_confidence(self): + event = Event( + id="test", + source="central/adapters/firms", + category="fire.hotspot.viirs_noaa20.nominal", + time=datetime.now(timezone.utc), + severity=2, + geo=Geo(centroid=(-116.0, 45.0)), + data={}, + ) + + subject = subject_for_fire_hotspot(event) + assert subject == "central.fire.hotspot.viirs_noaa20.nominal" + + +class TestUrlBuilding: + """Test FIRMS API URL building.""" + + @pytest.mark.asyncio + async def test_url_format(self, temp_db_path, mock_config_store): + config = make_adapter_config( + region={"north": 49.5, "south": 31.0, "east": -102.0, "west": -124.5} + ) + adapter = FIRMSAdapter( + config=config, + config_store=mock_config_store, + cursor_db_path=temp_db_path, + ) + await adapter.startup() + + url = adapter._build_url("VIIRS_SNPP_NRT") + + assert url is not None + assert "test_api_key" in url + assert "VIIRS_SNPP_NRT" in url + assert "-124.5,31.0,-102.0,49.5" in url # west,south,east,north + assert "/1" in url # dayRange + + await adapter.shutdown() + + @pytest.mark.asyncio + async def test_url_none_without_key(self, temp_db_path): + mock_store = MagicMock() + mock_store.get_api_key = AsyncMock(return_value=None) + + config = make_adapter_config() + adapter = FIRMSAdapter( + config=config, + config_store=mock_store, + cursor_db_path=temp_db_path, + ) + await adapter.startup() + + url = adapter._build_url("VIIRS_SNPP_NRT") + + assert url is None + + await adapter.shutdown() + + +class TestApplyConfig: + """Test hot-reload configuration application.""" + + @pytest.mark.asyncio + async def test_apply_config_updates_region(self, temp_db_path, mock_config_store): + config = make_adapter_config( + region={"north": 49.5, "south": 31.0, "east": -102.0, "west": -124.5} + ) + adapter = FIRMSAdapter( + config=config, + config_store=mock_config_store, + cursor_db_path=temp_db_path, + ) + await adapter.startup() + + # Original region + assert adapter.region.north == 49.5 + + # Apply new config with different region + new_config = make_adapter_config( + region={"north": 48.0, "south": 45.0, "east": -115.0, "west": -125.0} + ) + await adapter.apply_config(new_config) + + assert adapter.region.north == 48.0 + assert adapter.region.south == 45.0 + + await adapter.shutdown() + + @pytest.mark.asyncio + async def test_apply_config_updates_satellites(self, temp_db_path, mock_config_store): + config = make_adapter_config(satellites=["VIIRS_SNPP_NRT", "VIIRS_NOAA20_NRT"]) + adapter = FIRMSAdapter( + config=config, + config_store=mock_config_store, + cursor_db_path=temp_db_path, + ) + await adapter.startup() + + # Original satellites + assert len(adapter._satellites) == 2 + + # Apply config with single satellite + new_config = make_adapter_config(satellites=["VIIRS_NOAA20_NRT"]) + await adapter.apply_config(new_config) + + assert adapter._satellites == ["VIIRS_NOAA20_NRT"] + + await adapter.shutdown() From a007418e0adff23296d047553cd39dccbae8f8cf Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Sat, 16 May 2026 19:58:37 +0000 Subject: [PATCH 17/22] feat(models): add fire event subject routing Update subject_for_event to handle fire.* category events: - Fire events: central. (e.g., central.fire.hotspot.viirs_snpp.high) - Weather events: existing geo-based subject logic Co-Authored-By: Claude Opus 4.5 --- src/central/models.py | 147 +++++++++++++++++++++++------------------- 1 file changed, 79 insertions(+), 68 deletions(-) diff --git a/src/central/models.py b/src/central/models.py index 588591c..9671202 100644 --- a/src/central/models.py +++ b/src/central/models.py @@ -1,68 +1,79 @@ -"""Data models for Central event processing.""" - -from datetime import datetime -from typing import Any - -from pydantic import BaseModel, ConfigDict - - -class Geo(BaseModel): - """Geographic context for an event.""" - - model_config = ConfigDict(extra="forbid", frozen=True) - - centroid: tuple[float, float] | None = None # (lon, lat) GeoJSON order - bbox: tuple[float, float, float, float] | None = None # (minLon, minLat, maxLon, maxLat) - regions: list[str] = [] # ["US-ID-Ada", "US-ID-Z033", ...] - primary_region: str | None = None # alphabetically first region, used for subject - - -class Event(BaseModel): - """Canonical event representation for all adapters.""" - - model_config = ConfigDict(extra="forbid", frozen=True) - - id: str # unique, stable across republish - source: str # adapter identity, e.g. "central/adapters/nws" - category: str # e.g. "wx.alert.severe_thunderstorm_warning" - time: datetime # event-time UTC, not processing-time - expires: datetime | None = None - severity: int | None = None # 0..4 or None for "Unknown" - geo: Geo - data: dict[str, Any] # adapter-specific payload - - -def subject_for_event(ev: Event, prefix: str = "central.wx") -> str: - """ - Compute the NATS subject for an alert-style event. - - For weather alerts the subject is: - central.wx.alert.us..county. - or - central.wx.alert.us..zone. - based on whether the primary_region encodes a county or a zone. - - If primary_region is None or unparseable, returns: - central.wx.alert.us.unknown - """ - if ev.geo.primary_region is None: - return f"{prefix}.alert.us.unknown" - - region = ev.geo.primary_region - - # Parse US-- format - # County codes are like "Ada", "Canyon" (names) - # Zone codes start with "Z" like "Z033" - parts = region.split("-") - if len(parts) < 3 or parts[0] != "US": - return f"{prefix}.alert.us.unknown" - - state = parts[1].lower() - code = "-".join(parts[2:]) # Handle multi-part names like "Payette-Washington" - - if code.startswith("Z") and len(code) >= 2 and code[1:].isdigit(): - # Zone code like Z033 - return f"{prefix}.alert.us.{state}.zone.{code.lower()}" - else: - # County name - return f"{prefix}.alert.us.{state}.county.{code.lower()}" +"""Data models for Central event processing.""" + +from datetime import datetime +from typing import Any + +from pydantic import BaseModel, ConfigDict + + +class Geo(BaseModel): + """Geographic context for an event.""" + + model_config = ConfigDict(extra="forbid", frozen=True) + + centroid: tuple[float, float] | None = None # (lon, lat) GeoJSON order + bbox: tuple[float, float, float, float] | None = None # (minLon, minLat, maxLon, maxLat) + regions: list[str] = [] # ["US-ID-Ada", "US-ID-Z033", ...] + primary_region: str | None = None # alphabetically first region, used for subject + + +class Event(BaseModel): + """Canonical event representation for all adapters.""" + + model_config = ConfigDict(extra="forbid", frozen=True) + + id: str # unique, stable across republish + source: str # adapter identity, e.g. "central/adapters/nws" + category: str # e.g. "wx.alert.severe_thunderstorm_warning" or "fire.hotspot.viirs_snpp.high" + time: datetime # event-time UTC, not processing-time + expires: datetime | None = None + severity: int | None = None # 0..4 or None for "Unknown" + geo: Geo + data: dict[str, Any] # adapter-specific payload + + +def subject_for_event(ev: Event) -> str: + """ + Compute the NATS subject for an event based on its category. + + Dispatch by category prefix: + - fire.*: returns central. directly + - wx.*: uses weather alert subject logic + + Weather alert subjects: + central.wx.alert.us..county. + or + central.wx.alert.us..zone. + based on whether the primary_region encodes a county or a zone. + + Fire hotspot subjects: + central.fire.hotspot.. + """ + # Fire events: subject is just central. + if ev.category.startswith("fire."): + return f"central.{ev.category}" + + # Weather events: use geo-based subject logic + prefix = "central.wx" + + if ev.geo.primary_region is None: + return f"{prefix}.alert.us.unknown" + + region = ev.geo.primary_region + + # Parse US-- format + # County codes are like "Ada", "Canyon" (names) + # Zone codes start with "Z" like "Z033" + parts = region.split("-") + if len(parts) < 3 or parts[0] != "US": + return f"{prefix}.alert.us.unknown" + + state = parts[1].lower() + code = "-".join(parts[2:]) # Handle multi-part names like "Payette-Washington" + + if code.startswith("Z") and len(code) >= 2 and code[1:].isdigit(): + # Zone code like Z033 + return f"{prefix}.alert.us.{state}.zone.{code.lower()}" + else: + # County name + return f"{prefix}.alert.us.{state}.county.{code.lower()}" From 5dbaf1dd5cce9b6f5a84614de08cda58827087f0 Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Sat, 16 May 2026 19:58:41 +0000 Subject: [PATCH 18/22] feat(supervisor): wire FIRMS adapter - Add FIRMSAdapter import and factory case - Add CENTRAL_FIRE stream to STREAM_SUBJECTS Co-Authored-By: Claude Opus 4.5 --- src/central/supervisor.py | 1554 +++++++++++++++++++------------------ 1 file changed, 781 insertions(+), 773 deletions(-) diff --git a/src/central/supervisor.py b/src/central/supervisor.py index 2543291..beb8f9f 100644 --- a/src/central/supervisor.py +++ b/src/central/supervisor.py @@ -1,773 +1,781 @@ -"""Central supervisor - adapter scheduler and event publisher.""" - -import asyncio -import json -import logging -import signal -import sys -from dataclasses import dataclass, field -from datetime import datetime, timezone -from pathlib import Path -from typing import Any - -import nats -from nats.js import JetStreamContext - -from central.adapter import SourceAdapter -from central.adapters.nws import NWSAdapter -from central.cloudevents_wire import wrap_event -from central.config_models import AdapterConfig -from central.config_source import ConfigSource, DbConfigSource -from central.config_store import ConfigStore -from central.bootstrap_config import get_settings -from central.models import subject_for_event -from central.stream_manager import StreamManager - -CURSOR_DB_PATH = Path("/var/lib/central/cursors.db") - -# Stream subject mappings -STREAM_SUBJECTS = { - "CENTRAL_WX": ["central.wx.>"], - "CENTRAL_META": ["central.meta.>"], -} - -# Recompute interval for stream max_bytes (1 hour) -STREAM_RECOMPUTE_INTERVAL_S = 3600 - - -class JsonFormatter(logging.Formatter): - """JSON log formatter for structured logging.""" - - def format(self, record: logging.LogRecord) -> str: - log_obj: dict[str, Any] = { - "ts": datetime.now(timezone.utc).isoformat(), - "level": record.levelname, - "logger": record.name, - "msg": record.getMessage(), - } - if record.exc_info: - log_obj["exc"] = self.formatException(record.exc_info) - if hasattr(record, "extra"): - log_obj.update(record.extra) - for key in record.__dict__: - if key not in ( - "name", "msg", "args", "created", "filename", "funcName", - "levelname", "levelno", "lineno", "module", "msecs", - "pathname", "process", "processName", "relativeCreated", - "stack_info", "exc_info", "exc_text", "thread", "threadName", - "taskName", "message", - ): - log_obj[key] = record.__dict__[key] - return json.dumps(log_obj) - - -def setup_logging() -> None: - """Configure JSON logging to stdout.""" - handler = logging.StreamHandler(sys.stdout) - handler.setFormatter(JsonFormatter()) - logging.root.handlers = [handler] - logging.root.setLevel(logging.INFO) - - -logger = logging.getLogger("central.supervisor") - - -@dataclass -class AdapterState: - """Runtime state for a scheduled adapter.""" - - name: str - adapter: SourceAdapter - config: AdapterConfig - task: asyncio.Task[None] | None = None - last_completed_poll: datetime | None = None - cancel_event: asyncio.Event = field(default_factory=asyncio.Event) - - @property - def is_running(self) -> bool: - """Check if adapter loop is currently running.""" - return self.task is not None and not self.task.done() - - -class Supervisor: - """Main supervisor process.""" - - def __init__( - self, - config_source: ConfigSource, - config_store: ConfigStore, - nats_url: str, - cloudevents_config: Any = None, - ) -> None: - self._config_source = config_source - self._config_store = config_store - self._nats_url = nats_url - self._cloudevents_config = cloudevents_config - self._nc: nats.NATS | None = None - self._js: JetStreamContext | None = None - self._stream_manager: StreamManager | None = None - self._adapter_states: dict[str, AdapterState] = {} - self._tasks: list[asyncio.Task[None]] = [] - self._shutdown_event = asyncio.Event() - self._start_time = datetime.now(timezone.utc) - self._config_watch_task: asyncio.Task[None] | None = None - self._lock = asyncio.Lock() - - async def connect(self) -> None: - """Connect to NATS.""" - self._nc = await nats.connect(self._nats_url) - self._js = self._nc.jetstream() - self._stream_manager = StreamManager(self._js) - logger.info("Connected to NATS", extra={"url": self._nats_url}) - - async def disconnect(self) -> None: - """Disconnect from NATS.""" - if self._nc: - await self._nc.drain() - await self._nc.close() - self._nc = None - self._js = None - self._stream_manager = None - logger.info("Disconnected from NATS") - - async def _publish_meta(self, subject: str, data: dict[str, Any]) -> None: - """Publish a meta event (no Nats-Msg-Id).""" - if not self._nc: - return - payload = json.dumps(data).encode() - await self._nc.publish(subject, payload) - - async def _publish_event(self, subject: str, envelope: dict[str, Any], msg_id: str) -> None: - """Publish an event with dedup header.""" - if not self._js: - return - payload = json.dumps(envelope).encode() - await self._js.publish( - subject, - payload, - headers={"Nats-Msg-Id": msg_id}, - ) - - def _create_adapter(self, config: AdapterConfig) -> SourceAdapter: - """Create an adapter instance based on config name.""" - if config.name == "nws": - return NWSAdapter(config=config, cursor_db_path=CURSOR_DB_PATH) - else: - raise ValueError(f"Unknown adapter type: {config.name}") - - async def _run_adapter_loop(self, state: AdapterState) -> None: - """Run an adapter poll loop with rate-limit aware scheduling.""" - while not self._shutdown_event.is_set(): - # Calculate next poll time based on rate-limit guarantee - now = datetime.now(timezone.utc) - - if state.last_completed_poll is not None: - next_poll_at = state.last_completed_poll.timestamp() + state.config.cadence_s - wait_time = max(0, next_poll_at - now.timestamp()) - else: - # First poll - run immediately - wait_time = 0 - - if wait_time > 0: - logger.debug( - "Waiting for next poll", - extra={ - "adapter": state.name, - "wait_s": wait_time, - "next_poll": datetime.fromtimestamp( - now.timestamp() + wait_time, tz=timezone.utc - ).isoformat(), - }, - ) - # Wait for either timeout or cancel signal - try: - await asyncio.wait_for( - state.cancel_event.wait(), - timeout=wait_time, - ) - # Cancel event was set - check if we should exit or reschedule - if self._shutdown_event.is_set(): - break - # Clear the cancel event and re-evaluate schedule - state.cancel_event.clear() - continue - except asyncio.TimeoutError: - pass - - # Check shutdown before polling - if self._shutdown_event.is_set(): - break - - # Check if adapter is still enabled - if not state.config.enabled or state.config.is_paused: - logger.info( - "Adapter disabled/paused, stopping loop", - extra={"adapter": state.name}, - ) - break - - poll_start = datetime.now(timezone.utc) - try: - async for event in state.adapter.poll(): - # Dedup check - if state.adapter.is_published(event.id): - state.adapter.bump_last_seen(event.id) - continue - - # Build CloudEvent (uses defaults if no config provided) - envelope, msg_id = wrap_event(event, self._cloudevents_config) - - subject = subject_for_event(event) - - # Publish - await self._publish_event(subject, envelope, msg_id) - state.adapter.mark_published(event.id) - - logger.info( - "Published event", - extra={"id": event.id, "subject": subject, "category": event.category} - ) - - # Publish success status - await self._publish_meta( - f"central.meta.adapter.{state.name}.status", - {"ok": True, "ts": datetime.now(timezone.utc).isoformat()} - ) - - # Mark poll completion time for rate limiting - state.last_completed_poll = datetime.now(timezone.utc) - - except Exception as e: - logger.exception("Adapter poll failed", extra={"adapter": state.name}) - await self._publish_meta( - f"central.meta.adapter.{state.name}.status", - { - "ok": False, - "error": str(e), - "ts": datetime.now(timezone.utc).isoformat() - } - ) - # Still mark completion time to avoid tight retry loops - state.last_completed_poll = datetime.now(timezone.utc) - - # Sweep old IDs - swept = state.adapter.sweep_old_ids() - if swept > 0: - logger.info("Swept old published IDs", extra={"count": swept}) - - async def _start_adapter(self, config: AdapterConfig) -> None: - """Start an adapter based on its configuration. - - If the adapter was previously stopped (state exists but task is not running), - reuses the existing state to preserve last_completed_poll for rate limiting. - """ - existing_state = self._adapter_states.get(config.name) - - if existing_state is not None: - if existing_state.is_running: - logger.warning( - "Adapter already running", - extra={"adapter": config.name}, - ) - return - - # Adapter was stopped - restart with preserved state - # Update config and restart the adapter - existing_state.config = config - existing_state.cancel_event.clear() - - # Reinitialize the adapter with new config - existing_state.adapter = self._create_adapter(config) - await existing_state.adapter.startup() - - # Start the loop task - existing_state.task = asyncio.create_task( - self._run_adapter_loop(existing_state) - ) - - # Calculate next poll time for logging - if existing_state.last_completed_poll: - next_poll_at = datetime.fromtimestamp( - existing_state.last_completed_poll.timestamp() + config.cadence_s, - tz=timezone.utc, - ) - if next_poll_at <= datetime.now(timezone.utc): - next_poll_at = datetime.now(timezone.utc) - else: - next_poll_at = datetime.now(timezone.utc) - - logger.info( - "Adapter restarted", - extra={ - "adapter": config.name, - "cadence_s": config.cadence_s, - "preserved_last_poll": existing_state.last_completed_poll.isoformat() - if existing_state.last_completed_poll - else None, - "next_poll": next_poll_at.isoformat(), - }, - ) - return - - # New adapter - create fresh state - try: - adapter = self._create_adapter(config) - except ValueError as e: - logger.warning(str(e), extra={"adapter": config.name}) - return - - await adapter.startup() - - state = AdapterState( - name=config.name, - adapter=adapter, - config=config, - ) - state.task = asyncio.create_task(self._run_adapter_loop(state)) - self._adapter_states[config.name] = state - - logger.info( - "Adapter started", - extra={ - "adapter": config.name, - "cadence_s": config.cadence_s, - }, - ) - - async def _stop_adapter(self, name: str) -> None: - """Stop a running adapter but preserve state for potential restart. - - The adapter state (including last_completed_poll) is preserved so that - if the adapter is re-enabled, the rate-limit guarantee is maintained. - Use _remove_adapter() to fully remove an adapter from tracking. - """ - state = self._adapter_states.get(name) - if state is None: - return - - if not state.is_running: - # Already stopped - return - - # Signal the loop to stop - state.cancel_event.set() - - if state.task: - state.task.cancel() - try: - await state.task - except asyncio.CancelledError: - pass - state.task = None - - await state.adapter.shutdown() - logger.info( - "Adapter stopped", - extra={ - "adapter": name, - "preserved_last_poll": state.last_completed_poll.isoformat() - if state.last_completed_poll - else None, - }, - ) - - async def _remove_adapter(self, name: str) -> None: - """Fully remove an adapter, dropping all preserved state. - - Called when an adapter is deleted from the database (not just disabled). - """ - state = self._adapter_states.pop(name, None) - if state is None: - return - - # Stop if running - if state.is_running: - state.cancel_event.set() - if state.task: - state.task.cancel() - try: - await state.task - except asyncio.CancelledError: - pass - - await state.adapter.shutdown() - - logger.info( - "Adapter removed", - extra={"adapter": name}, - ) - - async def _reschedule_adapter( - self, - name: str, - new_config: AdapterConfig, - ) -> AdapterState | None: - """Reschedule an adapter with new configuration. - - Maintains rate-limit guarantee: next poll at - (last_completed_poll + new_cadence_s), not now + new_cadence_s. - - Returns the AdapterState to signal, or None if no signal needed. - The caller must signal cancel_event AFTER releasing any locks to - ensure immediate event delivery to the sleeping loop. - """ - state = self._adapter_states.get(name) - if state is None: - # Adapter not running - just start it - await self._start_adapter(new_config) - return None - - if not state.is_running: - # Adapter stopped - restart it - await self._start_adapter(new_config) - return None - - old_cadence = state.config.cadence_s - new_cadence = new_config.cadence_s - - # Update config - state.config = new_config - - # Apply config to adapter (generic - each adapter handles its own settings) - await state.adapter.apply_config(new_config) - - # Calculate next poll time for logging - if state.last_completed_poll: - next_poll_at = datetime.fromtimestamp( - state.last_completed_poll.timestamp() + new_cadence, - tz=timezone.utc, - ) - else: - next_poll_at = datetime.now(timezone.utc) - - logger.info( - "Rescheduled adapter", - extra={ - "adapter": name, - "old_cadence_s": old_cadence, - "new_cadence_s": new_cadence, - "next_poll": next_poll_at.isoformat(), - }, - ) - - # Return state so caller can signal OUTSIDE any locks. - # This ensures immediate event delivery to the sleeping loop. - return state - - async def _ensure_streams(self) -> None: - """Ensure all configured streams exist with correct settings.""" - if not self._stream_manager: - return - - streams = await self._config_store.list_streams() - for stream_config in streams: - subjects = STREAM_SUBJECTS.get(stream_config.name, []) - if not subjects: - logger.warning( - "No subjects configured for stream", - extra={"stream": stream_config.name}, - ) - continue - - try: - await self._stream_manager.ensure_stream( - stream_config.name, - subjects, - stream_config, - ) - except Exception as e: - logger.error( - "Failed to ensure stream", - extra={"stream": stream_config.name, "error": str(e)}, - ) - - async def _handle_stream_change(self, stream_name: str) -> None: - """Handle a stream configuration change.""" - if not self._stream_manager: - return - - stream_config = await self._config_store.get_stream(stream_name) - if stream_config is None: - logger.warning( - "Stream config not found", - extra={"stream": stream_name}, - ) - return - - try: - # Apply retention settings - await self._stream_manager.apply_retention(stream_name, stream_config) - - # Immediate recompute of max_bytes - new_max_bytes = await self._stream_manager.recompute_max_bytes( - stream_name, stream_config.max_age_s - ) - - # Update database (won't trigger NOTIFY due to column filter) - await self._config_store.update_stream_max_bytes(stream_name, new_max_bytes) - - logger.info( - "Stream retention updated", - extra={ - "stream": stream_name, - "max_age_s": stream_config.max_age_s, - "new_max_bytes": new_max_bytes, - }, - ) - except Exception as e: - logger.error( - "Failed to handle stream change", - extra={"stream": stream_name, "error": str(e)}, - ) - - async def _stream_retention_recompute_loop(self) -> None: - """Periodically recompute max_bytes for all streams.""" - while not self._shutdown_event.is_set(): - try: - await asyncio.wait_for( - self._shutdown_event.wait(), - timeout=STREAM_RECOMPUTE_INTERVAL_S, - ) - # Shutdown requested - break - except asyncio.TimeoutError: - pass - - # Recompute for all streams - if not self._stream_manager: - continue - - streams = await self._config_store.list_streams() - for stream_config in streams: - if not stream_config.managed_max_bytes: - continue - - try: - new_max_bytes = await self._stream_manager.recompute_max_bytes( - stream_config.name, stream_config.max_age_s - ) - - # Only update if change > 10% - change_ratio = abs(new_max_bytes - stream_config.max_bytes) / max(stream_config.max_bytes, 1) - if change_ratio > 0.10: - await self._config_store.update_stream_max_bytes( - stream_config.name, new_max_bytes - ) - await self._stream_manager.apply_retention( - stream_config.name, - await self._config_store.get_stream(stream_config.name), - ) - logger.info( - "Recomputed stream max_bytes", - extra={ - "stream": stream_config.name, - "old_max_bytes": stream_config.max_bytes, - "new_max_bytes": new_max_bytes, - "change_ratio": change_ratio, - }, - ) - except Exception as e: - logger.error( - "Failed to recompute stream max_bytes", - extra={"stream": stream_config.name, "error": str(e)}, - ) - - async def _on_config_change(self, table: str, key: str) -> None: - """Handle a configuration change notification. - - Called when NOTIFY fires for config changes. - """ - # Handle stream changes - if table == "streams": - stream_name = key - logger.info( - "Stream config change received", - extra={"stream": stream_name}, - ) - await self._handle_stream_change(stream_name) - return - - if table != "adapters": - return - - adapter_name = key - logger.info( - "Config change received", - extra={"table": table, "key": key}, - ) - - # Track state that needs signaling after lock release - state_to_signal: AdapterState | None = None - - async with self._lock: - # Fetch the current config for this adapter - new_config = await self._config_source.get_adapter(adapter_name) - current_state = self._adapter_states.get(adapter_name) - - if new_config is None: - # Adapter was deleted - fully remove, don't just stop - if current_state: - await self._remove_adapter(adapter_name) - logger.info( - "Adapter deleted, removed", - extra={"adapter": adapter_name}, - ) - return - - if not new_config.enabled or new_config.is_paused: - # Adapter disabled or paused - stop but preserve state - if current_state and current_state.is_running: - await self._stop_adapter(adapter_name) - logger.info( - "Adapter disabled/paused, stopped", - extra={ - "adapter": adapter_name, - "enabled": new_config.enabled, - "paused": new_config.is_paused, - }, - ) - return - - if current_state is None or not current_state.is_running: - # Adapter was enabled or created - start (will reuse state if exists) - await self._start_adapter(new_config) - logger.info( - "Adapter enabled, started", - extra={"adapter": adapter_name}, - ) - else: - # Adapter config changed (cadence, settings) - state_to_signal = await self._reschedule_adapter(adapter_name, new_config) - - # Signal OUTSIDE the lock to ensure immediate event delivery. - # This fixes cadence-decrease hot-reload where the signal was - # delayed by asyncio task scheduling while holding the lock. - if state_to_signal is not None: - state_to_signal.cancel_event.set() - - async def _heartbeat_loop(self) -> None: - """Publish periodic heartbeats.""" - while not self._shutdown_event.is_set(): - uptime = (datetime.now(timezone.utc) - self._start_time).total_seconds() - await self._publish_meta( - "central.meta.heartbeat", - {"ts": datetime.now(timezone.utc).isoformat(), "uptime_s": uptime} - ) - try: - await asyncio.wait_for( - self._shutdown_event.wait(), - timeout=30 - ) - except asyncio.TimeoutError: - pass - - async def start(self) -> None: - """Start the supervisor.""" - await self.connect() - - # Ensure streams exist with correct configuration - await self._ensure_streams() - - # Load and start enabled adapters - enabled_adapters = await self._config_source.list_enabled_adapters() - for config in enabled_adapters: - await self._start_adapter(config) - - # Start config watcher (runs forever, calling callback on changes) - self._config_watch_task = asyncio.create_task( - self._config_source.watch_for_changes(self._on_config_change) - ) - - # Start heartbeat - self._tasks.append(asyncio.create_task(self._heartbeat_loop())) - - # Start stream retention recompute loop - self._tasks.append(asyncio.create_task(self._stream_retention_recompute_loop())) - - logger.info( - "Supervisor started", - extra={"adapters": list(self._adapter_states.keys())}, - ) - - async def stop(self) -> None: - """Stop the supervisor gracefully.""" - logger.info("Supervisor shutting down") - self._shutdown_event.set() - - # Cancel config watcher - if self._config_watch_task: - self._config_watch_task.cancel() - try: - await self._config_watch_task - except asyncio.CancelledError: - pass - - # Cancel heartbeat and other tasks - for task in self._tasks: - task.cancel() - try: - await task - except asyncio.CancelledError: - pass - - # Remove all adapters (full cleanup) - for name in list(self._adapter_states.keys()): - await self._remove_adapter(name) - - # Close config source - await self._config_source.close() - - await self.disconnect() - logger.info("Supervisor stopped") - - -async def async_main() -> None: - """Async entry point.""" - setup_logging() - - settings = get_settings() - logger.info( - "Config source: db", - extra={"config_source": "db"}, - ) - - # Create database config source and store - config_source = await DbConfigSource.create(settings.db_dsn) - config_store = await ConfigStore.create(settings.db_dsn) - - supervisor = Supervisor( - config_source=config_source, - config_store=config_store, - nats_url=settings.nats_url, - # CloudEvents uses protocol-level defaults from cloudevents_constants - cloudevents_config=None, - ) - logger.info( - "CloudEvents config: defaults", - extra={"cloudevents_source": "defaults"}, - ) - - loop = asyncio.get_running_loop() - shutdown_event = asyncio.Event() - - def handle_signal() -> None: - shutdown_event.set() - - for sig in (signal.SIGTERM, signal.SIGINT): - loop.add_signal_handler(sig, handle_signal) - - await supervisor.start() - - # Wait for shutdown signal - await shutdown_event.wait() - - await supervisor.stop() - - -def main() -> None: - """Entry point.""" - asyncio.run(async_main()) - - -if __name__ == "__main__": - main() +"""Central supervisor - adapter scheduler and event publisher.""" + +import asyncio +import json +import logging +import signal +import sys +from dataclasses import dataclass, field +from datetime import datetime, timezone +from pathlib import Path +from typing import Any + +import nats +from nats.js import JetStreamContext + +from central.adapter import SourceAdapter +from central.adapters.nws import NWSAdapter +from central.adapters.firms import FIRMSAdapter +from central.cloudevents_wire import wrap_event +from central.config_models import AdapterConfig +from central.config_source import ConfigSource, DbConfigSource +from central.config_store import ConfigStore +from central.bootstrap_config import get_settings +from central.models import subject_for_event +from central.stream_manager import StreamManager + +CURSOR_DB_PATH = Path("/var/lib/central/cursors.db") + +# Stream subject mappings +STREAM_SUBJECTS = { + "CENTRAL_WX": ["central.wx.>"], + "CENTRAL_META": ["central.meta.>"], + "CENTRAL_FIRE": ["central.fire.>"], +} + +# Recompute interval for stream max_bytes (1 hour) +STREAM_RECOMPUTE_INTERVAL_S = 3600 + + +class JsonFormatter(logging.Formatter): + """JSON log formatter for structured logging.""" + + def format(self, record: logging.LogRecord) -> str: + log_obj: dict[str, Any] = { + "ts": datetime.now(timezone.utc).isoformat(), + "level": record.levelname, + "logger": record.name, + "msg": record.getMessage(), + } + if record.exc_info: + log_obj["exc"] = self.formatException(record.exc_info) + if hasattr(record, "extra"): + log_obj.update(record.extra) + for key in record.__dict__: + if key not in ( + "name", "msg", "args", "created", "filename", "funcName", + "levelname", "levelno", "lineno", "module", "msecs", + "pathname", "process", "processName", "relativeCreated", + "stack_info", "exc_info", "exc_text", "thread", "threadName", + "taskName", "message", + ): + log_obj[key] = record.__dict__[key] + return json.dumps(log_obj) + + +def setup_logging() -> None: + """Configure JSON logging to stdout.""" + handler = logging.StreamHandler(sys.stdout) + handler.setFormatter(JsonFormatter()) + logging.root.handlers = [handler] + logging.root.setLevel(logging.INFO) + + +logger = logging.getLogger("central.supervisor") + + +@dataclass +class AdapterState: + """Runtime state for a scheduled adapter.""" + + name: str + adapter: SourceAdapter + config: AdapterConfig + task: asyncio.Task[None] | None = None + last_completed_poll: datetime | None = None + cancel_event: asyncio.Event = field(default_factory=asyncio.Event) + + @property + def is_running(self) -> bool: + """Check if adapter loop is currently running.""" + return self.task is not None and not self.task.done() + + +class Supervisor: + """Main supervisor process.""" + + def __init__( + self, + config_source: ConfigSource, + config_store: ConfigStore, + nats_url: str, + cloudevents_config: Any = None, + ) -> None: + self._config_source = config_source + self._config_store = config_store + self._nats_url = nats_url + self._cloudevents_config = cloudevents_config + self._nc: nats.NATS | None = None + self._js: JetStreamContext | None = None + self._stream_manager: StreamManager | None = None + self._adapter_states: dict[str, AdapterState] = {} + self._tasks: list[asyncio.Task[None]] = [] + self._shutdown_event = asyncio.Event() + self._start_time = datetime.now(timezone.utc) + self._config_watch_task: asyncio.Task[None] | None = None + self._lock = asyncio.Lock() + + async def connect(self) -> None: + """Connect to NATS.""" + self._nc = await nats.connect(self._nats_url) + self._js = self._nc.jetstream() + self._stream_manager = StreamManager(self._js) + logger.info("Connected to NATS", extra={"url": self._nats_url}) + + async def disconnect(self) -> None: + """Disconnect from NATS.""" + if self._nc: + await self._nc.drain() + await self._nc.close() + self._nc = None + self._js = None + self._stream_manager = None + logger.info("Disconnected from NATS") + + async def _publish_meta(self, subject: str, data: dict[str, Any]) -> None: + """Publish a meta event (no Nats-Msg-Id).""" + if not self._nc: + return + payload = json.dumps(data).encode() + await self._nc.publish(subject, payload) + + async def _publish_event(self, subject: str, envelope: dict[str, Any], msg_id: str) -> None: + """Publish an event with dedup header.""" + if not self._js: + return + payload = json.dumps(envelope).encode() + await self._js.publish( + subject, + payload, + headers={"Nats-Msg-Id": msg_id}, + ) + + def _create_adapter(self, config: AdapterConfig) -> SourceAdapter: + """Create an adapter instance based on config name.""" + if config.name == "nws": + return NWSAdapter(config=config, cursor_db_path=CURSOR_DB_PATH) + elif config.name == "firms": + return FIRMSAdapter( + config=config, + config_store=self._config_store, + cursor_db_path=CURSOR_DB_PATH, + ) + else: + raise ValueError(f"Unknown adapter type: {config.name}") + + async def _run_adapter_loop(self, state: AdapterState) -> None: + """Run an adapter poll loop with rate-limit aware scheduling.""" + while not self._shutdown_event.is_set(): + # Calculate next poll time based on rate-limit guarantee + now = datetime.now(timezone.utc) + + if state.last_completed_poll is not None: + next_poll_at = state.last_completed_poll.timestamp() + state.config.cadence_s + wait_time = max(0, next_poll_at - now.timestamp()) + else: + # First poll - run immediately + wait_time = 0 + + if wait_time > 0: + logger.debug( + "Waiting for next poll", + extra={ + "adapter": state.name, + "wait_s": wait_time, + "next_poll": datetime.fromtimestamp( + now.timestamp() + wait_time, tz=timezone.utc + ).isoformat(), + }, + ) + # Wait for either timeout or cancel signal + try: + await asyncio.wait_for( + state.cancel_event.wait(), + timeout=wait_time, + ) + # Cancel event was set - check if we should exit or reschedule + if self._shutdown_event.is_set(): + break + # Clear the cancel event and re-evaluate schedule + state.cancel_event.clear() + continue + except asyncio.TimeoutError: + pass + + # Check shutdown before polling + if self._shutdown_event.is_set(): + break + + # Check if adapter is still enabled + if not state.config.enabled or state.config.is_paused: + logger.info( + "Adapter disabled/paused, stopping loop", + extra={"adapter": state.name}, + ) + break + + poll_start = datetime.now(timezone.utc) + try: + async for event in state.adapter.poll(): + # Dedup check + if state.adapter.is_published(event.id): + state.adapter.bump_last_seen(event.id) + continue + + # Build CloudEvent (uses defaults if no config provided) + envelope, msg_id = wrap_event(event, self._cloudevents_config) + + subject = subject_for_event(event) + + # Publish + await self._publish_event(subject, envelope, msg_id) + state.adapter.mark_published(event.id) + + logger.info( + "Published event", + extra={"id": event.id, "subject": subject, "category": event.category} + ) + + # Publish success status + await self._publish_meta( + f"central.meta.adapter.{state.name}.status", + {"ok": True, "ts": datetime.now(timezone.utc).isoformat()} + ) + + # Mark poll completion time for rate limiting + state.last_completed_poll = datetime.now(timezone.utc) + + except Exception as e: + logger.exception("Adapter poll failed", extra={"adapter": state.name}) + await self._publish_meta( + f"central.meta.adapter.{state.name}.status", + { + "ok": False, + "error": str(e), + "ts": datetime.now(timezone.utc).isoformat() + } + ) + # Still mark completion time to avoid tight retry loops + state.last_completed_poll = datetime.now(timezone.utc) + + # Sweep old IDs + swept = state.adapter.sweep_old_ids() + if swept > 0: + logger.info("Swept old published IDs", extra={"count": swept}) + + async def _start_adapter(self, config: AdapterConfig) -> None: + """Start an adapter based on its configuration. + + If the adapter was previously stopped (state exists but task is not running), + reuses the existing state to preserve last_completed_poll for rate limiting. + """ + existing_state = self._adapter_states.get(config.name) + + if existing_state is not None: + if existing_state.is_running: + logger.warning( + "Adapter already running", + extra={"adapter": config.name}, + ) + return + + # Adapter was stopped - restart with preserved state + # Update config and restart the adapter + existing_state.config = config + existing_state.cancel_event.clear() + + # Reinitialize the adapter with new config + existing_state.adapter = self._create_adapter(config) + await existing_state.adapter.startup() + + # Start the loop task + existing_state.task = asyncio.create_task( + self._run_adapter_loop(existing_state) + ) + + # Calculate next poll time for logging + if existing_state.last_completed_poll: + next_poll_at = datetime.fromtimestamp( + existing_state.last_completed_poll.timestamp() + config.cadence_s, + tz=timezone.utc, + ) + if next_poll_at <= datetime.now(timezone.utc): + next_poll_at = datetime.now(timezone.utc) + else: + next_poll_at = datetime.now(timezone.utc) + + logger.info( + "Adapter restarted", + extra={ + "adapter": config.name, + "cadence_s": config.cadence_s, + "preserved_last_poll": existing_state.last_completed_poll.isoformat() + if existing_state.last_completed_poll + else None, + "next_poll": next_poll_at.isoformat(), + }, + ) + return + + # New adapter - create fresh state + try: + adapter = self._create_adapter(config) + except ValueError as e: + logger.warning(str(e), extra={"adapter": config.name}) + return + + await adapter.startup() + + state = AdapterState( + name=config.name, + adapter=adapter, + config=config, + ) + state.task = asyncio.create_task(self._run_adapter_loop(state)) + self._adapter_states[config.name] = state + + logger.info( + "Adapter started", + extra={ + "adapter": config.name, + "cadence_s": config.cadence_s, + }, + ) + + async def _stop_adapter(self, name: str) -> None: + """Stop a running adapter but preserve state for potential restart. + + The adapter state (including last_completed_poll) is preserved so that + if the adapter is re-enabled, the rate-limit guarantee is maintained. + Use _remove_adapter() to fully remove an adapter from tracking. + """ + state = self._adapter_states.get(name) + if state is None: + return + + if not state.is_running: + # Already stopped + return + + # Signal the loop to stop + state.cancel_event.set() + + if state.task: + state.task.cancel() + try: + await state.task + except asyncio.CancelledError: + pass + state.task = None + + await state.adapter.shutdown() + logger.info( + "Adapter stopped", + extra={ + "adapter": name, + "preserved_last_poll": state.last_completed_poll.isoformat() + if state.last_completed_poll + else None, + }, + ) + + async def _remove_adapter(self, name: str) -> None: + """Fully remove an adapter, dropping all preserved state. + + Called when an adapter is deleted from the database (not just disabled). + """ + state = self._adapter_states.pop(name, None) + if state is None: + return + + # Stop if running + if state.is_running: + state.cancel_event.set() + if state.task: + state.task.cancel() + try: + await state.task + except asyncio.CancelledError: + pass + + await state.adapter.shutdown() + + logger.info( + "Adapter removed", + extra={"adapter": name}, + ) + + async def _reschedule_adapter( + self, + name: str, + new_config: AdapterConfig, + ) -> AdapterState | None: + """Reschedule an adapter with new configuration. + + Maintains rate-limit guarantee: next poll at + (last_completed_poll + new_cadence_s), not now + new_cadence_s. + + Returns the AdapterState to signal, or None if no signal needed. + The caller must signal cancel_event AFTER releasing any locks to + ensure immediate event delivery to the sleeping loop. + """ + state = self._adapter_states.get(name) + if state is None: + # Adapter not running - just start it + await self._start_adapter(new_config) + return None + + if not state.is_running: + # Adapter stopped - restart it + await self._start_adapter(new_config) + return None + + old_cadence = state.config.cadence_s + new_cadence = new_config.cadence_s + + # Update config + state.config = new_config + + # Apply config to adapter (generic - each adapter handles its own settings) + await state.adapter.apply_config(new_config) + + # Calculate next poll time for logging + if state.last_completed_poll: + next_poll_at = datetime.fromtimestamp( + state.last_completed_poll.timestamp() + new_cadence, + tz=timezone.utc, + ) + else: + next_poll_at = datetime.now(timezone.utc) + + logger.info( + "Rescheduled adapter", + extra={ + "adapter": name, + "old_cadence_s": old_cadence, + "new_cadence_s": new_cadence, + "next_poll": next_poll_at.isoformat(), + }, + ) + + # Return state so caller can signal OUTSIDE any locks. + # This ensures immediate event delivery to the sleeping loop. + return state + + async def _ensure_streams(self) -> None: + """Ensure all configured streams exist with correct settings.""" + if not self._stream_manager: + return + + streams = await self._config_store.list_streams() + for stream_config in streams: + subjects = STREAM_SUBJECTS.get(stream_config.name, []) + if not subjects: + logger.warning( + "No subjects configured for stream", + extra={"stream": stream_config.name}, + ) + continue + + try: + await self._stream_manager.ensure_stream( + stream_config.name, + subjects, + stream_config, + ) + except Exception as e: + logger.error( + "Failed to ensure stream", + extra={"stream": stream_config.name, "error": str(e)}, + ) + + async def _handle_stream_change(self, stream_name: str) -> None: + """Handle a stream configuration change.""" + if not self._stream_manager: + return + + stream_config = await self._config_store.get_stream(stream_name) + if stream_config is None: + logger.warning( + "Stream config not found", + extra={"stream": stream_name}, + ) + return + + try: + # Apply retention settings + await self._stream_manager.apply_retention(stream_name, stream_config) + + # Immediate recompute of max_bytes + new_max_bytes = await self._stream_manager.recompute_max_bytes( + stream_name, stream_config.max_age_s + ) + + # Update database (won't trigger NOTIFY due to column filter) + await self._config_store.update_stream_max_bytes(stream_name, new_max_bytes) + + logger.info( + "Stream retention updated", + extra={ + "stream": stream_name, + "max_age_s": stream_config.max_age_s, + "new_max_bytes": new_max_bytes, + }, + ) + except Exception as e: + logger.error( + "Failed to handle stream change", + extra={"stream": stream_name, "error": str(e)}, + ) + + async def _stream_retention_recompute_loop(self) -> None: + """Periodically recompute max_bytes for all streams.""" + while not self._shutdown_event.is_set(): + try: + await asyncio.wait_for( + self._shutdown_event.wait(), + timeout=STREAM_RECOMPUTE_INTERVAL_S, + ) + # Shutdown requested + break + except asyncio.TimeoutError: + pass + + # Recompute for all streams + if not self._stream_manager: + continue + + streams = await self._config_store.list_streams() + for stream_config in streams: + if not stream_config.managed_max_bytes: + continue + + try: + new_max_bytes = await self._stream_manager.recompute_max_bytes( + stream_config.name, stream_config.max_age_s + ) + + # Only update if change > 10% + change_ratio = abs(new_max_bytes - stream_config.max_bytes) / max(stream_config.max_bytes, 1) + if change_ratio > 0.10: + await self._config_store.update_stream_max_bytes( + stream_config.name, new_max_bytes + ) + await self._stream_manager.apply_retention( + stream_config.name, + await self._config_store.get_stream(stream_config.name), + ) + logger.info( + "Recomputed stream max_bytes", + extra={ + "stream": stream_config.name, + "old_max_bytes": stream_config.max_bytes, + "new_max_bytes": new_max_bytes, + "change_ratio": change_ratio, + }, + ) + except Exception as e: + logger.error( + "Failed to recompute stream max_bytes", + extra={"stream": stream_config.name, "error": str(e)}, + ) + + async def _on_config_change(self, table: str, key: str) -> None: + """Handle a configuration change notification. + + Called when NOTIFY fires for config changes. + """ + # Handle stream changes + if table == "streams": + stream_name = key + logger.info( + "Stream config change received", + extra={"stream": stream_name}, + ) + await self._handle_stream_change(stream_name) + return + + if table != "adapters": + return + + adapter_name = key + logger.info( + "Config change received", + extra={"table": table, "key": key}, + ) + + # Track state that needs signaling after lock release + state_to_signal: AdapterState | None = None + + async with self._lock: + # Fetch the current config for this adapter + new_config = await self._config_source.get_adapter(adapter_name) + current_state = self._adapter_states.get(adapter_name) + + if new_config is None: + # Adapter was deleted - fully remove, don't just stop + if current_state: + await self._remove_adapter(adapter_name) + logger.info( + "Adapter deleted, removed", + extra={"adapter": adapter_name}, + ) + return + + if not new_config.enabled or new_config.is_paused: + # Adapter disabled or paused - stop but preserve state + if current_state and current_state.is_running: + await self._stop_adapter(adapter_name) + logger.info( + "Adapter disabled/paused, stopped", + extra={ + "adapter": adapter_name, + "enabled": new_config.enabled, + "paused": new_config.is_paused, + }, + ) + return + + if current_state is None or not current_state.is_running: + # Adapter was enabled or created - start (will reuse state if exists) + await self._start_adapter(new_config) + logger.info( + "Adapter enabled, started", + extra={"adapter": adapter_name}, + ) + else: + # Adapter config changed (cadence, settings) + state_to_signal = await self._reschedule_adapter(adapter_name, new_config) + + # Signal OUTSIDE the lock to ensure immediate event delivery. + # This fixes cadence-decrease hot-reload where the signal was + # delayed by asyncio task scheduling while holding the lock. + if state_to_signal is not None: + state_to_signal.cancel_event.set() + + async def _heartbeat_loop(self) -> None: + """Publish periodic heartbeats.""" + while not self._shutdown_event.is_set(): + uptime = (datetime.now(timezone.utc) - self._start_time).total_seconds() + await self._publish_meta( + "central.meta.heartbeat", + {"ts": datetime.now(timezone.utc).isoformat(), "uptime_s": uptime} + ) + try: + await asyncio.wait_for( + self._shutdown_event.wait(), + timeout=30 + ) + except asyncio.TimeoutError: + pass + + async def start(self) -> None: + """Start the supervisor.""" + await self.connect() + + # Ensure streams exist with correct configuration + await self._ensure_streams() + + # Load and start enabled adapters + enabled_adapters = await self._config_source.list_enabled_adapters() + for config in enabled_adapters: + await self._start_adapter(config) + + # Start config watcher (runs forever, calling callback on changes) + self._config_watch_task = asyncio.create_task( + self._config_source.watch_for_changes(self._on_config_change) + ) + + # Start heartbeat + self._tasks.append(asyncio.create_task(self._heartbeat_loop())) + + # Start stream retention recompute loop + self._tasks.append(asyncio.create_task(self._stream_retention_recompute_loop())) + + logger.info( + "Supervisor started", + extra={"adapters": list(self._adapter_states.keys())}, + ) + + async def stop(self) -> None: + """Stop the supervisor gracefully.""" + logger.info("Supervisor shutting down") + self._shutdown_event.set() + + # Cancel config watcher + if self._config_watch_task: + self._config_watch_task.cancel() + try: + await self._config_watch_task + except asyncio.CancelledError: + pass + + # Cancel heartbeat and other tasks + for task in self._tasks: + task.cancel() + try: + await task + except asyncio.CancelledError: + pass + + # Remove all adapters (full cleanup) + for name in list(self._adapter_states.keys()): + await self._remove_adapter(name) + + # Close config source + await self._config_source.close() + + await self.disconnect() + logger.info("Supervisor stopped") + + +async def async_main() -> None: + """Async entry point.""" + setup_logging() + + settings = get_settings() + logger.info( + "Config source: db", + extra={"config_source": "db"}, + ) + + # Create database config source and store + config_source = await DbConfigSource.create(settings.db_dsn) + config_store = await ConfigStore.create(settings.db_dsn) + + supervisor = Supervisor( + config_source=config_source, + config_store=config_store, + nats_url=settings.nats_url, + # CloudEvents uses protocol-level defaults from cloudevents_constants + cloudevents_config=None, + ) + logger.info( + "CloudEvents config: defaults", + extra={"cloudevents_source": "defaults"}, + ) + + loop = asyncio.get_running_loop() + shutdown_event = asyncio.Event() + + def handle_signal() -> None: + shutdown_event.set() + + for sig in (signal.SIGTERM, signal.SIGINT): + loop.add_signal_handler(sig, handle_signal) + + await supervisor.start() + + # Wait for shutdown signal + await shutdown_event.wait() + + await supervisor.stop() + + +def main() -> None: + """Entry point.""" + asyncio.run(async_main()) + + +if __name__ == "__main__": + main() From 47359a8144d768d58b6b6eed42a12f2bc0bf2dbf Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Sat, 16 May 2026 19:58:46 +0000 Subject: [PATCH 19/22] docs: add FIRMS GUI planning notes - MAP_KEY management (alias display, rotation) - Satellite selection (toggle SNPP/NOAA20/NOAA21) - SNPP end-of-life notice (~Oct 2026) Co-Authored-By: Claude Opus 4.5 --- docs/PHASE-1B-NOTES.md | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/docs/PHASE-1B-NOTES.md b/docs/PHASE-1B-NOTES.md index b7cecc8..878c99f 100644 --- a/docs/PHASE-1B-NOTES.md +++ b/docs/PHASE-1B-NOTES.md @@ -53,3 +53,41 @@ Per stream, display: - Supervisor receives NOTIFY and hot-reloads - No service restarts required for config changes - Stream retention changes apply within 5 seconds + +## FIRMS Adapter Configuration + +### MAP_KEY Management +- Display key alias () and timestamp +- Allow operator to rotate key value (re-encrypt new key) +- Show warning if key not present (polling disabled) +- No key value display (security) + +### Satellite Selection +- Toggle individual satellites: VIIRS_SNPP, VIIRS_NOAA20, VIIRS_NOAA21 +- Stored in array +- Changes hot-reload to adapter without restart + +### SNPP End-of-Life Notice +- NASA timeline: SNPP mission ends ~October 2026 +- GUI should display warning banner when SNPP is enabled and date approaches +- Recommend adding NOAA-21 to satellites list before SNPP EOL +- After EOL, adapter will fail to fetch SNPP data (404); GUI should surface this + +## FIRMS Adapter Configuration + +### MAP_KEY Management +- Display key alias (firms) and last_used_at timestamp +- Allow operator to rotate key value (re-encrypt new key) +- Show warning if key not present (polling disabled) +- No key value display (security) + +### Satellite Selection +- Toggle individual satellites: VIIRS_SNPP, VIIRS_NOAA20, VIIRS_NOAA21 +- Stored in config.adapters.settings.satellites array +- Changes hot-reload to adapter without restart + +### SNPP End-of-Life Notice +- NASA timeline: SNPP mission ends ~October 2026 +- GUI should display warning banner when SNPP is enabled and date approaches +- Recommend adding NOAA-21 to satellites list before SNPP EOL +- After EOL, adapter will fail to fetch SNPP data (404); GUI should surface this From 22c50d317689db39f9c0986b8de3b7fbfdfa5d80 Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Sat, 16 May 2026 20:03:34 +0000 Subject: [PATCH 20/22] fix(firms): use public is_published/mark_published methods Match NWS adapter pattern for supervisor compatibility. Co-Authored-By: Claude Opus 4.5 --- src/central/adapters/firms.py | 8 ++++---- tests/test_firms.py | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/central/adapters/firms.py b/src/central/adapters/firms.py index de3603c..48b5415 100644 --- a/src/central/adapters/firms.py +++ b/src/central/adapters/firms.py @@ -175,7 +175,7 @@ class FIRMSAdapter(SourceAdapter): self._db = None logger.info("FIRMS adapter shut down") - def _is_published(self, stable_id: str) -> bool: + def is_published(self, stable_id: str) -> bool: """Check if an event has already been published.""" if not self._db: return False @@ -185,7 +185,7 @@ class FIRMSAdapter(SourceAdapter): ) return cur.fetchone() is not None - def _mark_published(self, stable_id: str) -> None: + def mark_published(self, stable_id: str) -> None: """Mark an event as published.""" if not self._db: return @@ -383,12 +383,12 @@ class FIRMSAdapter(SourceAdapter): row["longitude"], ) - if self._is_published(stable_id): + if self.is_published(stable_id): continue event = self._row_to_event(row, satellite) yield event - self._mark_published(stable_id) + self.mark_published(stable_id) new_count += 1 total_new += new_count diff --git a/tests/test_firms.py b/tests/test_firms.py index fb42251..8e569ad 100644 --- a/tests/test_firms.py +++ b/tests/test_firms.py @@ -246,13 +246,13 @@ class TestDeduplication: stable_id = "VIIRS_SNPP_NRT:2026-05-16:1430:45.123:-116.456" # Not published initially - assert not adapter._is_published(stable_id) + assert not adapter.is_published(stable_id) # Mark as published - adapter._mark_published(stable_id) + adapter.mark_published(stable_id) # Now should be published - assert adapter._is_published(stable_id) + assert adapter.is_published(stable_id) await adapter.shutdown() From 95853200b2583ae1dee50a3e361999dc313dfc9d Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Sat, 16 May 2026 20:11:12 +0000 Subject: [PATCH 21/22] fix(firms): use public sweep_old_ids method Match NWS adapter pattern for supervisor compatibility. Co-Authored-By: Claude Opus 4.5 --- src/central/adapters/firms.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/central/adapters/firms.py b/src/central/adapters/firms.py index 48b5415..c882a64 100644 --- a/src/central/adapters/firms.py +++ b/src/central/adapters/firms.py @@ -149,7 +149,7 @@ class FIRMSAdapter(SourceAdapter): self._db.commit() # Sweep old entries on startup (48h for FIRMS) - self._sweep_old_ids() + self.sweep_old_ids() logger.info( "FIRMS adapter started", @@ -200,7 +200,7 @@ class FIRMSAdapter(SourceAdapter): ) self._db.commit() - def _sweep_old_ids(self) -> int: + def sweep_old_ids(self) -> int: """Remove published_ids older than 48 hours. Returns count deleted.""" if not self._db: return 0 @@ -357,7 +357,7 @@ class FIRMSAdapter(SourceAdapter): return # Sweep old dedup entries periodically - self._sweep_old_ids() + self.sweep_old_ids() total_features = 0 total_new = 0 From cbe9e50383c3717f5e729c10693f4efde59d0e49 Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Sat, 16 May 2026 20:21:34 +0000 Subject: [PATCH 22/22] refactor(supervisor): use adapter registry pattern - Add _ADAPTER_REGISTRY dict for adapter class lookup - Unify adapter __init__ signatures (all take config, config_store, cursor_db_path) - NWSAdapter now accepts config_store param (unused, for signature uniformity) - Adding new adapters requires only one dict entry, no supervisor changes Co-Authored-By: Claude Opus 4.5 --- src/central/adapters/nws.py | 2 ++ src/central/supervisor.py | 22 +++++++++++++--------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/central/adapters/nws.py b/src/central/adapters/nws.py index d2ad155..fa12f98 100644 --- a/src/central/adapters/nws.py +++ b/src/central/adapters/nws.py @@ -20,6 +20,7 @@ from tenacity import ( from central import __version__ from central.adapter import SourceAdapter from central.config_models import AdapterConfig, RegionConfig +from central.config_store import ConfigStore from central.models import Event, Geo from shapely.geometry import box as shapely_box, shape as shapely_shape @@ -196,6 +197,7 @@ class NWSAdapter(SourceAdapter): def __init__( self, config: AdapterConfig, + config_store: ConfigStore, cursor_db_path: Path, ) -> None: self.cursor_db_path = cursor_db_path diff --git a/src/central/supervisor.py b/src/central/supervisor.py index beb8f9f..7ce0daf 100644 --- a/src/central/supervisor.py +++ b/src/central/supervisor.py @@ -24,6 +24,12 @@ from central.bootstrap_config import get_settings from central.models import subject_for_event from central.stream_manager import StreamManager +# Adapter registry - add new adapters here +_ADAPTER_REGISTRY: dict[str, type[SourceAdapter]] = { + "nws": NWSAdapter, + "firms": FIRMSAdapter, +} + CURSOR_DB_PATH = Path("/var/lib/central/cursors.db") # Stream subject mappings @@ -152,16 +158,14 @@ class Supervisor: def _create_adapter(self, config: AdapterConfig) -> SourceAdapter: """Create an adapter instance based on config name.""" - if config.name == "nws": - return NWSAdapter(config=config, cursor_db_path=CURSOR_DB_PATH) - elif config.name == "firms": - return FIRMSAdapter( - config=config, - config_store=self._config_store, - cursor_db_path=CURSOR_DB_PATH, - ) - else: + cls = _ADAPTER_REGISTRY.get(config.name) + if cls is None: raise ValueError(f"Unknown adapter type: {config.name}") + return cls( + config=config, + config_store=self._config_store, + cursor_db_path=CURSOR_DB_PATH, + ) async def _run_adapter_loop(self, state: AdapterState) -> None: """Run an adapter poll loop with rate-limit aware scheduling."""