Merge pull request #82 from zvx-echo6/v0_9_19_1_nws_sweep_scope

v0.9.19.1 - scope nws sweep_old_ids to its own adapter
This commit is contained in:
malice 2026-05-27 01:23:38 -06:00 committed by GitHub
commit f09f749052
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 32 additions and 36 deletions

View file

@ -211,6 +211,7 @@ class NWSAdapter(SourceAdapter):
requires_api_key = None
wizard_order = 1
default_cadence_s = 60
dedup_sweep_days = 8
# Alerts cover forecast zones/counties (polygons), not a single point.
enrichment_locations = []
@ -393,42 +394,6 @@ class NWSAdapter(SourceAdapter):
)
self._db.commit()
def is_published(self, event_id: str) -> bool:
    """Report whether this adapter has already recorded ``event_id``.

    Looks for a ``published_ids`` row keyed by this adapter's ``name`` and
    the given event id.

    Args:
        event_id: Identifier of the event to look up.

    Returns:
        ``True`` when a matching row exists; ``False`` when it does not or
        when no database connection is attached (``self._db`` is falsy).
    """
    db = self._db
    if db:
        # The lookup is keyed on (adapter, event_id), so another adapter's
        # row with the same event id does not count as a match.
        match = db.execute(
            "SELECT 1 FROM published_ids WHERE adapter = ? AND event_id = ?",
            (self.name, event_id),
        ).fetchone()
        return match is not None
    return False
def mark_published(self, event_id: str) -> None:
    """Record ``event_id`` as published by this adapter.

    Inserts a ``published_ids`` row for ``(self.name, event_id)`` with both
    timestamps set to the current time. If the row already exists, only its
    ``last_seen`` is refreshed. The change is committed immediately.

    Does nothing when no database connection is attached.

    Args:
        event_id: Identifier of the event to record.
    """
    db = self._db
    if db:
        # SQL text is kept verbatim; the upsert relies on the
        # (adapter, event_id) conflict target.
        upsert_sql = """
INSERT INTO published_ids (adapter, event_id, first_seen, last_seen)
VALUES (?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
ON CONFLICT (adapter, event_id) DO UPDATE SET
last_seen = CURRENT_TIMESTAMP
"""
        db.execute(upsert_sql, (self.name, event_id))
        db.commit()
def sweep_old_ids(self) -> int:
    """Delete this adapter's stale ``published_ids`` rows.

    Only rows whose ``adapter`` column equals ``self.name`` are considered,
    so a sweep by one adapter can never purge another adapter's dedup
    history. A row is stale when its ``last_seen`` is older than
    ``self.dedup_sweep_days`` days (8 when the attribute is not defined).

    Returns:
        Number of rows deleted; ``0`` when no database connection is
        attached.
    """
    if not self._db:
        return 0

    # Fall back to the previous hard-coded window if the attribute is
    # missing; int() guards against a non-integer setting.
    retention_days = int(getattr(self, "dedup_sweep_days", 8))

    # Both the adapter name and the age modifier are bound parameters,
    # which keeps the statement scoped and free of string interpolation.
    cur = self._db.execute(
        "DELETE FROM published_ids "
        "WHERE adapter = ? AND last_seen < datetime('now', ?)",
        (self.name, f"-{retention_days} days"),
    )
    self._db.commit()
    return cur.rowcount
@retry(
stop=stop_after_attempt(5),
wait=wait_exponential_jitter(initial=1, max=60),

View file

@ -2,6 +2,7 @@
from datetime import datetime, timezone
from pathlib import Path
import sqlite3
from unittest.mock import MagicMock
import pytest
@ -353,6 +354,36 @@ class TestDeduplication:
assert event2 is not None
assert event1.id == event2.id
def test_sweep_only_deletes_own_adapter_rows(
    self, adapter: NWSAdapter, tmp_path: Path
) -> None:
    """Regression (v0.9.19.1): ``sweep_old_ids`` must be adapter-scoped.

    Seeds one nine-day-old row each for ``nws`` and ``eonet``, runs the
    sweep on the NWS adapter, and checks that only the ``nws`` row is gone.
    NWS previously ran an unscoped global DELETE that purged *every*
    adapter's published_ids older than 8 days; the inherited base method
    scopes the delete to ``adapter = ?``.
    """
    conn = sqlite3.connect(tmp_path / "dedup.db")
    adapter._db = conn
    conn.execute(
        "CREATE TABLE published_ids (adapter TEXT, event_id TEXT, "
        "first_seen TIMESTAMP DEFAULT CURRENT_TIMESTAMP, "
        "last_seen TIMESTAMP DEFAULT CURRENT_TIMESTAMP, "
        "PRIMARY KEY (adapter, event_id))"
    )
    # One expired row per adapter: the sweep may only remove the nws one.
    conn.executemany(
        "INSERT INTO published_ids (adapter, event_id, last_seen) "
        "VALUES (?, 'old', datetime('now', '-9 days'))",
        [(owner,) for owner in ("nws", "eonet")],
    )
    conn.commit()

    assert adapter.dedup_sweep_days == 8
    deleted = adapter.sweep_old_ids()
    assert deleted == 1  # only the nws row

    remaining = conn.execute("SELECT adapter FROM published_ids")
    survivors = {row[0] for row in remaining}
    assert survivors == {"eonet"}  # foreign adapter's row survives
class TestGeometry:
"""Tests for geometry computation."""