Merge pull request #82 from zvx-echo6/v0_9_19_1_nws_sweep_scope

v0.9.19.1 - scope nws sweep_old_ids to its own adapter
This commit is contained in:
malice 2026-05-27 01:23:38 -06:00 committed by GitHub
commit f09f749052
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 32 additions and 36 deletions

View file

@ -211,6 +211,7 @@ class NWSAdapter(SourceAdapter):
requires_api_key = None requires_api_key = None
wizard_order = 1 wizard_order = 1
default_cadence_s = 60 default_cadence_s = 60
dedup_sweep_days = 8
# Alerts cover forecast zones/counties (polygons), not a single point. # Alerts cover forecast zones/counties (polygons), not a single point.
enrichment_locations = [] enrichment_locations = []
@ -393,42 +394,6 @@ class NWSAdapter(SourceAdapter):
) )
self._db.commit() self._db.commit()
def is_published(self, event_id: str) -> bool:
    """Report whether this adapter already has a dedup row for ``event_id``.

    Looks for a ``published_ids`` row matching both ``self.name`` and
    ``event_id``. Without a database connection nothing can be known to be
    published, so the answer is ``False``.
    """
    conn = self._db
    if not conn:
        return False
    match = conn.execute(
        "SELECT 1 FROM published_ids WHERE adapter = ? AND event_id = ?",
        (self.name, event_id),
    ).fetchone()
    return match is not None
def mark_published(self, event_id: str) -> None:
    """Record ``event_id`` as published for this adapter.

    Inserts a ``published_ids`` row keyed by ``(self.name, event_id)``; when
    the row already exists only its ``last_seen`` timestamp is refreshed.
    The change is committed immediately. Does nothing when there is no
    database connection.
    """
    conn = self._db
    if not conn:
        return
    # Upsert: first sighting sets both timestamps, repeats only bump last_seen.
    upsert_sql = """
        INSERT INTO published_ids (adapter, event_id, first_seen, last_seen)
        VALUES (?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
        ON CONFLICT (adapter, event_id) DO UPDATE SET
        last_seen = CURRENT_TIMESTAMP
        """
    conn.execute(upsert_sql, (self.name, event_id))
    conn.commit()
def sweep_old_ids(self) -> int:
    """Delete this adapter's expired dedup rows and return how many were removed.

    Only rows whose ``adapter`` column equals ``self.name`` are eligible, so
    other adapters sharing the ``published_ids`` table keep their own dedup
    history. A row is expired when its ``last_seen`` is older than the
    retention window: ``self.dedup_sweep_days`` days when the adapter
    defines that attribute, otherwise 8 days.

    Returns:
        Number of rows deleted; 0 when no database connection is set.
    """
    if not self._db:
        return 0
    # Fall back to the historical 8-day window when no override is defined.
    retention_days = int(getattr(self, "dedup_sweep_days", 8))
    cur = self._db.execute(
        "DELETE FROM published_ids "
        "WHERE adapter = ? AND last_seen < datetime('now', ?)",
        (self.name, f"-{retention_days} days"),
    )
    self._db.commit()
    return cur.rowcount
@retry( @retry(
stop=stop_after_attempt(5), stop=stop_after_attempt(5),
wait=wait_exponential_jitter(initial=1, max=60), wait=wait_exponential_jitter(initial=1, max=60),

View file

@ -2,6 +2,7 @@
from datetime import datetime, timezone from datetime import datetime, timezone
from pathlib import Path from pathlib import Path
import sqlite3
from unittest.mock import MagicMock from unittest.mock import MagicMock
import pytest import pytest
@ -353,6 +354,36 @@ class TestDeduplication:
assert event2 is not None assert event2 is not None
assert event1.id == event2.id assert event1.id == event2.id
def test_sweep_only_deletes_own_adapter_rows(
    self, adapter: NWSAdapter, tmp_path: Path
) -> None:
    """Regression (v0.9.19.1): sweep_old_ids must only touch its own adapter.

    Seeds one 9-day-old row for ``nws`` and one for ``eonet`` in a scratch
    database, runs the sweep, and checks that exactly one row was deleted
    and that the ``eonet`` row is the one left behind. An unscoped DELETE
    would remove both rows and fail both assertions.
    """
    conn = sqlite3.connect(tmp_path / "dedup.db")
    adapter._db = conn
    conn.execute(
        "CREATE TABLE published_ids (adapter TEXT, event_id TEXT, "
        "first_seen TIMESTAMP DEFAULT CURRENT_TIMESTAMP, "
        "last_seen TIMESTAMP DEFAULT CURRENT_TIMESTAMP, "
        "PRIMARY KEY (adapter, event_id))"
    )
    # One expired row per adapter: same event id, same age, different owner.
    conn.executemany(
        "INSERT INTO published_ids (adapter, event_id, last_seen) "
        "VALUES (?, 'old', datetime('now', '-9 days'))",
        [("nws",), ("eonet",)],
    )
    conn.commit()

    assert adapter.dedup_sweep_days == 8
    deleted = adapter.sweep_old_ids()
    assert deleted == 1  # only the nws row

    remaining = conn.execute("SELECT adapter FROM published_ids").fetchall()
    survivors = {row[0] for row in remaining}
    assert survivors == {"eonet"}  # foreign adapter's row survives
class TestGeometry: class TestGeometry:
"""Tests for geometry computation.""" """Tests for geometry computation."""