fix(nws): scope sweep_old_ids to its own adapter (v0.9.19.1)

NWSAdapter.sweep_old_ids ran an unscoped global DELETE FROM published_ids
(no adapter = ? filter), so each per-adapter sweep cycle purged EVERY
adapter dedup rows older than 8 days -- capping dedup memory at ~8d for the
8 adapters whose window exceeds it (eonet/gdacs/nwis 30d; swpc_kindex/
protons/alerts + wfigs_incidents/perimeters 14d). Events that went silent
past 8d then re-listed were re-published as downstream duplicates.

Extract the 3 dedup methods onto the SourceAdapter base (finishing v0.9.19;
is_published/mark_published were byte-identical) and preserve the 8-day
window via dedup_sweep_days = 8, so the inherited sweep scopes to adapter=?.
Effect: each adapter retains dedup to its configured window -> fewer
downstream duplicates. Retention-window scoping, NOT a dedup-forward-only
change (no event-shape change, no migration/backfill).

Adds a regression guard asserting a foreign adapter row survives an nws
sweep. Suite 900->901/1.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Matt Johnson 2026-05-27 07:23:05 +00:00
commit 5c6d77381b
2 changed files with 32 additions and 36 deletions

View file

@ -2,6 +2,7 @@
from datetime import datetime, timezone
from pathlib import Path
import sqlite3
from unittest.mock import MagicMock
import pytest
@ -353,6 +354,36 @@ class TestDeduplication:
assert event2 is not None
assert event1.id == event2.id
def test_sweep_only_deletes_own_adapter_rows(
self, adapter: NWSAdapter, tmp_path: Path
) -> None:
"""Regression (v0.9.19.1): sweep_old_ids must be adapter-scoped.
NWS previously ran an unscoped global DELETE that purged *every*
adapter's published_ids older than 8 days; the inherited base method
scopes the delete to ``adapter = ?``.
"""
adapter._db = sqlite3.connect(tmp_path / "dedup.db")
adapter._db.execute(
"CREATE TABLE published_ids (adapter TEXT, event_id TEXT, "
"first_seen TIMESTAMP DEFAULT CURRENT_TIMESTAMP, "
"last_seen TIMESTAMP DEFAULT CURRENT_TIMESTAMP, "
"PRIMARY KEY (adapter, event_id))"
)
for adp in ("nws", "eonet"):
adapter._db.execute(
"INSERT INTO published_ids (adapter, event_id, last_seen) "
"VALUES (?, 'old', datetime('now', '-9 days'))",
(adp,),
)
adapter._db.commit()
assert adapter.dedup_sweep_days == 8
assert adapter.sweep_old_ids() == 1 # only the nws row
survivors = {
r[0] for r in adapter._db.execute("SELECT adapter FROM published_ids")
}
assert survivors == {"eonet"} # foreign adapter's row survives
class TestGeometry:
"""Tests for geometry computation."""