From 557230c7a7546ac908f4d4c4574728410061bcec Mon Sep 17 00:00:00 2001 From: malice Date: Sat, 6 Jun 2026 01:59:45 -0600 Subject: [PATCH] v0.10.2.1: drop broken incremental where-clause in wfigs adapters (use where=1=1) (#87) Closes #87 Co-Authored-By: Claude Opus 4.7 (1M context) --- src/central/adapters/wfigs_incidents.py | 20 ++-- src/central/adapters/wfigs_perimeters.py | 20 ++-- tests/test_wfigs.py | 114 +++++++++++++++++++++++ 3 files changed, 133 insertions(+), 21 deletions(-) diff --git a/src/central/adapters/wfigs_incidents.py b/src/central/adapters/wfigs_incidents.py index 7538fc2..f11b80c 100644 --- a/src/central/adapters/wfigs_incidents.py +++ b/src/central/adapters/wfigs_incidents.py @@ -73,7 +73,6 @@ class WFIGSIncidentsAdapter(SourceAdapter): self._cursor_db_path = cursor_db_path self._session: aiohttp.ClientSession | None = None self._db: sqlite3.Connection | None = None - self._last_poll_time: datetime | None = None # Parse region from settings region_dict = config.settings.get("region") @@ -164,12 +163,16 @@ class WFIGSIncidentsAdapter(SourceAdapter): "f": "geojson", } - # Time filter: only fetch modified since last poll - if self._last_poll_time: - iso_time = self._last_poll_time.strftime("%Y-%m-%d %H:%M:%S") - params["where"] = f"ModifiedOnDateTime > timestamp '{iso_time}'" - else: - params["where"] = "1=1" + # v0.10.2.1: full-page fetch every poll. The previous incremental + # `ModifiedOnDateTime > timestamp ''` clause silently + # returned 0 features because the upstream layer renamed the column + # to `ModifiedOnDateTime_dt` (epoch ms) and our where-clause both + # used the old name AND compared against a SQL timestamp literal. + # ArcGIS treated the clause as not-matching; the fall-off detector + # then tombstoned every previously-observed IRWINID on poll #2. + # `wfigs_observed` + `published_ids` already de-duplicate the full + # page, so re-fetching every poll is correct and idempotent. + params["where"] = "1=1" # Bbox filter if region configured if self.region: @@ -327,9 +330,6 @@ class WFIGSIncidentsAdapter(SourceAdapter): cleanup_old_observed(self._db, LAYER_NAME) self.sweep_old_ids() - # Update last poll time - self._last_poll_time = datetime.now(timezone.utc) - logger.info( "WFIGS incidents poll completed", extra={ diff --git a/src/central/adapters/wfigs_perimeters.py b/src/central/adapters/wfigs_perimeters.py index c083db4..541ed17 100644 --- a/src/central/adapters/wfigs_perimeters.py +++ b/src/central/adapters/wfigs_perimeters.py @@ -87,7 +87,6 @@ class WFIGSPerimetersAdapter(SourceAdapter): self._cursor_db_path = cursor_db_path self._session: aiohttp.ClientSession | None = None self._db: sqlite3.Connection | None = None - self._last_poll_time: datetime | None = None # Parse region from settings region_dict = config.settings.get("region") @@ -178,13 +177,15 @@ class WFIGSPerimetersAdapter(SourceAdapter): "f": "geojson", } - # Time filter: only fetch modified since last poll - # Note: perimeters use attr_ModifiedOnDateTime_dt field - if self._last_poll_time: - iso_time = self._last_poll_time.strftime("%Y-%m-%d %H:%M:%S") - params["where"] = f"attr_ModifiedOnDateTime_dt > timestamp '{iso_time}'" - else: - params["where"] = "1=1" + # v0.10.2.1: full-page fetch every poll. The previous incremental + # `attr_ModifiedOnDateTime_dt > timestamp ''` clause + # silently returned 0 features -- the column stores epoch ms + # integers, not SQL timestamps, so the comparison never matched. + # The fall-off detector then tombstoned every previously-observed + # IRWINID on poll #2 (e.g. Summit Creek 1924-acre WF in Idaho). + # `wfigs_observed` + `published_ids` already de-duplicate the full + # page, so re-fetching every poll is correct and idempotent. + params["where"] = "1=1" # Bbox filter if region configured if self.region: @@ -353,9 +354,6 @@ class WFIGSPerimetersAdapter(SourceAdapter): cleanup_old_observed(self._db, LAYER_NAME) self.sweep_old_ids() - # Update last poll time - self._last_poll_time = datetime.now(timezone.utc) - logger.info( "WFIGS perimeters poll completed", extra={ diff --git a/tests/test_wfigs.py b/tests/test_wfigs.py index fa8fbe6..254a5f8 100644 --- a/tests/test_wfigs.py +++ b/tests/test_wfigs.py @@ -315,6 +315,64 @@ class TestWFIGSIncidentsAdapter: await adapter.shutdown() + @pytest.mark.asyncio + async def test_where_clause_is_1_eq_1_on_every_poll( + self, mock_config: AdapterConfig, mock_config_store: MagicMock, cursor_db_path: Path + ): + """v0.10.2.1 regression guard: every poll sends ``where=1=1``. + + The pre-v0.10.2.1 adapter sent ``where=ModifiedOnDateTime > timestamp 'X'`` + on every poll after the first -- a clause that silently returned 0 + features because the upstream layer renamed the column to + ``ModifiedOnDateTime_dt``. That made the fall-off detector tombstone + every previously-observed IRWINID on poll #2 (the Summit Creek bug). + Both the first poll and every poll thereafter must now send the + unconditional full-page query. + """ + from central.adapters.wfigs_incidents import WFIGSIncidentsAdapter + + adapter = WFIGSIncidentsAdapter(mock_config, mock_config_store, cursor_db_path) + await adapter.startup() + + captured: list[dict] = [] + + def _capture(url, params=None, **kw): + captured.append(dict(params or {})) + resp = AsyncMock() + resp.raise_for_status = MagicMock() + resp.json = AsyncMock(return_value=SAMPLE_INCIDENTS_RESPONSE) + return AsyncMock(__aenter__=AsyncMock(return_value=resp), __aexit__=AsyncMock()) + + with patch.object(adapter._session, "get", side_effect=_capture): + _ = [e async for e in adapter.poll()] # poll 1 + _ = [e async for e in adapter.poll()] # poll 2 + + await adapter.shutdown() + + assert len(captured) == 2, "expected one HTTP call per poll" + for i, params in enumerate(captured): + assert params.get("where") == "1=1", ( + f"poll #{i+1} sent where={params.get('where')!r}; expected '1=1'" + ) + # Regression guard against ANY incremental time clause sneaking back. + for v in params.values(): + assert "ModifiedOnDateTime" not in str(v), ( + f"poll #{i+1} param value referenced ModifiedOnDateTime: {v!r}" + ) + + @pytest.mark.asyncio + async def test_no_last_poll_time_attribute( + self, mock_config: AdapterConfig, mock_config_store: MagicMock, cursor_db_path: Path + ): + """The vestigial ``_last_poll_time`` attribute must not be re-introduced.""" + from central.adapters.wfigs_incidents import WFIGSIncidentsAdapter + + adapter = WFIGSIncidentsAdapter(mock_config, mock_config_store, cursor_db_path) + assert not hasattr(adapter, "_last_poll_time"), ( + "_last_poll_time was the in-memory cursor driving the broken " + "incremental where-clause; do not re-add" + ) + @pytest.mark.asyncio async def test_fall_off_emits_removal( self, mock_config: AdapterConfig, mock_config_store: MagicMock, cursor_db_path: Path @@ -596,3 +654,59 @@ class TestWFIGSPerimetersAdapter: assert _as_geometry('{"type": "Point"}')["type"] == "Point" assert _as_geometry("not json") is None assert _as_geometry(None) is None + + @pytest.mark.asyncio + async def test_where_clause_is_1_eq_1_on_every_poll( + self, mock_config: AdapterConfig, mock_config_store: MagicMock, cursor_db_path: Path + ): + """v0.10.2.1 regression guard: every poll sends ``where=1=1``. + + The pre-v0.10.2.1 perimeters adapter sent + ``where=attr_ModifiedOnDateTime_dt > timestamp 'X'`` on every poll + after the first -- a type-broken comparison (epoch ms vs SQL + timestamp literal) that silently returned 0 features. The fall-off + detector then tombstoned Summit Creek (1924-acre Idaho WF, 85% + contained) on poll #2 after the v0.10.2 supervisor restart. + """ + from central.adapters.wfigs_perimeters import WFIGSPerimetersAdapter + + adapter = WFIGSPerimetersAdapter(mock_config, mock_config_store, cursor_db_path) + await adapter.startup() + + captured: list[dict] = [] + + def _capture(url, params=None, **kw): + captured.append(dict(params or {})) + resp = AsyncMock() + resp.raise_for_status = MagicMock() + resp.json = AsyncMock(return_value=SAMPLE_PERIMETERS_RESPONSE) + return AsyncMock(__aenter__=AsyncMock(return_value=resp), __aexit__=AsyncMock()) + + with patch.object(adapter._session, "get", side_effect=_capture): + _ = [e async for e in adapter.poll()] # poll 1 + _ = [e async for e in adapter.poll()] # poll 2 + + await adapter.shutdown() + + assert len(captured) == 2, "expected one HTTP call per poll" + for i, params in enumerate(captured): + assert params.get("where") == "1=1", ( + f"poll #{i+1} sent where={params.get('where')!r}; expected '1=1'" + ) + for v in params.values(): + assert "ModifiedOnDateTime" not in str(v), ( + f"poll #{i+1} param value referenced ModifiedOnDateTime: {v!r}" + ) + + @pytest.mark.asyncio + async def test_no_last_poll_time_attribute( + self, mock_config: AdapterConfig, mock_config_store: MagicMock, cursor_db_path: Path + ): + """The vestigial ``_last_poll_time`` attribute must not be re-introduced.""" + from central.adapters.wfigs_perimeters import WFIGSPerimetersAdapter + + adapter = WFIGSPerimetersAdapter(mock_config, mock_config_store, cursor_db_path) + assert not hasattr(adapter, "_last_poll_time"), ( + "_last_poll_time was the in-memory cursor driving the broken " + "incremental where-clause; do not re-add" + )