central/tests/test_nominatim_backend.py

118 lines
3.7 KiB
Python
Raw Permalink Normal View History

feat(3-K): real geocoder backends + producer-doc reframe + consumer-doc enrichment Second of three PRs for v0.5.0 (J shipped the framework; this fills in real backends + documents the reframed design principle in-tree; L is the events tab + map fix, then tag). Backends (all satisfy GeocoderBackend; never raise, all-null on any failure): - NaviBackend — composed Navi /api/reverse/<lat>/<lon> (name/address + timezone + landclass + elevation in one call). Near-passthrough: response already matches the canonical 9-field shape. Best-effort warmup ping (Boise) on construction when a loop is running; config `headers` slot for a future Authorization: Bearer (config-only, no code change). Default base_url http://192.168.1.130:8440. - PhotonBackend — raw Photon /reverse?lat&lon&limit=1 (name/address only). Maps features[0].properties; postal_code <- postcode; timezone/landclass/ elevation_m null (Navi-composed-endpoint extras). - NominatimBackend — OSM Nominatim /reverse?format=jsonv2 (name/address only). Configurable rate limit (default 1/sec; 0 disables for self-hosted) + required User-Agent. Maps the address block; landclass/elevation_m/timezone null. Registered all three in supervisor _BACKEND_REGISTRY (resolved by EnrichmentConfig backend_class name). Docs — design pivot now in-tree: - PRODUCER §2 reframed: the verbatim Matt quote stays; the translation inverts. Central is the consumer's only data plane (consumers can't do follow-up lookups), so enrich deliberately and centrally, namespaced under _enriched, failing to null. "No enrichment" is gone. - PRODUCER §10.1 inverted: enrichment is expected; the anti-pattern is doing it OUTSIDE the framework (inline in poll(), bypassing cache + _enriched namespacing + the never-raise safety net). 
- PRODUCER new §13 Enrichment contract: Enricher / GeocoderEnricher / GeocoderBackend Protocols, NoOpBackend default, sqlite cache + TTL + cache-all-null + don't-cache-on-raise semantics, _enriched.<name> provenance, per-field coverage matrix (cross-checked against GEOCODER_FIELDS), and the landclass antimeridian known wrinkle. - CONSUMER FIRMS section: documents the data._enriched.geocoder bundle (9 fields), per-region coverage (US-full, non-US timezone+elevation), and the antimeridian landclass caveat. Tests: - test_navi/photon/nominatim_backend.py — happy-path field mapping, null handling, extra-key drop, network/timeout/non-200/malformed -> all-null (never raises), Nominatim rate-limit (disabled + spacing) + User-Agent. Env-gated live Navi smoke (NAVI_INTEGRATION_TEST=1; skipped by default — the 192.168.1.130 endpoint isn't reachable from CT104's segment). - test_producer_doc.py — +4: §2 verbatim quote present, §10.1 subsection exists, §13 names all four protocol types, §13 coverage matrix == GEOCODER_FIELDS (derived from code, not hardcoded). Verification: full pytest 525 passed, 1 skipped (was 495; +30 backend + 4 doc tests, -1 the env-gated skip). grep subject_for_event/_ADAPTER_REGISTRY clean. All three backends import + resolve via the registry. Flagged for later (NOT done here): adapters besides FIRMS that should declare enrichment_locations (nwis, eonet, gdacs, usgs_quake, wfigs_*) — that's PR L scope alongside the events tab. See PR description. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-20 16:10:44 +00:00
"""Tests for NominatimBackend (OSM Nominatim /reverse jsonv2)."""
from unittest.mock import AsyncMock, patch
import pytest
from central.enrichment.backends.nominatim import (
DEFAULT_USER_AGENT,
NominatimBackend,
)
from central.enrichment.geocoder import GEOCODER_FIELDS, all_null_bundle
# Canned successful Nominatim reverse payload. The tests below hand a shallow
# copy of it to a mocked ``_fetch`` and then check how ``reverse()`` maps it
# (e.g. ``address.postcode`` is expected to surface as ``postal_code``).
_NOMINATIM_OK = {
    "name": "Boise",
    "display_name": "Boise, Ada County, Idaho, United States",
    "address": {
        "city": "Boise",
        "county": "Ada County",
        "state": "Idaho",
        "country": "United States",
        "postcode": "83702",
    },
}
def _backend(**kw) -> NominatimBackend:
    """Build a ``NominatimBackend`` with test-friendly defaults.

    Any keyword supplied by the caller overrides the defaults below.
    """
    test_defaults = {
        "base_url": "http://nominatim.test",
        "rate_limit_per_sec": 0,  # disabled by default in tests
    }
    return NominatimBackend(**{**test_defaults, **kw})
def test_url_and_format():
    """The reverse URL targets ``/reverse`` and carries format, lat and lon."""
    reverse_url = _backend()._url(43.6, -116.2)
    assert reverse_url.startswith("http://nominatim.test/reverse?")
    # One assertion per query parameter so a failure names the missing piece.
    assert "format=jsonv2" in reverse_url
    assert "lat=43.6" in reverse_url
    assert "lon=-116.2" in reverse_url
def test_user_agent_header_present():
    """Without an override, requests carry ``DEFAULT_USER_AGENT``."""
    headers = _backend()._request_headers()
    assert headers["User-Agent"] == DEFAULT_USER_AGENT
def test_custom_user_agent():
    """A ``user_agent`` passed to the constructor is sent verbatim."""
    custom_agent = "myapp/1.0 (me@example.com)"
    headers = _backend(user_agent=custom_agent)._request_headers()
    assert headers["User-Agent"] == custom_agent
@pytest.mark.asyncio
async def test_happy_path_maps_address_block():
    """A full Nominatim payload yields exactly the canonical geocoder fields,
    with the name/address values taken from the response."""
    backend = _backend()
    backend._fetch = AsyncMock(return_value=_NOMINATIM_OK.copy())

    bundle = await backend.reverse(43.6, -116.2)

    # The result exposes the canonical field set: nothing missing, nothing extra.
    assert set(bundle.keys()) == set(GEOCODER_FIELDS)

    expected_values = {
        "name": "Boise",
        "city": "Boise",
        "county": "Ada County",
        "state": "Idaho",
        "country": "United States",
        "postal_code": "83702",
    }
    for field, expected in expected_values.items():
        assert bundle[field] == expected
@pytest.mark.asyncio
async def test_navi_only_fields_null():
    """timezone / landclass / elevation_m stay ``None`` for a Nominatim result."""
    backend = _backend()
    backend._fetch = AsyncMock(return_value=_NOMINATIM_OK.copy())

    bundle = await backend.reverse(43.6, -116.2)

    for field in ("timezone", "landclass", "elevation_m"):
        assert bundle[field] is None
@pytest.mark.asyncio
async def test_city_falls_back_to_town_then_village():
    """``city`` is filled from ``town``, then ``village``, when ``city`` is absent.

    Previously only the village-only payload was exercised, leaving the
    ``town`` branch and the town-over-village precedence untested.

    NOTE(review): the town-only and town+village expectations are derived from
    the fallback order stated in this test's name -- confirm against
    ``NominatimBackend``'s address mapping.
    """
    cases = [
        ({"village": "Tinytown"}, "Tinytown"),
        ({"town": "Smallville"}, "Smallville"),
        ({"town": "Smallville", "village": "Tinytown"}, "Smallville"),
    ]
    for address, expected_city in cases:
        # Fresh backend per case so no state carries over between payloads.
        b = _backend()
        b._fetch = AsyncMock(return_value={"address": address})
        result = await b.reverse(1.0, 2.0)
        assert result["city"] == expected_city, f"address block: {address!r}"
@pytest.mark.asyncio
async def test_network_error_returns_all_null():
    """A ``ConnectionError`` from ``_fetch`` is absorbed into the all-null bundle."""
    backend = _backend()
    backend._fetch = AsyncMock(side_effect=ConnectionError("down"))

    bundle = await backend.reverse(1.0, 2.0)

    assert bundle == all_null_bundle()
@pytest.mark.asyncio
async def test_malformed_json_returns_all_null():
    """A ``ValueError`` from ``_fetch`` (standing in for undecodable JSON) is
    absorbed into the all-null bundle."""
    backend = _backend()
    backend._fetch = AsyncMock(side_effect=ValueError("bad"))

    bundle = await backend.reverse(1.0, 2.0)

    assert bundle == all_null_bundle()
@pytest.mark.asyncio
async def test_rate_limit_disabled_does_not_sleep():
    """With ``rate_limit_per_sec=0`` back-to-back throttles never call sleep."""
    unthrottled = _backend(rate_limit_per_sec=0)
    with patch("central.enrichment.backends.nominatim.asyncio.sleep") as sleep_mock:
        for _ in range(2):
            await unthrottled._throttle()
    sleep_mock.assert_not_called()
@pytest.mark.asyncio
async def test_rate_limit_spaces_consecutive_requests():
    """At a 1/sec limit, the second of two back-to-back throttles must sleep
    for a positive interval.

    ``time.monotonic`` is pinned to one instant so the throttle has a wait to
    compute, and ``asyncio.sleep`` is swapped for a recorder so the test never
    actually waits.
    """
    sleeps: list[float] = []

    async def _record_sleep(delay):
        sleeps.append(delay)

    limited = _backend(rate_limit_per_sec=1.0)
    frozen_clock = patch(
        "central.enrichment.backends.nominatim.time.monotonic",
        return_value=100.0,
    )
    recording_sleep = patch(
        "central.enrichment.backends.nominatim.asyncio.sleep",
        side_effect=_record_sleep,
    )
    with frozen_clock, recording_sleep:
        await limited._throttle()  # first: last_request_at was 0 -> no wait
        await limited._throttle()  # second: now==100, last==100 -> wait ~1.0s

    positive_sleeps = [delay for delay in sleeps if delay > 0]
    assert positive_sleeps, f"expected a positive throttle sleep, got {sleeps}"