mirror of
https://github.com/zvx-echo6/central.git
synced 2026-05-21 18:14:44 +02:00
feat(3-K): real geocoder backends + producer-doc reframe + consumer-doc enrichment
Second of three PRs for v0.5.0 (J shipped the framework; this fills in real backends + documents the reframed design principle in-tree; L is the events tab + map fix, then tag). Backends (all satisfy GeocoderBackend; never raise, all-null on any failure): - NaviBackend — composed Navi /api/reverse/<lat>/<lon> (name/address + timezone + landclass + elevation in one call). Near-passthrough: response already matches the canonical 9-field shape. Best-effort warmup ping (Boise) on construction when a loop is running; config `headers` slot for a future Authorization: Bearer (config-only, no code change). Default base_url http://192.168.1.130:8440. - PhotonBackend — raw Photon /reverse?lat&lon&limit=1 (name/address only). Maps features[0].properties; postal_code <- postcode; timezone/landclass/ elevation_m null (Navi-composed-endpoint extras). - NominatimBackend — OSM Nominatim /reverse?format=jsonv2 (name/address only). Configurable rate limit (default 1/sec; 0 disables for self-hosted) + required User-Agent. Maps the address block; landclass/elevation_m/timezone null. Registered all three in supervisor _BACKEND_REGISTRY (resolved by EnrichmentConfig backend_class name). Docs — design pivot now in-tree: - PRODUCER §2 reframed: the verbatim Matt quote stays; the translation inverts. Central is the consumer's only data plane (consumers can't do follow-up lookups), so enrich deliberately and centrally, namespaced under _enriched, failing to null. "No enrichment" is gone. - PRODUCER §10.1 inverted: enrichment is expected; the anti-pattern is doing it OUTSIDE the framework (inline in poll(), bypassing cache + _enriched namespacing + the never-raise safety net). 
- PRODUCER new §13 Enrichment contract: Enricher / GeocoderEnricher / GeocoderBackend Protocols, NoOpBackend default, sqlite cache + TTL + cache-all-null + don't-cache-on-raise semantics, _enriched.<name> provenance, per-field coverage matrix (cross-checked against GEOCODER_FIELDS), and the landclass antimeridian known wrinkle. - CONSUMER FIRMS section: documents the data._enriched.geocoder bundle (9 fields), per-region coverage (US-full, non-US timezone+elevation), and the antimeridian landclass caveat. Tests: - test_navi/photon/nominatim_backend.py — happy-path field mapping, null handling, extra-key drop, network/timeout/non-200/malformed -> all-null (never raises), Nominatim rate-limit (disabled + spacing) + User-Agent. Env-gated live Navi smoke (NAVI_INTEGRATION_TEST=1; skipped by default — the 192.168.1.130 endpoint isn't reachable from CT104's segment). - test_producer_doc.py — +4: §2 verbatim quote present, §10.1 subsection exists, §13 names all four protocol types, §13 coverage matrix == GEOCODER_FIELDS (derived from code, not hardcoded). Verification: full pytest 525 passed, 1 skipped (was 495; +30 backend + 4 doc tests, -1 the env-gated skip). grep subject_for_event/_ADAPTER_REGISTRY clean. All three backends import + resolve via the registry. Flagged for later (NOT done here): adapters besides FIRMS that should declare enrichment_locations (nwis, eonet, gdacs, usgs_quake, wfigs_*) — that's PR L scope alongside the events tab. See PR description. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
a477285b3f
commit
98b050b2af
11 changed files with 833 additions and 37 deletions
120
tests/test_navi_backend.py
Normal file
120
tests/test_navi_backend.py
Normal file
|
|
@ -0,0 +1,120 @@
|
|||
"""Tests for NaviBackend (composed Navi /api/reverse endpoint).
|
||||
|
||||
HTTP is exercised via patching the backend's `_fetch` (the codebase has no
|
||||
aioresponses/respx dep); URL construction is asserted on the pure `_url`
|
||||
helper. An env-gated integration smoke against the live Navi endpoint is
|
||||
skipped by default.
|
||||
"""
|
||||
|
||||
import os
|
||||
from unittest.mock import AsyncMock
|
||||
|
||||
import pytest
|
||||
|
||||
from central.enrichment.backends.navi import NaviBackend
|
||||
from central.enrichment.geocoder import GEOCODER_FIELDS, all_null_bundle
|
||||
|
||||
# Canned Navi /api/reverse payload. Its keys are the canonical geocoder
# fields, so the happy-path test expects it back from reverse() unchanged.
_NAVI_OK = dict(
    name="Where you are",
    city="Boise",
    county="Ada",
    state="Idaho",
    country="United States",
    postal_code="83702",
    timezone="America/Boise",
    landclass="Public — National Forest",
    elevation_m=824,
)
|
||||
|
||||
|
||||
def _backend() -> NaviBackend:
    """Build a NaviBackend pointed at a fake host for unit tests."""
    # Warmup stays off so that constructing the backend never schedules a
    # background task while a test is running.
    backend = NaviBackend(base_url="http://navi.test:8440", warmup=False)
    return backend
|
||||
|
||||
|
||||
def test_url_construction():
    """`_url` places lat/lon as path segments under /api/reverse."""
    expected = "http://navi.test:8440/api/reverse/43.615/-116.2023"
    built = _backend()._url(43.615, -116.2023)
    assert built == expected
|
||||
|
||||
|
||||
def test_base_url_trailing_slash_stripped():
    """A trailing slash on base_url must not produce `//` in the built URL."""
    backend = NaviBackend(base_url="http://navi.test:8440/", warmup=False)
    url = backend._url(1.0, 2.0)
    assert url == "http://navi.test:8440/api/reverse/1.0/2.0"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_happy_path_passthrough():
    """A canonical-shape Navi response is returned by reverse() as-is."""
    backend = _backend()
    # Give the mock its own copy so the shared fixture cannot be mutated.
    backend._fetch = AsyncMock(return_value=_NAVI_OK.copy())

    bundle = await backend.reverse(43.615, -116.2023)

    assert bundle == _NAVI_OK
    assert set(bundle.keys()) == set(GEOCODER_FIELDS)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_partial_nulls_preserved():
    """Navi 200-with-nulls (non-US: timezone + elevation, rest null)."""
    # Start from an all-null bundle and fill in only the two known fields;
    # dict(...) builds a new mapping rather than touching the helper's result.
    payload = dict(all_null_bundle(), timezone="Europe/Paris", elevation_m=35)
    backend = _backend()
    backend._fetch = AsyncMock(return_value=payload)

    bundle = await backend.reverse(48.85, 2.35)

    assert bundle["timezone"] == "Europe/Paris"
    assert bundle["elevation_m"] == 35
    assert bundle["city"] is None
    assert set(bundle.keys()) == set(GEOCODER_FIELDS)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_extra_keys_dropped():
    """Keys outside GEOCODER_FIELDS must not appear in the returned bundle."""
    payload = dict(_NAVI_OK, debug_internal="leak")
    backend = _backend()
    backend._fetch = AsyncMock(return_value=payload)

    bundle = await backend.reverse(1.0, 2.0)

    assert "debug_internal" not in bundle
    assert set(bundle.keys()) == set(GEOCODER_FIELDS)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_network_error_returns_all_null_never_raises():
    """A ConnectionError from `_fetch` yields the all-null bundle, not a raise."""
    backend = _backend()
    backend._fetch = AsyncMock(side_effect=ConnectionError("boom"))

    bundle = await backend.reverse(1.0, 2.0)

    assert bundle == all_null_bundle()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_timeout_returns_all_null():
    """An asyncio timeout raised by `_fetch` yields the all-null bundle."""
    import asyncio

    backend = _backend()
    backend._fetch = AsyncMock(side_effect=asyncio.TimeoutError())

    bundle = await backend.reverse(1.0, 2.0)

    assert bundle == all_null_bundle()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_malformed_response_returns_all_null():
    """A ValueError from `_fetch` (standing in for bad JSON) yields all-null."""
    backend = _backend()
    backend._fetch = AsyncMock(side_effect=ValueError("not json"))

    bundle = await backend.reverse(1.0, 2.0)

    assert bundle == all_null_bundle()
|
||||
|
||||
|
||||
def test_headers_passed_through_config():
    """Headers given at construction are stored on the backend as `_headers`.

    This is a plain synchronous test: nothing here is awaited, so it does not
    need the asyncio marker or an event loop. The sibling URL tests build the
    backend synchronously with ``warmup=False`` in the same way.
    """
    backend = NaviBackend(
        base_url="http://navi.test",
        headers={"Authorization": "Bearer x"},
        warmup=False,
    )
    assert backend._headers == {"Authorization": "Bearer x"}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.skipif(
    os.getenv("NAVI_INTEGRATION_TEST") != "1",
    reason="set NAVI_INTEGRATION_TEST=1 to hit the live Navi endpoint",
)
async def test_live_navi_boise():
    """Integration smoke against the real endpoint (default skipped)."""
    # No base_url is passed, so the backend's own default is used.
    backend = NaviBackend(warmup=False)
    bundle = await backend.reverse(43.6150, -116.2023)

    expected_text = (
        ("name", "Where you are"),
        ("city", "Boise"),
        ("state", "Idaho"),
    )
    for field, value in expected_text:
        assert bundle[field] == value

    # Elevation only needs to land within 50 m of the 824 m reference.
    elevation = bundle["elevation_m"]
    assert elevation is not None
    assert abs(float(elevation) - 824) < 50
|
||||
118
tests/test_nominatim_backend.py
Normal file
118
tests/test_nominatim_backend.py
Normal file
|
|
@ -0,0 +1,118 @@
|
|||
"""Tests for NominatimBackend (OSM Nominatim /reverse jsonv2)."""
|
||||
|
||||
from unittest.mock import AsyncMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from central.enrichment.backends.nominatim import (
|
||||
DEFAULT_USER_AGENT,
|
||||
NominatimBackend,
|
||||
)
|
||||
from central.enrichment.geocoder import GEOCODER_FIELDS, all_null_bundle
|
||||
|
||||
# Canned Nominatim /reverse payload: a top-level name plus an `address` block
# that the mapping tests read from. Note the upstream key is `postcode`.
_NOMINATIM_OK = dict(
    name="Boise",
    display_name="Boise, Ada County, Idaho, United States",
    address=dict(
        city="Boise",
        county="Ada County",
        state="Idaho",
        country="United States",
        postcode="83702",
    ),
)
|
||||
|
||||
|
||||
def _backend(**kw) -> NominatimBackend:
    """Build a NominatimBackend with test defaults; keyword args override them.

    Defaults: a fake base URL, and rate limiting disabled
    (``rate_limit_per_sec=0``) so tests are not throttled unless they ask.
    """
    options = {
        "base_url": "http://nominatim.test",
        "rate_limit_per_sec": 0,
        **kw,  # caller-supplied values win over the defaults above
    }
    return NominatimBackend(**options)
|
||||
|
||||
|
||||
def test_url_and_format():
    """`_url` targets /reverse and carries format=jsonv2 plus lat and lon."""
    url = _backend()._url(43.6, -116.2)
    assert url.startswith("http://nominatim.test/reverse?")
    # Query-parameter order is not pinned; only presence is checked.
    for fragment in ("format=jsonv2", "lat=43.6", "lon=-116.2"):
        assert fragment in url
|
||||
|
||||
|
||||
def test_user_agent_header_present():
    """Without an override, requests carry DEFAULT_USER_AGENT."""
    headers = _backend()._request_headers()
    assert headers["User-Agent"] == DEFAULT_USER_AGENT
|
||||
|
||||
|
||||
def test_custom_user_agent():
    """A `user_agent` passed at construction is sent as the User-Agent header."""
    backend = _backend(user_agent="myapp/1.0 (me@example.com)")
    headers = backend._request_headers()
    assert headers["User-Agent"] == "myapp/1.0 (me@example.com)"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_happy_path_maps_address_block():
    """The `address` block is mapped onto the canonical bundle fields."""
    backend = _backend()
    backend._fetch = AsyncMock(return_value=_NOMINATIM_OK.copy())

    bundle = await backend.reverse(43.6, -116.2)

    assert set(bundle.keys()) == set(GEOCODER_FIELDS)
    expected = {
        "name": "Boise",
        "city": "Boise",
        "county": "Ada County",
        "state": "Idaho",
        "country": "United States",
        "postal_code": "83702",  # comes from the upstream `postcode` key
    }
    for field, value in expected.items():
        assert bundle[field] == value
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_navi_only_fields_null():
    """timezone, landclass and elevation_m are null for a Nominatim result."""
    backend = _backend()
    backend._fetch = AsyncMock(return_value=_NOMINATIM_OK.copy())

    bundle = await backend.reverse(43.6, -116.2)

    for field in ("timezone", "landclass", "elevation_m"):
        assert bundle[field] is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_city_falls_back_to_town_then_village():
    """With no `city` in the address block, `city` is taken from `town`, then
    `village`.

    All three steps the test name promises are exercised: village alone, town
    alone, and town winning when both are present.
    NOTE(review): the town-before-village precedence is taken from this test's
    name — confirm against NominatimBackend's mapping.
    """
    cases = (
        ({"village": "Tinytown"}, "Tinytown"),
        ({"town": "Smallville"}, "Smallville"),
        ({"town": "Smallville", "village": "Tinytown"}, "Smallville"),
    )
    for address, expected in cases:
        # Fresh backend per case so no state carries over between lookups.
        b = _backend()
        b._fetch = AsyncMock(return_value={"address": address})
        result = await b.reverse(1.0, 2.0)
        assert result["city"] == expected, f"address block {address!r}"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_network_error_returns_all_null():
    """A ConnectionError from `_fetch` yields the all-null bundle."""
    backend = _backend()
    backend._fetch = AsyncMock(side_effect=ConnectionError("down"))

    bundle = await backend.reverse(1.0, 2.0)

    assert bundle == all_null_bundle()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_malformed_json_returns_all_null():
    """A ValueError from `_fetch` (standing in for bad JSON) yields all-null."""
    backend = _backend()
    backend._fetch = AsyncMock(side_effect=ValueError("bad"))

    bundle = await backend.reverse(1.0, 2.0)

    assert bundle == all_null_bundle()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_rate_limit_disabled_does_not_sleep():
    """With rate_limit_per_sec=0, back-to-back throttles never call sleep."""
    backend = _backend(rate_limit_per_sec=0)
    with patch("central.enrichment.backends.nominatim.asyncio.sleep") as slp:
        for _ in range(2):
            await backend._throttle()
    slp.assert_not_called()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_rate_limit_spaces_consecutive_requests():
    """With a 1/sec limit, the second back-to-back call must sleep a positive
    interval. The clock is frozen at one instant so the throttle sees zero
    elapsed time between the two calls."""
    backend = _backend(rate_limit_per_sec=1.0)
    sleeps: list[float] = []

    async def _fake_sleep(d):
        # Record the requested delay instead of actually waiting.
        sleeps.append(d)

    clock = patch(
        "central.enrichment.backends.nominatim.time.monotonic",
        return_value=100.0,
    )
    sleeper = patch(
        "central.enrichment.backends.nominatim.asyncio.sleep",
        side_effect=_fake_sleep,
    )
    with clock:
        with sleeper:
            await backend._throttle()  # first: last_request_at was 0 -> no wait
            await backend._throttle()  # second: now==100, last==100 -> wait ~1.0s

    positive = [d for d in sleeps if d > 0]
    assert positive, f"expected a positive throttle sleep, got {sleeps}"
|
||||
92
tests/test_photon_backend.py
Normal file
92
tests/test_photon_backend.py
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
"""Tests for PhotonBackend (raw Photon /reverse)."""
|
||||
|
||||
from unittest.mock import AsyncMock
|
||||
|
||||
import pytest
|
||||
|
||||
from central.enrichment.backends.photon import PhotonBackend
|
||||
from central.enrichment.geocoder import GEOCODER_FIELDS, all_null_bundle
|
||||
|
||||
# Canned Photon /reverse payload: a single feature whose `properties` hold the
# address parts (Photon spells the postal code `postcode`) plus one key,
# `osm_key`, that is not a canonical geocoder field.
_PHOTON_OK = dict(
    features=[
        dict(
            properties=dict(
                name="Boise",
                city="Boise",
                county="Ada County",
                state="Idaho",
                country="United States",
                postcode="83702",
                osm_key="place",
            ),
            geometry=dict(type="Point", coordinates=[-116.2, 43.6]),
        ),
    ],
)
|
||||
|
||||
|
||||
def _backend() -> PhotonBackend:
    """Build a PhotonBackend pointed at a fake host for unit tests."""
    backend = PhotonBackend(base_url="http://photon.test:2322")
    return backend
|
||||
|
||||
|
||||
def test_url_construction():
    """`_url` targets /reverse and carries lat, lon and limit=1."""
    url = _backend()._url(43.6, -116.2)
    assert url.startswith("http://photon.test:2322/reverse?")
    # Query-parameter order is not pinned; only presence is checked.
    for fragment in ("lat=43.6", "lon=-116.2", "limit=1"):
        assert fragment in url
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_happy_path_maps_to_canonical():
    """The first feature's properties are mapped onto the canonical fields."""
    backend = _backend()
    backend._fetch = AsyncMock(return_value=_PHOTON_OK.copy())

    bundle = await backend.reverse(43.6, -116.2)

    assert set(bundle.keys()) == set(GEOCODER_FIELDS)
    expected = {
        "city": "Boise",
        "county": "Ada County",
        "state": "Idaho",
        "country": "United States",
        "postal_code": "83702",  # mapped from Photon 'postcode'
    }
    for field, value in expected.items():
        assert bundle[field] == value
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_navi_only_fields_are_null():
    """timezone, landclass and elevation_m are null for a Photon result."""
    backend = _backend()
    backend._fetch = AsyncMock(return_value=_PHOTON_OK.copy())

    bundle = await backend.reverse(43.6, -116.2)

    for field in ("timezone", "landclass", "elevation_m"):
        assert bundle[field] is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_empty_features_returns_all_null():
    """An empty `features` list yields the all-null bundle."""
    backend = _backend()
    backend._fetch = AsyncMock(return_value={"features": []})

    bundle = await backend.reverse(0.0, 0.0)

    assert bundle == all_null_bundle()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_missing_properties_keys_become_null():
    """Properties absent from the feature come back as None, and the bundle
    still has the full canonical key set."""
    sparse = {"features": [{"properties": {"name": "X"}}]}
    backend = _backend()
    backend._fetch = AsyncMock(return_value=sparse)

    bundle = await backend.reverse(1.0, 2.0)

    assert bundle["name"] == "X"
    assert bundle["city"] is None
    assert bundle["postal_code"] is None
    assert set(bundle.keys()) == set(GEOCODER_FIELDS)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_network_error_returns_all_null():
    """A ConnectionError from `_fetch` yields the all-null bundle."""
    backend = _backend()
    backend._fetch = AsyncMock(side_effect=ConnectionError("down"))

    bundle = await backend.reverse(1.0, 2.0)

    assert bundle == all_null_bundle()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_malformed_json_returns_all_null():
    """A ValueError from `_fetch` (standing in for bad JSON) yields all-null."""
    backend = _backend()
    backend._fetch = AsyncMock(side_effect=ValueError("bad json"))

    bundle = await backend.reverse(1.0, 2.0)

    assert bundle == all_null_bundle()
|
||||
|
|
@ -23,8 +23,14 @@ from pathlib import Path
|
|||
|
||||
from central.adapter import SourceAdapter
|
||||
from central.adapter_discovery import discover_adapters
|
||||
from central.enrichment.geocoder import GEOCODER_FIELDS
|
||||
from central.streams import STREAMS
|
||||
|
||||
# Design-principle quote (Matt, 2026-05-19) that §2 of the producer doc must
# keep word for word; the §2 test checks for it by substring.
_DESIGN_PRINCIPLE_QUOTE = "Central takes it all and gives it all. It's up to the pipe to do with it"
|
||||
|
||||
# Producer integration doc, located at docs/ two levels up from this file.
DOC_PATH = Path(__file__).resolve().parents[1].joinpath("docs", "PRODUCER-INTEGRATION.md")
|
||||
|
||||
|
||||
|
|
@ -186,6 +192,48 @@ def test_streams_snippet_quotes_live_registry():
|
|||
)
|
||||
|
||||
|
||||
def _section(doc: str, header_re: str) -> str:
    """Return the body text of the doc section whose header matches header_re.

    The body starts on the line after the header and runs up to, but not
    including, the next line beginning with ``## `` (a level-2 header), or to
    the end of ``doc`` if there is none. Deeper ``###`` subsections stay inside
    the returned body.

    Args:
        doc: Full markdown text to search.
        header_re: Regex for the header line; ``^`` anchors at line starts
            because the search is multiline.

    Raises:
        AssertionError: if no section matches ``header_re``.
    """
    pattern = re.compile(
        header_re + r"\s*\n(.*?)(?=^## |\Z)",
        re.DOTALL | re.MULTILINE,
    )
    found = pattern.search(doc)
    assert found, f"doc missing section matching {header_re!r}"
    return found.group(1)
|
||||
|
||||
|
||||
def test_design_principle_quote_present_in_section_2():
    """§2 must still contain the verbatim design-principle quote; the reframe
    rewrote the translation under it, not the quote."""
    body = _section(_doc_text(), r"^## 2\. The design principle")
    quote_present = _DESIGN_PRINCIPLE_QUOTE in body
    assert quote_present, "verbatim design-principle quote missing from §2"
|
||||
|
||||
|
||||
def test_anti_pattern_10_1_section_exists():
    """§10.1 must remain a `###` subsection: its content was reframed to
    'enrichment outside the framework', but the structure is preserved."""
    subsection_header = re.compile(r"^### 10\.1 ", re.MULTILINE)
    found = subsection_header.search(_doc_text())
    assert found, "doc missing '### 10.1' subsection"
|
||||
|
||||
|
||||
def test_enrichment_contract_section_13_has_all_protocol_references():
    """§13 must mention each of the four enrichment contract type names."""
    required = ("Enricher", "GeocoderEnricher", "GeocoderBackend", "NoOpBackend")
    section = _section(_doc_text(), r"^## 13\. Enrichment contract")
    # Plain substring check, so the exact spelling of each name matters.
    for ref in required:
        assert ref in section, f"§13 missing reference to {ref!r}"
|
||||
|
||||
|
||||
def test_enrichment_coverage_matrix_lists_exactly_geocoder_fields():
    """The §13 coverage matrix must list exactly the fields in GEOCODER_FIELDS.

    The expected set is read from the code constant rather than hardcoded here,
    so doc and code cannot drift apart silently.
    """
    section = _section(_doc_text(), r"^## 13\. Enrichment contract")
    # A matrix row starts with a backticked field name: | `field_name` | ... |
    row_pattern = re.compile(r"^\|\s*`([a-z_]+)`\s*\|", re.MULTILINE)
    row_fields = {match.group(1) for match in row_pattern.finditer(section)}
    canonical = set(GEOCODER_FIELDS)
    assert row_fields == canonical, (
        f"coverage-matrix field drift: "
        f"doc-only={row_fields - set(GEOCODER_FIELDS)}, "
        f"code-only={set(GEOCODER_FIELDS) - row_fields}"
    )
|
||||
|
||||
|
||||
def test_no_orphan_adapter_references_in_anti_patterns():
|
||||
"""Anti-patterns section names two real adapter modules as examples
|
||||
(firms, inciweb in §10.4). Those names must still resolve via
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue