mirror of
https://github.com/zvx-echo6/central.git
synced 2026-06-10 11:54:37 +02:00
feat(3-K): real geocoder backends + producer-doc reframe + consumer-doc enrichment
Second of three PRs for v0.5.0 (J shipped the framework; this fills in real backends + documents the reframed design principle in-tree; L is the events tab + map fix, then tag). Backends (all satisfy GeocoderBackend; never raise, all-null on any failure): - NaviBackend — composed Navi /api/reverse/<lat>/<lon> (name/address + timezone + landclass + elevation in one call). Near-passthrough: response already matches the canonical 9-field shape. Best-effort warmup ping (Boise) on construction when a loop is running; config `headers` slot for a future Authorization: Bearer (config-only, no code change). Default base_url http://192.168.1.130:8440. - PhotonBackend — raw Photon /reverse?lat&lon&limit=1 (name/address only). Maps features[0].properties; postal_code <- postcode; timezone/landclass/ elevation_m null (Navi-composed-endpoint extras). - NominatimBackend — OSM Nominatim /reverse?format=jsonv2 (name/address only). Configurable rate limit (default 1/sec; 0 disables for self-hosted) + required User-Agent. Maps the address block; landclass/elevation_m/timezone null. Registered all three in supervisor _BACKEND_REGISTRY (resolved by EnrichmentConfig backend_class name). Docs — design pivot now in-tree: - PRODUCER §2 reframed: the verbatim Matt quote stays; the translation inverts. Central is the consumer's only data plane (consumers can't do follow-up lookups), so enrich deliberately and centrally, namespaced under _enriched, failing to null. "No enrichment" is gone. - PRODUCER §10.1 inverted: enrichment is expected; the anti-pattern is doing it OUTSIDE the framework (inline in poll(), bypassing cache + _enriched namespacing + the never-raise safety net). 
- PRODUCER new §13 Enrichment contract: Enricher / GeocoderEnricher / GeocoderBackend Protocols, NoOpBackend default, sqlite cache + TTL + cache-all-null + don't-cache-on-raise semantics, _enriched.<name> provenance, per-field coverage matrix (cross-checked against GEOCODER_FIELDS), and the landclass antimeridian known wrinkle. - CONSUMER FIRMS section: documents the data._enriched.geocoder bundle (9 fields), per-region coverage (US-full, non-US timezone+elevation), and the antimeridian landclass caveat. Tests: - test_navi/photon/nominatim_backend.py — happy-path field mapping, null handling, extra-key drop, network/timeout/non-200/malformed -> all-null (never raises), Nominatim rate-limit (disabled + spacing) + User-Agent. Env-gated live Navi smoke (NAVI_INTEGRATION_TEST=1; skipped by default — the 192.168.1.130 endpoint isn't reachable from CT104's segment). - test_producer_doc.py — +4: §2 verbatim quote present, §10.1 subsection exists, §13 names all four protocol types, §13 coverage matrix == GEOCODER_FIELDS (derived from code, not hardcoded). Verification: full pytest 525 passed, 1 skipped (was 495; +30 backend + 4 doc tests, -1 the env-gated skip). grep subject_for_event/_ADAPTER_REGISTRY clean. All three backends import + resolve via the registry. Flagged for later (NOT done here): adapters besides FIRMS that should declare enrichment_locations (nwis, eonet, gdacs, usgs_quake, wfigs_*) — that's PR L scope alongside the events tab. See PR description. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
a477285b3f
commit
98b050b2af
11 changed files with 833 additions and 37 deletions
|
|
@ -1,5 +1,8 @@
|
|||
"""Geocoder backend implementations."""
|
||||
|
||||
from central.enrichment.backends.navi import NaviBackend
|
||||
from central.enrichment.backends.nominatim import NominatimBackend
|
||||
from central.enrichment.backends.no_op import NoOpBackend
|
||||
from central.enrichment.backends.photon import PhotonBackend
|
||||
|
||||
__all__ = ["NoOpBackend"]
|
||||
__all__ = ["NoOpBackend", "NaviBackend", "PhotonBackend", "NominatimBackend"]
|
||||
|
|
|
|||
79
src/central/enrichment/backends/navi.py
Normal file
79
src/central/enrichment/backends/navi.py
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
"""Navi reverse-geocoding backend.
|
||||
|
||||
Hits the composed Navi endpoint `<base_url>/api/reverse/<lat>/<lon>`, which
|
||||
already returns the canonical 9-field bundle (name, city, county, state,
|
||||
country, postal_code, timezone, landclass, elevation_m). Navi composes Photon
|
||||
(name/address) + tz_world (timezone) + PAD-US (landclass) + planet-DEM
|
||||
(elevation_m), so this backend is a near-passthrough mapping.
|
||||
|
||||
Coverage today: US events get a rich bundle; non-US events get timezone +
|
||||
elevation_m populated (both planet-scale) and the rest null until Navi's
|
||||
Photon planet expansion lands (no Central change needed when it does).
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
import aiohttp
|
||||
|
||||
from central.enrichment.geocoder import GEOCODER_FIELDS, all_null_bundle
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
DEFAULT_BASE_URL = "http://192.168.1.130:8440"
|
||||
# Boise — warmup coordinate, amortizes Photon/DEM cold-connection cost at startup.
|
||||
_WARMUP_LAT = 43.6150
|
||||
_WARMUP_LON = -116.2023
|
||||
|
||||
|
||||
class NaviBackend:
    """GeocoderBackend backed by the composed Navi /api/reverse endpoint.

    ``reverse`` never raises: a transport error, timeout, non-2xx status,
    undecodable body, or a JSON payload that is not an object all yield the
    all-null bundle.

    Args:
        base_url: Root URL of the Navi service; trailing slashes are stripped.
        timeout_s: Total per-request timeout, in seconds.
        headers: Extra request headers, copied at construction time.
        warmup: When true and an event loop is already running, schedule one
            best-effort warmup request in the background.
    """

    def __init__(
        self,
        base_url: str = DEFAULT_BASE_URL,
        timeout_s: float = 10.0,
        headers: dict[str, str] | None = None,
        warmup: bool = True,
    ) -> None:
        self._base_url = base_url.rstrip("/")
        self._timeout_s = timeout_s
        # Future-proof: drop an Authorization: Bearer … here config-only, no code change.
        self._headers = dict(headers or {})
        # Keep a strong reference to the warmup task: the event loop only holds
        # weak references, so an unreferenced task may be garbage-collected
        # before it finishes.
        self._warmup_task: asyncio.Task[None] | None = None
        if warmup:
            # Fire-and-forget warmup ping; only if a loop is running (it is under
            # the supervisor's asyncio.run, not under sync test construction).
            try:
                loop = asyncio.get_running_loop()
                self._warmup_task = loop.create_task(self._warmup())
            except RuntimeError:
                pass

    def _url(self, lat: float, lon: float) -> str:
        """Return the composed reverse-geocoding URL for ``lat``/``lon``."""
        return f"{self._base_url}/api/reverse/{lat}/{lon}"

    async def _warmup(self) -> None:
        """Issue one request for the warmup coordinate, swallowing any failure."""
        try:
            await self._fetch(_WARMUP_LAT, _WARMUP_LON)
        except Exception:
            # Warmup is best-effort; a failure here must not break startup.
            logger.debug("NaviBackend warmup ping failed (non-fatal)", exc_info=True)

    async def _fetch(self, lat: float, lon: float) -> Any:
        """GET the reverse endpoint and return the decoded JSON body.

        Raises on non-2xx status (``raise_for_status``), transport errors,
        timeouts, and bodies that cannot be decoded as JSON. The decoded value
        is returned as-is, so callers must verify its shape.
        """
        timeout = aiohttp.ClientTimeout(total=self._timeout_s)
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.get(self._url(lat, lon), headers=self._headers) as resp:
                resp.raise_for_status()
                return await resp.json()

    async def reverse(self, lat: float, lon: float) -> dict[str, Any]:
        """Reverse-geocode a coordinate into the canonical geocoder bundle.

        Returns:
            A dict keyed by ``GEOCODER_FIELDS``; fields missing from the
            response are ``None``. On any failure the all-null bundle is
            returned instead of raising.
        """
        try:
            data = await self._fetch(lat, lon)
        except Exception:
            # Non-200, network error, timeout, malformed JSON — never raise.
            logger.debug(
                "NaviBackend reverse failed; returning all-null bundle", exc_info=True
            )
            return all_null_bundle()
        if not isinstance(data, dict):
            # Valid JSON but not an object (list, null, string, ...): calling
            # .get() on it would raise, which would break the never-raise contract.
            logger.debug(
                "NaviBackend reverse got a non-object payload; returning all-null bundle"
            )
            return all_null_bundle()
        # Navi's response already matches the canonical shape; map defensively.
        return {field: data.get(field) for field in GEOCODER_FIELDS}
|
||||
95
src/central/enrichment/backends/nominatim.py
Normal file
95
src/central/enrichment/backends/nominatim.py
Normal file
|
|
@ -0,0 +1,95 @@
|
|||
"""OSM Nominatim reverse-geocoding backend.
|
||||
|
||||
Works against public OSM Nominatim (1 req/sec + User-Agent required) or a
|
||||
self-hosted instance (no limit). Resolves name + address only; timezone,
|
||||
landclass, and elevation_m are nulled (not in the Nominatim reverse response).
|
||||
|
||||
Nominatim jsonv2 reverse response shape:
|
||||
{"display_name": "...", "name": "...",
|
||||
"address": {city|town|village, county, state, country, postcode, ...}}
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import time
|
||||
from typing import Any
|
||||
from urllib.parse import urlencode
|
||||
|
||||
import aiohttp
|
||||
|
||||
from central.enrichment.geocoder import all_null_bundle
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
DEFAULT_BASE_URL = "https://nominatim.openstreetmap.org"
|
||||
DEFAULT_USER_AGENT = "central-enrichment/0.5 (https://github.com/zvx-echo6/central)"
|
||||
|
||||
|
||||
class NominatimBackend:
    """GeocoderBackend backed by an OSM Nominatim /reverse endpoint.

    rate_limit_per_sec throttles outbound requests (public OSM requires <= 1/s);
    set it to 0 to disable for self-hosted instances.

    ``reverse`` never raises: a transport error, timeout, non-2xx status,
    undecodable body, or an unexpectedly shaped payload all yield null fields.

    Args:
        base_url: Root URL of the Nominatim instance; trailing slashes are stripped.
        user_agent: Value sent as the ``User-Agent`` request header.
        rate_limit_per_sec: Maximum requests per second; ``<= 0`` disables throttling.
        timeout_s: Total per-request timeout, in seconds.
    """

    def __init__(
        self,
        base_url: str = DEFAULT_BASE_URL,
        user_agent: str = DEFAULT_USER_AGENT,
        rate_limit_per_sec: float = 1.0,
        timeout_s: float = 10.0,
    ) -> None:
        self._base_url = base_url.rstrip("/")
        self._user_agent = user_agent
        # Minimum spacing between consecutive requests; 0.0 means "no throttling".
        self._min_interval = (1.0 / rate_limit_per_sec) if rate_limit_per_sec > 0 else 0.0
        self._timeout_s = timeout_s
        self._rl_lock = asyncio.Lock()
        # time.monotonic() reading taken when the last request was released.
        self._last_request_at = 0.0

    def _url(self, lat: float, lon: float) -> str:
        """Return the jsonv2 reverse-geocoding URL for ``lat``/``lon``."""
        qs = urlencode({"lat": lat, "lon": lon, "format": "jsonv2"})
        return f"{self._base_url}/reverse?{qs}"

    def _request_headers(self) -> dict[str, str]:
        """Return the headers sent with every request."""
        # Public Nominatim rejects requests without an identifying User-Agent.
        return {"User-Agent": self._user_agent}

    async def _throttle(self) -> None:
        """Wait until at least ``_min_interval`` has passed since the last request.

        The sleep happens while holding the lock, so concurrent callers are
        released one at a time, each spaced by the minimum interval.
        """
        if self._min_interval <= 0:
            return
        async with self._rl_lock:
            now = time.monotonic()
            wait = self._last_request_at + self._min_interval - now
            if wait > 0:
                await asyncio.sleep(wait)
            self._last_request_at = time.monotonic()

    async def _fetch(self, lat: float, lon: float) -> Any:
        """GET the reverse endpoint and return the decoded JSON body.

        Raises on non-2xx status (``raise_for_status``), transport errors,
        timeouts, and bodies that cannot be decoded as JSON. The decoded value
        is returned as-is, so callers must verify its shape.
        """
        timeout = aiohttp.ClientTimeout(total=self._timeout_s)
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.get(
                self._url(lat, lon), headers=self._request_headers()
            ) as resp:
                resp.raise_for_status()
                return await resp.json()

    async def reverse(self, lat: float, lon: float) -> dict[str, Any]:
        """Reverse-geocode a coordinate into the canonical geocoder bundle.

        Returns:
            The 9-field bundle with name/address fields mapped from the
            Nominatim response and ``timezone``/``landclass``/``elevation_m``
            always ``None``. On any failure the all-null bundle is returned
            instead of raising.
        """
        try:
            await self._throttle()
            data = await self._fetch(lat, lon)
        except Exception:
            logger.debug(
                "NominatimBackend reverse failed; returning all-null bundle",
                exc_info=True,
            )
            return all_null_bundle()
        if not isinstance(data, dict):
            # Valid JSON but not an object: nothing to map, and .get() would raise.
            logger.debug(
                "NominatimBackend reverse got a non-object payload; "
                "returning all-null bundle"
            )
            return all_null_bundle()
        addr = data.get("address")
        if not isinstance(addr, dict):
            # Missing, null, or unexpectedly typed address block: treat as empty
            # rather than letting addr.get() raise outside the try.
            addr = {}
        return {
            "name": data.get("name") or data.get("display_name"),
            "city": addr.get("city") or addr.get("town") or addr.get("village"),
            "county": addr.get("county"),
            "state": addr.get("state"),
            "country": addr.get("country"),
            "postal_code": addr.get("postcode"),
            "timezone": None,  # not in Nominatim reverse response
            "landclass": None,  # Navi-composed-endpoint only
            "elevation_m": None,  # Navi-composed-endpoint only
        }
|
||||
69
src/central/enrichment/backends/photon.py
Normal file
69
src/central/enrichment/backends/photon.py
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
"""Raw Photon reverse-geocoding backend.
|
||||
|
||||
For deployers who run a Photon instance directly, without the composed
|
||||
Navi-style endpoint. Photon resolves name + address only — timezone,
|
||||
landclass, and elevation_m are Navi-composed-endpoint extras and are nulled
|
||||
here.
|
||||
|
||||
Photon reverse response shape:
|
||||
{"features": [{"properties": {name, city, county, state, country,
|
||||
postcode, ...}, "geometry": {...}}]}
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
from urllib.parse import urlencode
|
||||
|
||||
import aiohttp
|
||||
|
||||
from central.enrichment.geocoder import all_null_bundle
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
DEFAULT_BASE_URL = "http://localhost:2322"
|
||||
|
||||
|
||||
class PhotonBackend:
    """GeocoderBackend backed by a raw Photon /reverse endpoint.

    ``reverse`` never raises: a transport error, timeout, non-2xx status,
    undecodable body, or an unexpectedly shaped payload all yield null fields.

    Args:
        base_url: Root URL of the Photon instance; trailing slashes are stripped.
        timeout_s: Total per-request timeout, in seconds.
        headers: Extra request headers, copied at construction time.
    """

    def __init__(
        self,
        base_url: str = DEFAULT_BASE_URL,
        timeout_s: float = 10.0,
        headers: dict[str, str] | None = None,
    ) -> None:
        self._base_url = base_url.rstrip("/")
        self._timeout_s = timeout_s
        self._headers = dict(headers or {})

    def _url(self, lat: float, lon: float) -> str:
        """Return the reverse-geocoding URL for ``lat``/``lon`` (single result)."""
        qs = urlencode({"lat": lat, "lon": lon, "limit": 1})
        return f"{self._base_url}/reverse?{qs}"

    async def _fetch(self, lat: float, lon: float) -> Any:
        """GET the reverse endpoint and return the decoded JSON body.

        Raises on non-2xx status (``raise_for_status``), transport errors,
        timeouts, and bodies that cannot be decoded as JSON. The decoded value
        is returned as-is, so callers must verify its shape.
        """
        timeout = aiohttp.ClientTimeout(total=self._timeout_s)
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.get(self._url(lat, lon), headers=self._headers) as resp:
                resp.raise_for_status()
                return await resp.json()

    async def reverse(self, lat: float, lon: float) -> dict[str, Any]:
        """Reverse-geocode a coordinate into the canonical geocoder bundle.

        Returns:
            The 9-field bundle with name/address fields mapped from the first
            feature's ``properties`` and ``timezone``/``landclass``/
            ``elevation_m`` always ``None``. An empty result or any failure
            produces all-null fields instead of raising.
        """
        try:
            data = await self._fetch(lat, lon)
        except Exception:
            logger.debug(
                "PhotonBackend reverse failed; returning all-null bundle", exc_info=True
            )
            return all_null_bundle()
        if not isinstance(data, dict):
            # Valid JSON but not an object: nothing to map, and .get() would raise.
            logger.debug(
                "PhotonBackend reverse got a non-object payload; returning all-null bundle"
            )
            return all_null_bundle()
        features = data.get("features")
        first = features[0] if isinstance(features, list) and features else {}
        props = first.get("properties") if isinstance(first, dict) else None
        if not isinstance(props, dict):
            # No features, or "properties" missing/null/mistyped: map from an
            # empty dict so every field resolves to None without raising.
            props = {}
        return {
            "name": props.get("name"),
            "city": props.get("city"),
            "county": props.get("county"),
            "state": props.get("state"),
            "country": props.get("country"),
            "postal_code": props.get("postcode"),  # Photon names it 'postcode'
            "timezone": None,  # not provided by raw Photon
            "landclass": None,  # Navi-composed-endpoint only
            "elevation_m": None,  # Navi-composed-endpoint only
        }
|
||||
|
|
@ -24,7 +24,10 @@ from central.api_key_resolver import resolve_api_key_alias
|
|||
from central.config_models import EnrichmentConfig
|
||||
from central.enrichment.base import Enricher
|
||||
from central.enrichment.cache import EnrichmentCache
|
||||
from central.enrichment.backends.navi import NaviBackend
|
||||
from central.enrichment.backends.nominatim import NominatimBackend
|
||||
from central.enrichment.backends.no_op import NoOpBackend
|
||||
from central.enrichment.backends.photon import PhotonBackend
|
||||
from central.enrichment.geocoder import GeocoderEnricher
|
||||
from central.models import Event
|
||||
from central.stream_manager import StreamManager
|
||||
|
|
@ -33,9 +36,13 @@ CURSOR_DB_PATH = Path("/var/lib/central/cursors.db")
|
|||
ENRICHMENT_CACHE_DB_PATH = Path("/var/lib/central/enrichment_cache.db")
|
||||
|
||||
# Enricher / backend class-name registries for EnrichmentConfig resolution.
|
||||
# PR J shipped GeocoderEnricher + NoOpBackend; PR K adds the Navi, Photon, and Nominatim backends.
|
||||
_ENRICHER_REGISTRY: dict[str, type] = {"GeocoderEnricher": GeocoderEnricher}
|
||||
_BACKEND_REGISTRY: dict[str, type] = {"NoOpBackend": NoOpBackend}
|
||||
_BACKEND_REGISTRY: dict[str, type] = {
|
||||
"NoOpBackend": NoOpBackend,
|
||||
"NaviBackend": NaviBackend,
|
||||
"PhotonBackend": PhotonBackend,
|
||||
"NominatimBackend": NominatimBackend,
|
||||
}
|
||||
|
||||
|
||||
def build_enrichers(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue