mirror of
https://github.com/zvx-echo6/recon.git
synced 2026-05-20 06:34:40 +02:00
feat(navi): structured geocode with usaddress parsing and reranker
Add lib/geocode.py — multi-source retrieval pipeline:
- usaddress CRF parsing with intent classification
- Netsyms structured lookup (uses raw street abbreviations)
- Photon /structured + /api freetext retrieval
- Weighted 10-signal reranker (housenumber, street fuzz, locality, source authority, etc.)
- match_code annotations + address book proximity labeling
- Trace log at /tmp/geocode_rerank_trace.log

nav_tools.py now delegates geocode() to the new module.

Tests updated: US address queries correctly return Netsyms results.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
c76d63b785
commit
87b230dcba
3 changed files with 721 additions and 197 deletions
193
lib/nav_tools.py
193
lib/nav_tools.py
|
|
@ -50,86 +50,14 @@ def _haversine_m(lat1, lon1, lat2, lon2):
|
|||
return R * 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
|
||||
|
||||
|
||||
def _classify_photon_feature(props, index):
|
||||
"""Classify a Photon feature into (type, confidence)."""
|
||||
osm_key = props.get('osm_key', '')
|
||||
osm_value = props.get('osm_value', '')
|
||||
feat_type = props.get('type', '')
|
||||
has_housenumber = bool(props.get('housenumber'))
|
||||
|
||||
# Type classification
|
||||
if has_housenumber or osm_value in ('house', 'residential'):
|
||||
result_type = 'street_address'
|
||||
elif feat_type in ('city', 'town', 'village', 'hamlet', 'county', 'state', 'country'):
|
||||
result_type = 'locality'
|
||||
elif osm_key in ('amenity', 'shop', 'tourism', 'leisure') or osm_value:
|
||||
result_type = 'poi'
|
||||
else:
|
||||
result_type = 'poi'
|
||||
|
||||
# Confidence — simple positional heuristic
|
||||
if index == 0:
|
||||
confidence = 'high'
|
||||
elif index <= 2:
|
||||
confidence = 'medium'
|
||||
else:
|
||||
confidence = 'low'
|
||||
|
||||
return result_type, confidence
|
||||
|
||||
|
||||
def _photon_feature_to_name(props):
|
||||
"""Build a display name from a Photon feature's properties."""
|
||||
parts = []
|
||||
housenumber = props.get('housenumber')
|
||||
street = props.get('street')
|
||||
name = props.get('name', '')
|
||||
|
||||
if housenumber and street:
|
||||
parts.append(f"{housenumber} {street}")
|
||||
if name and name != street:
|
||||
parts.append(name)
|
||||
elif name:
|
||||
parts.append(name)
|
||||
elif street:
|
||||
parts.append(street)
|
||||
|
||||
for key in ('city', 'county', 'state', 'country'):
|
||||
v = props.get(key)
|
||||
if v and (not parts or v != parts[-1]):
|
||||
parts.append(v)
|
||||
|
||||
return ', '.join(p for p in parts if p) or 'Unknown'
|
||||
|
||||
|
||||
def _annotate_with_address_book(results):
|
||||
"""Add labeled_as to results within ADDRESS_BOOK_ANNOTATION_RADIUS_M of an address book entry."""
|
||||
try:
|
||||
from . import address_book
|
||||
entries = address_book.load()
|
||||
except Exception:
|
||||
return
|
||||
|
||||
for result in results:
|
||||
rlat, rlon = result.get('lat'), result.get('lon')
|
||||
if rlat is None or rlon is None:
|
||||
continue
|
||||
for entry in entries:
|
||||
elat, elon = entry.get('lat'), entry.get('lon')
|
||||
if elat is None or elon is None:
|
||||
continue
|
||||
dist = _haversine_m(rlat, rlon, elat, elon)
|
||||
if dist <= ADDRESS_BOOK_ANNOTATION_RADIUS_M:
|
||||
result['labeled_as'] = entry['name']
|
||||
break
|
||||
def geocode(query: str, limit: int = 10):
    """Geocode ``query`` by delegating to the structured geocode module.

    This is a thin pass-through kept so existing callers of
    ``nav_tools.geocode`` continue to work. See lib/geocode.py for the
    implementation and the shape of the returned value.
    """
    # Imported lazily under an alias so the module does not shadow (or get
    # shadowed by) this function's own name.
    from . import geocode as geocode_mod

    delegate = geocode_mod.geocode
    return delegate(query, limit=limit)
|
||||
|
||||
|
||||
def _geocode(query: str):
|
||||
"""Geocode a place name via address book then Photon. Returns (lat, lon, display_name) or raises.
|
||||
|
||||
Used internally by route() — returns a simple (lat, lon, name) tuple.
|
||||
For the full ranked-results API, use geocode() instead.
|
||||
"""
|
||||
"""Internal: returns (lat, lon, display_name) tuple for route()."""
|
||||
result = geocode(query, limit=1)
|
||||
results = result.get('results', [])
|
||||
if not results:
|
||||
|
|
@ -138,117 +66,6 @@ def _geocode(query: str):
|
|||
return top['lat'], top['lon'], top['name']
|
||||
|
||||
|
||||
|
||||
def geocode(query: str, limit: int = 10):
    """
    Photon-first geocoding with ranked results.

    Chain:
      1. Coordinate detection (pre-search)
      2. Address book nickname short-circuit (single-word queries only)
      3. Photon search (primary, biased to Idaho region)
      4. Address book proximity annotation (post-Photon, 75m radius)

    Args:
        query: Free-text place, address, nickname or coordinate string.
        limit: Maximum number of Photon results; clamped to the range 1..20.

    Returns dict: {query, results: [...], count: N}
    Each result has the keys name, lat, lon, source, confidence, type, raw.
    Always 200-safe — empty results list is valid, never raises.

    Netsyms is preserved at /api/netsyms/lookup for direct structured
    access. Enrichment of Photon street-address hits with USPS plus4
    from Netsyms is a planned follow-up (not wired here).
    """
    limit = max(1, min(limit, 20))
    q = (query or '').strip()

    if not q:
        return {'query': q, 'results': [], 'count': 0}

    # ── 1. Coordinate detection ──
    coords = _parse_coords(q)
    if coords:
        return {
            'query': q,
            'results': [{
                'name': q,
                'lat': coords[0],
                'lon': coords[1],
                'source': 'coordinates',
                'confidence': 'exact',
                'type': 'coordinates',
                'raw': None,
            }],
            'count': 1,
        }

    # ── 2. Address book nickname short-circuit ──
    # Only short-circuit on single-word queries ("home", "work").
    # Multi-word queries fall through to Photon for proper ranking, so the
    # lookup is skipped entirely for them.
    normalized_q = ' '.join(q.lower().replace(',', ' ').split())
    is_single_word = ' ' not in normalized_q
    if is_single_word:
        try:
            from . import address_book
            ab_match = address_book.lookup(q)
            # Compare against None rather than relying on truthiness: 0.0 is
            # a valid latitude/longitude and must not disable the match.
            if (ab_match
                    and ab_match['confidence'] == 'exact'
                    and ab_match.get('lat') is not None
                    and ab_match.get('lon') is not None):
                logger.info("geocode: nickname short-circuit %r → %s", q, ab_match['name'])
                return {
                    'query': q,
                    'results': [{
                        'name': ab_match.get('address') or ab_match['name'],
                        'lat': ab_match['lat'],
                        'lon': ab_match['lon'],
                        'source': 'address_book',
                        'confidence': 'exact',
                        'type': 'nickname',
                        'raw': ab_match,
                    }],
                    'count': 1,
                }
        except Exception as e:
            logger.debug("geocode: address_book lookup failed: %s", e)

    # ── 3. Photon search (primary) ──
    features = []
    try:
        params = {
            'q': q,
            'limit': limit,
            'lat': GEOCODE_BIAS_LAT,
            'lon': GEOCODE_BIAS_LON,
            'zoom': GEOCODE_BIAS_ZOOM,
        }
        resp = requests.get(f"{PHOTON_URL}/api", params=params, timeout=5)
        resp.raise_for_status()
        features = resp.json().get('features') or []
    except requests.RequestException as e:
        logger.warning("geocode: Photon request failed: %s", e)
    except Exception as e:
        logger.warning("geocode: Photon parse error: %s", e)

    results = []
    for i, feature in enumerate(features):
        # Parse each feature independently so one malformed entry does not
        # discard the well-formed ones that follow it.
        try:
            props = feature.get('properties') or {}
            # GeoJSON permits a null geometry; such a feature has no usable
            # position, so skip it instead of reporting it at (0, 0).
            geom_coords = (feature.get('geometry') or {}).get('coordinates') or ()
            if len(geom_coords) < 2:
                continue
            # `i` stays the feature's position in the Photon response so the
            # positional confidence heuristic is unaffected by skipped items.
            result_type, confidence = _classify_photon_feature(props, i)
            results.append({
                'name': _photon_feature_to_name(props),
                'lat': geom_coords[1],  # GeoJSON order is [lon, lat]
                'lon': geom_coords[0],
                'source': 'photon',
                'confidence': confidence,
                'type': result_type,
                'raw': props,
            })
        except Exception as e:
            logger.warning("geocode: Photon parse error: %s", e)

    # ── 4. Address book annotation (post-Photon) ──
    # Guarded so a bad address book entry cannot break the "never raises"
    # contract; labels are optional decoration.
    try:
        _annotate_with_address_book(results)
    except Exception as e:
        logger.debug("geocode: address book annotation failed: %s", e)

    logger.info("geocode: %r → %d results", q, len(results))
    return {'query': q, 'results': results, 'count': len(results)}
|
||||
|
||||
|
||||
def reverse_geocode(lat: float, lon: float) -> str:
|
||||
"""Reverse geocode coordinates via Photon. Returns formatted address string."""
|
||||
try:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue