refactor(navi): Photon-first geocoding with ranked results

Inverts the /api/geocode chain. Photon is now the primary search
engine; the hand-rolled Netsyms free-text parser is removed.
The address book short-circuits single-word nicknames only ("home",
"work"). Full-address queries flow through Photon, and any result
within 75m of an address book entry is annotated with labeled_as.
Coordinate strings detected before search.

Response shape: /api/geocode now returns a ranked candidates
list (always 200 OK, empty list if no match). No more 404 for
unmatched queries. Users can type messy input — wrong case,
missing punctuation, abbreviations, typos — and get results
or close matches.

Netsyms preserved at /api/netsyms/lookup for direct access.
USPS plus4 enrichment of Photon street-address hits is a
planned follow-up.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Matt 2026-04-20 15:48:03 +00:00
commit c76d63b785
3 changed files with 377 additions and 140 deletions

158
lib/geocode_test.py Normal file
View file

@ -0,0 +1,158 @@
#!/usr/bin/env python3
"""Tests for RECON Photon-first geocode chain."""
import sys
import os
import json
import urllib.request
import urllib.parse
BASE = "http://localhost:8420"


def _top(r):
    """Return the first (highest-ranked) candidate of a geocode response."""
    return r["results"][0]


def _photon_top(r):
    """Pass when there is at least one candidate and the top one is from Photon."""
    return r["count"] >= 1 and _top(r)["source"] == "photon"


def _exact_single(source):
    """Build a check: exactly one candidate, from *source*, confidence 'exact'."""
    def check(r):
        return (
            r["count"] == 1
            and _top(r)["source"] == source
            and _top(r)["confidence"] == "exact"
        )
    return check


def _no_match(r):
    """Pass when the response reports zero candidates and an empty list."""
    return r["count"] == 0 and r["results"] == []


def _case(name, query, check):
    """Assemble one table entry in the {name, query, check} shape the runner reads."""
    return {"name": name, "query": query, "check": check}


# Each entry: display name, the raw ?q= value, and a predicate over the
# decoded JSON body of /api/geocode.
TESTS = [
    _case(
        "home → nickname short-circuit",
        "home",
        lambda r: _exact_single("address_book")(r) and _top(r)["type"] == "nickname",
    ),
    # labeled_as=Home may or may not appear depending on Photon's
    # geocoding precision — the key invariant is that this multi-word
    # query flows through Photon, not the address book shortcut.
    _case(
        "214 north st filer → photon results (multi-word, not nickname)",
        "214 north st filer",
        _photon_top,
    ),
    _case(
        "214 North St, Filer, ID → photon (case/punctuation)",
        "214 North St, Filer, ID",
        _photon_top,
    ),
    _case(
        "214 NORTH ST FILER ID → photon (uppercase)",
        "214 NORTH ST FILER ID",
        _photon_top,
    ),
    _case(
        "1600 Pennsylvania Ave Washington DC → White House",
        "1600 Pennsylvania Ave Washington DC",
        _photon_top,
    ),
    _case(
        "1600 pennsylvania ave washington dc → lowercase",
        "1600 pennsylvania ave washington dc",
        _photon_top,
    ),
    _case("starbucks filer → POI result", "starbucks filer", _photon_top),
    _case(
        "filer idaho → locality",
        "filer idaho",
        lambda r: _photon_top(r) and _top(r)["type"] == "locality",
    ),
    _case("filer → partial query, at least 1 result", "filer", _photon_top),
    _case(
        "42.5736, -114.6066 → coordinates (with space)",
        "42.5736, -114.6066",
        lambda r: _exact_single("coordinates")(r) and _top(r)["type"] == "coordinates",
    ),
    _case(
        "42.5736,-114.6066 → coordinates (no space)",
        "42.5736,-114.6066",
        _exact_single("coordinates"),
    ),
    _case("boise → at least 1 result", "boise", _photon_top),
    _case("toronto → CA canary", "toronto", _photon_top),
    _case("asdfghjklqwerty → empty results, 200 OK", "asdfghjklqwerty", _no_match),
    _case("empty query → empty results", "", _no_match),
]
def _fetch(base, query):
    """GET {base}/api/geocode?q=<query> and return (status, decoded_body).

    HTTP error responses are returned as (code, body) with body falling back
    to {} when it is not JSON. Transport errors and an undecodable success
    body propagate to the caller.
    """
    # urlencode({"q": ""}) already yields "q=", so empty queries need no
    # special case.
    url = f"{base}/api/geocode?{urllib.parse.urlencode({'q': query})}"
    try:
        req = urllib.request.Request(url)
        with urllib.request.urlopen(req, timeout=10) as resp:
            return resp.status, json.loads(resp.read())
    except urllib.error.HTTPError as e:
        try:
            return e.code, json.loads(e.read())
        except Exception:
            return e.code, {}


def _summarize(top):
    """Format the one-line description of the top candidate (name capped at 50 chars)."""
    # `or ''` guards against an explicit null name in the JSON body.
    name = str(top.get('name') or '')[:50]
    return (
        f"source={top.get('source','')} type={top.get('type','')} "
        f"conf={top.get('confidence','')} name={name}"
    )


def main(tests=None, base=None):
    """Run every geocode test case against a live server and print a report.

    Args:
        tests: Iterable of {name, query, check} dicts; defaults to TESTS.
        base: Server base URL; defaults to BASE.

    Returns:
        0 when every case passed, 1 otherwise (suitable for sys.exit).
    """
    tests = TESTS if tests is None else tests
    base = BASE if base is None else base
    passed = 0
    failed = 0
    for t in tests:
        try:
            status, body = _fetch(base, t["query"])
        except Exception as e:
            print(f" [FAIL] {t['name']}")
            print(f" EXCEPTION: {e}")
            failed += 1
            continue

        if not isinstance(body, dict):
            body = {}
        try:
            ok = status == 200 and bool(t["check"](body))
        except (KeyError, IndexError, TypeError, AttributeError):
            # A body missing the expected keys is a failed case, not a
            # reason to abort the remaining cases.
            ok = False

        if ok:
            passed += 1
        else:
            failed += 1

        results = body.get("results")
        has_top = isinstance(results, list) and results and isinstance(results[0], dict)
        top = results[0] if has_top else {}
        top_summary = _summarize(top)

        tag = "PASS" if ok else "FAIL"
        print(f" [{tag}] {t['name']}")
        if not ok:
            print(f" HTTP {status}, count={body.get('count','?')}, top: {top_summary}")
        else:
            labeled = f" labeled_as={top.get('labeled_as')}" if top.get('labeled_as') else ""
            print(f"{top_summary}{labeled}")

    print(f"\n{passed} passed, {failed} failed")
    return 0 if failed == 0 else 1


# Guarded so that importing this module (e.g. pytest collecting *_test.py)
# neither hits the network nor calls sys.exit.
if __name__ == "__main__":
    sys.exit(main())

View file

@ -1,5 +1,6 @@
"""Navigation tools: geocoding via Photon and routing via Valhalla."""
import math
import re
import requests
@ -10,178 +11,242 @@ logger = setup_logging('recon.nav_tools')
PHOTON_URL = "http://localhost:2322"
VALHALLA_URL = "http://localhost:8002"

# Regional bias for Photon searches (Idaho-centric for Matt's use case).
# Adjustable — Photon uses these to rank nearby results higher.
GEOCODE_BIAS_LAT = 42.5736
GEOCODE_BIAS_LON = -114.6066
GEOCODE_BIAS_ZOOM = 10

# Distance threshold (meters) for annotating Photon results with address
# book labels. 75m covers GPS jitter + geocoder imprecision.
ADDRESS_BOOK_ANNOTATION_RADIUS_M = 75

# Coordinate regex — handles comma-separated and space-separated forms.
# Both numbers must contain a decimal point, so bare integers such as
# "214 5" are not treated as coordinates.
_COORD_RE = re.compile(
    r'^\s*(-?\d+\.\d+)\s*[,\s]\s*(-?\d+\.\d+)\s*$'
)

VALID_MODES = {"auto", "pedestrian", "bicycle", "truck"}
def _parse_coords(text: str):
    """Return (lat, lon) if text looks like coordinates with valid bounds, else None.

    The text must match _COORD_RE; the first captured number is taken as
    latitude and the second as longitude. Pairs outside -90..90 latitude
    or -180..180 longitude are rejected.
    """
    m = _COORD_RE.match(text.strip())
    if not m:
        return None
    lat, lon = float(m.group(1)), float(m.group(2))
    # Reject syntactically valid but geographically impossible pairs.
    if -90 <= lat <= 90 and -180 <= lon <= 180:
        return lat, lon
    return None
def _haversine_m(lat1, lon1, lat2, lon2):
    """Haversine distance in meters between two (lat, lon) points.

    All four arguments are in decimal degrees. Uses a spherical Earth of
    radius 6,371,000 m.
    """
    R = 6_371_000  # Earth radius in meters
    rlat1, rlat2 = math.radians(lat1), math.radians(lat2)
    dlat = math.radians(lat2 - lat1)
    dlon = math.radians(lon2 - lon1)
    a = math.sin(dlat / 2) ** 2 + math.cos(rlat1) * math.cos(rlat2) * math.sin(dlon / 2) ** 2
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # near-antipodal points, which would make sqrt(1 - a) raise ValueError.
    a = min(1.0, max(0.0, a))
    return R * 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
def _classify_photon_feature(props, index):
    """Classify a Photon feature into (type, confidence).

    Args:
        props: The feature's ``properties`` dict.
        index: Zero-based position of the feature in the result list.

    Returns:
        (result_type, confidence) where result_type is 'street_address',
        'locality' or 'poi', and confidence is 'high', 'medium' or 'low'.
    """
    osm_value = props.get('osm_value', '')
    feat_type = props.get('type', '')

    # Type classification. Anything that is neither an address nor an
    # administrative place is reported as a POI.
    if props.get('housenumber') or osm_value in ('house', 'residential'):
        result_type = 'street_address'
    elif feat_type in ('city', 'town', 'village', 'hamlet', 'county', 'state', 'country'):
        result_type = 'locality'
    else:
        result_type = 'poi'

    # Confidence — simple positional heuristic
    if index == 0:
        confidence = 'high'
    elif index <= 2:
        confidence = 'medium'
    else:
        confidence = 'low'

    return result_type, confidence
def _photon_feature_to_name(props):
    """Compose a comma-separated display label from Photon feature properties.

    The label leads with "<housenumber> <street>" (followed by the feature
    name when it differs from the street), or else the name, or else the
    street. City, county, state and country follow, skipping any value equal
    to the piece just before it. Returns 'Unknown' when nothing is available.
    """
    number = props.get('housenumber')
    street = props.get('street')
    label = props.get('name', '')

    if number and street:
        pieces = [f"{number} {street}"]
        if label and label != street:
            pieces.append(label)
    elif label:
        pieces = [label]
    elif street:
        pieces = [street]
    else:
        pieces = []

    for admin_key in ('city', 'county', 'state', 'country'):
        admin = props.get(admin_key)
        if not admin:
            continue
        # Avoid "Boise, Boise" when a place is named after its city.
        if pieces and admin == pieces[-1]:
            continue
        pieces.append(admin)

    return ', '.join(filter(None, pieces)) or 'Unknown'
def _annotate_with_address_book(results):
    """Add labeled_as to results within ADDRESS_BOOK_ANNOTATION_RADIUS_M of an address book entry.

    Mutates the result dicts in place and returns None. Each result gets the
    name of the first address book entry (in load order) that lies within
    the radius. Best-effort: if the address book cannot be loaded, the
    results are left untouched.
    """
    try:
        from . import address_book
        entries = address_book.load()
    except Exception as e:
        logger.debug("geocode: address_book load failed: %s", e)
        return

    # Entries lacking coordinates or a name can never label anything;
    # filter them once instead of once per result.
    usable = [
        (entry['lat'], entry['lon'], entry['name'])
        for entry in entries or []
        if entry.get('lat') is not None
        and entry.get('lon') is not None
        and 'name' in entry
    ]

    for result in results:
        rlat, rlon = result.get('lat'), result.get('lon')
        if rlat is None or rlon is None:
            continue
        for elat, elon, ename in usable:
            if _haversine_m(rlat, rlon, elat, elon) <= ADDRESS_BOOK_ANNOTATION_RADIUS_M:
                result['labeled_as'] = ename
                break
def _geocode(query: str):
    """Geocode a place name and return a single (lat, lon, display_name) tuple.

    Thin wrapper over geocode(query, limit=1) that unwraps the top-ranked
    candidate. For the full ranked-results API, use geocode() instead.

    Raises:
        ValueError: if geocode() returns no candidates.

    NOTE(review): geocode() logs Photon request failures instead of raising,
    so an unreachable Photon now surfaces here as ValueError as well —
    confirm that callers do not rely on a separate "service unavailable"
    error.
    """
    result = geocode(query, limit=1)
    results = result.get('results', [])
    if not results:
        raise ValueError(f"Could not find location: {query}")
    top = results[0]
    return top['lat'], top['lon'], top['name']
def geocode(query: str, limit: int = 10):
    """
    Photon-first geocoding with ranked results.

    Chain:
      1. Coordinate detection (pre-search)
      2. Address book nickname short-circuit (single-word queries only)
      3. Photon search (primary, biased to Idaho region)
      4. Address book proximity annotation (post-Photon, 75m radius)

    Args:
        query: Free-text query, nickname, or "lat, lon" string. None and
            whitespace-only input yield an empty result set.
        limit: Maximum number of Photon candidates, clamped to 1..20.
            Values that cannot be converted to int fall back to 10.

    Returns dict: {query, results: [...], count: N}. Every result carries
    name, lat, lon, source, confidence, type and raw.
    Always 200-safe — empty results list is valid, never raises.

    Netsyms is preserved at /api/netsyms/lookup for direct structured
    access. Enrichment of Photon street-address hits with USPS plus4
    from Netsyms is a planned follow-up (not wired here).
    """
    try:
        limit = max(1, min(int(limit), 20))
    except (TypeError, ValueError):
        limit = 10

    q = (query or '').strip()
    if not q:
        return {'query': q, 'results': [], 'count': 0}

    # ── 1. Coordinate detection ──
    coords = _parse_coords(q)
    if coords:
        return {
            'query': q,
            'results': [{
                'name': q,
                'lat': coords[0],
                'lon': coords[1],
                'source': 'coordinates',
                'confidence': 'exact',
                'type': 'coordinates',
                'raw': None,
            }],
            'count': 1,
        }

    # ── 2. Address book nickname short-circuit ──
    # Only short-circuit on single-word queries ("home", "work").
    # Multi-word queries fall through to Photon for proper ranking, so the
    # address book is not consulted for them at this stage.
    is_single_word = len(q.replace(',', ' ').split()) <= 1
    if is_single_word:
        try:
            from . import address_book
            ab_match = address_book.lookup(q)
            if (ab_match
                    and ab_match['confidence'] == 'exact'
                    and ab_match.get('lat') and ab_match.get('lon')):
                logger.info("geocode: nickname short-circuit %r -> %s", q, ab_match['name'])
                return {
                    'query': q,
                    'results': [{
                        'name': ab_match.get('address') or ab_match['name'],
                        'lat': ab_match['lat'],
                        'lon': ab_match['lon'],
                        'source': 'address_book',
                        'confidence': 'exact',
                        'type': 'nickname',
                        'raw': ab_match,
                    }],
                    'count': 1,
                }
        except Exception as e:
            logger.debug("geocode: address_book lookup failed: %s", e)

    # ── 3. Photon search (primary) ──
    results = []
    try:
        params = {
            'q': q,
            'limit': limit,
            'lat': GEOCODE_BIAS_LAT,
            'lon': GEOCODE_BIAS_LON,
            'zoom': GEOCODE_BIAS_ZOOM,
        }
        resp = requests.get(f"{PHOTON_URL}/api", params=params, timeout=5)
        resp.raise_for_status()
        data = resp.json()
        for feature in data.get('features', []):
            props = feature.get('properties') or {}
            geom_coords = (feature.get('geometry') or {}).get('coordinates') or ()
            if len(geom_coords) < 2:
                # A feature without a point cannot be routed to; skip it
                # rather than reporting it at (0, 0).
                continue
            # Rank by position among emitted results so skipped features
            # do not leave gaps in the confidence ladder.
            result_type, confidence = _classify_photon_feature(props, len(results))
            results.append({
                'name': _photon_feature_to_name(props),
                'lat': geom_coords[1],  # GeoJSON order is [lon, lat]
                'lon': geom_coords[0],
                'source': 'photon',
                'confidence': confidence,
                'type': result_type,
                'raw': props,
            })
    except requests.RequestException as e:
        logger.warning("geocode: Photon request failed: %s", e)
    except Exception as e:
        logger.warning("geocode: Photon parse error: %s", e)

    # ── 4. Address book annotation (post-Photon) ──
    # Annotation is cosmetic; a failure here must not break the
    # never-raises contract.
    try:
        _annotate_with_address_book(results)
    except Exception as e:
        logger.debug("geocode: address_book annotation failed: %s", e)

    logger.info("geocode: %r -> %d results", q, len(results))
    return {'query': q, 'results': results, 'count': len(results)}
def reverse_geocode(lat: float, lon: float) -> str:

View file

@ -1,9 +1,9 @@
"""
RECON Netsyms API + Geocode Flask Blueprints.
GET /api/netsyms/lookup?q=<free text>&country=<optional>
GET /api/netsyms/health
GET /api/geocode?q=<query>&limit=<N> (Photon-first search with ranked results)
"""
from flask import Blueprint, request, jsonify
@ -37,12 +37,26 @@ def api_netsyms_health():
@geocode_bp.route('/api/geocode')
def api_geocode():
    """
    Photon-first geocoding with ranked candidates.

    GET /api/geocode?q=<query>&limit=<N>

    Always returns 200 OK with:
      {query, results: [{name, lat, lon, source, confidence, type, raw, ...}], count}

    - source: "address_book" | "coordinates" | "photon"
    - confidence: "exact" | "high" | "medium" | "low"
    - type: "nickname" | "coordinates" | "street_address" | "poi" | "locality"
    - labeled_as: present when result is within 75m of an address book entry
    - Empty results array is valid (no match). No 404s.
    - A missing or blank q yields an empty results array, not an error.
    - limit is clamped to 1..20; a non-numeric value falls back to 10.
    """
    q = request.args.get('q', '').strip()
    # type=int makes the lookup return the default when conversion fails.
    limit = request.args.get('limit', default=10, type=int)
    limit = max(1, min(limit, 20))
    result = nav_tools.geocode(q, limit=limit)
    return jsonify(result)