mirror of
https://github.com/zvx-echo6/recon.git
synced 2026-05-20 06:34:40 +02:00
Merge feature/navi-integration: Navi backend (address book, Netsyms, geocoding chain, reverse endpoint)
This commit is contained in:
commit
d4c5c371ca
15 changed files with 2163 additions and 0 deletions
18
config/address_book.yaml
Normal file
18
config/address_book.yaml
Normal file
|
|
@ -0,0 +1,18 @@
|
||||||
|
# RECON Address Book — saved locations for navigation shortcuts.
|
||||||
|
# Entries are matched by name and aliases (case-insensitive).
|
||||||
|
# Add new entries by appending to the list below.
|
||||||
|
|
||||||
|
entries:
|
||||||
|
- id: home
|
||||||
|
name: Home
|
||||||
|
aliases:
|
||||||
|
- home
|
||||||
|
- matt's house
|
||||||
|
- 214 north st
|
||||||
|
- 214 north street
|
||||||
|
address: "214 North St, Filer, ID 83328"
|
||||||
|
lat: 42.5735833
|
||||||
|
lon: -114.6066389
|
||||||
|
tags:
|
||||||
|
- residence
|
||||||
|
- primary
|
||||||
160
lib/address_book.py
Normal file
160
lib/address_book.py
Normal file
|
|
@ -0,0 +1,160 @@
|
||||||
|
"""
|
||||||
|
RECON Address Book — YAML-backed saved-location lookup.
|
||||||
|
|
||||||
|
Provides named locations (home, work, etc.) that short-circuit Photon
|
||||||
|
geocoding when an exact alias match is found.
|
||||||
|
|
||||||
|
Config: /opt/recon/config/address_book.yaml
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import threading
|
||||||
|
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
from .utils import setup_logging
|
||||||
|
|
||||||
|
logger = setup_logging('recon.address_book')
|
||||||
|
|
||||||
|
_CONFIG_PATH = os.path.join(
|
||||||
|
os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
|
||||||
|
'config', 'address_book.yaml',
|
||||||
|
)
|
||||||
|
|
||||||
|
_lock = threading.Lock()
|
||||||
|
_entries: list[dict] = []
|
||||||
|
_mtime: float = 0.0
|
||||||
|
|
||||||
|
|
||||||
|
def _reload_if_changed():
    """Reload the address book from YAML when the file's mtime has changed.

    Updates the module-level ``_entries`` and ``_mtime``. A missing file
    resets both to their empty defaults. An unreadable or syntactically
    invalid file is logged and the previously loaded entries are kept, so a
    half-saved edit does not make every lookup raise.
    """
    global _entries, _mtime
    try:
        st = os.stat(_CONFIG_PATH)
    except FileNotFoundError:
        logger.warning("Address book not found: %s", _CONFIG_PATH)
        _entries = []
        _mtime = 0.0
        return

    # Cheap check before taking the lock: nothing changed since last load.
    if st.st_mtime == _mtime:
        return

    with _lock:
        # Re-check after acquiring the lock; another caller may have
        # finished a reload while this one was waiting.
        try:
            st = os.stat(_CONFIG_PATH)
        except FileNotFoundError:
            _entries = []
            _mtime = 0.0
            return
        if st.st_mtime == _mtime:
            return

        try:
            with open(_CONFIG_PATH, 'r', encoding='utf-8') as f:
                data = yaml.safe_load(f) or {}
        except (OSError, yaml.YAMLError) as exc:
            logger.error("Address book could not be read (%s): %s", _CONFIG_PATH, exc)
            # Remember this mtime so the broken file is not re-parsed on
            # every call; the next save changes the mtime and retries.
            _mtime = st.st_mtime
            return

        # `entries:` with no value parses to None, and a top-level list or
        # scalar has no .get(); treat all of those as an empty book.
        raw = data.get('entries') if isinstance(data, dict) else None
        if not isinstance(raw, list):
            if raw is not None or not isinstance(data, dict):
                logger.warning("Address book has no 'entries' list: %s", _CONFIG_PATH)
            raw = []

        loaded = []
        for entry in raw:
            if not isinstance(entry, dict):
                logger.warning("Skipping malformed address book entry: %r", entry)
                continue
            # Normalise aliases to lowercase for matching. YAML may yield
            # None (empty `aliases:`) or non-strings (e.g. a bare `- 214`).
            aliases = [str(a).lower() for a in (entry.get('aliases') or [])]
            loaded.append({
                'id': entry.get('id', ''),
                # lookup() normalises the name as text, so coerce it here.
                'name': str(entry.get('name') or ''),
                'aliases': aliases,
                'address': entry.get('address', ''),
                'lat': entry.get('lat'),
                'lon': entry.get('lon'),
                'tags': entry.get('tags', []),
            })
        _entries = loaded
        _mtime = st.st_mtime
        logger.info("Address book loaded: %d entries from %s", len(_entries), _CONFIG_PATH)
|
||||||
|
|
||||||
|
|
||||||
|
def load():
    """Refresh the cached entries from disk if the file changed, then return them.

    The returned list is the module's own cache object, not a copy.
    """
    _reload_if_changed()
    return _entries
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize(text: str) -> str:
|
||||||
|
"""Lowercase, strip, remove commas, collapse whitespace."""
|
||||||
|
t = text.strip().lower()
|
||||||
|
t = t.replace(',', ' ')
|
||||||
|
return ' '.join(t.split())
|
||||||
|
|
||||||
|
|
||||||
|
def lookup(query: str):
    """
    Look up a query against each entry's name and aliases.

    Returns a copy of the matching entry dict with an added 'confidence'
    field, or None when nothing matches (or the query is empty):
      - "exact":   query equals a name/alias, OR query starts with a
                   name/alias followed by a space
      - "partial": a name/alias starts with the query, OR a name/alias
                   appears as a run of whole tokens inside the query

    Rules, applied to every name/alias form after normalisation:
      1. query == form                      -> exact, returned immediately
      2. query starts with form + " "       -> exact candidate
      3. form starts with query             -> partial candidate
         (any prefix, no word boundary: "214 n" matches "214 north st"
         so results appear while the user is still typing)
      4. form occurs in query as whole tokens -> partial candidate

    A Rule 1 hit wins outright. Otherwise the first Rule 2 candidate found
    is returned, else the first Rule 3/4 candidate.
    """
    _reload_if_changed()
    q = _normalize(query)
    if not q:
        return None

    # Both q and every form are normalised (single spaces, no padding), so
    # "form is a run of whole tokens in q" is a substring test once both
    # sides are wrapped in spaces.
    padded_q = f' {q} '

    first_exact = None
    first_partial = None

    for entry in _entries:
        forms = [_normalize(entry['name'])]
        forms.extend(_normalize(a) for a in entry.get('aliases', []))

        for form in forms:
            if not form:
                continue

            # Rule 1: exact match
            if q == form:
                return {**entry, 'confidence': 'exact'}

            # Rule 2: query starts with alias + word boundary
            if q.startswith(form + ' '):
                if first_exact is None:
                    first_exact = entry
                continue

            # Rule 3: alias starts with query (user still typing).
            # q != form here, so a prefix match implies q is shorter.
            if form.startswith(q):
                if first_partial is None:
                    first_partial = entry
                continue

            # Rule 4: alias is a contiguous token sub-sequence of the query
            if first_partial is None and f' {form} ' in padded_q:
                first_partial = entry

    if first_exact is not None:
        return {**first_exact, 'confidence': 'exact'}

    if first_partial is not None:
        return {**first_partial, 'confidence': 'partial'}

    return None
|
||||||
|
|
||||||
|
|
||||||
|
def list_all():
    """Return a new list holding every address book entry.

    The list itself is a fresh copy; the entry dicts inside it are the
    cached objects.
    """
    _reload_if_changed()
    snapshot = list(_entries)
    return snapshot
|
||||||
31
lib/address_book_api.py
Normal file
31
lib/address_book_api.py
Normal file
|
|
@ -0,0 +1,31 @@
|
||||||
|
"""
|
||||||
|
RECON Address Book API — Flask Blueprint.
|
||||||
|
|
||||||
|
GET /api/address_book/lookup?q=<query> — best match or 404
|
||||||
|
GET /api/address_book/list — all entries
|
||||||
|
"""
|
||||||
|
|
||||||
|
from flask import Blueprint, request, jsonify
|
||||||
|
|
||||||
|
from . import address_book
|
||||||
|
|
||||||
|
address_book_bp = Blueprint('address_book', __name__)
|
||||||
|
|
||||||
|
|
||||||
|
@address_book_bp.route('/api/address_book/lookup')
def api_address_book_lookup():
    """Handle GET /api/address_book/lookup?q=<query>.

    Responds 400 with a JSON error when ``q`` is missing or blank, 404 with
    an empty body when nothing matches, otherwise the matching entry as JSON.
    """
    query = request.args.get('q', '').strip()
    if query:
        match = address_book.lookup(query)
        if match is not None:
            return jsonify(match)
        return '', 404
    return jsonify({'error': 'Missing q parameter'}), 400
|
||||||
|
|
||||||
|
|
||||||
|
@address_book_bp.route('/api/address_book/list')
def api_address_book_list():
    """Handle GET /api/address_book/list: every entry as a JSON array."""
    return jsonify(address_book.list_all())
|
||||||
91
lib/address_book_test.py
Normal file
91
lib/address_book_test.py
Normal file
|
|
@ -0,0 +1,91 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Tests for RECON address book module."""
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Add project root to path
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
from lib import address_book
|
||||||
|
|
||||||
|
def _is_exact_home(r):
    """Check that *r* is an exact-confidence match on the 'home' entry."""
    return r is not None and r['confidence'] == 'exact' and r['id'] == 'home'


def _is_partial(r):
    """Check that *r* is a partial-confidence match on any entry."""
    return r is not None and r['confidence'] == 'partial'


def _is_none(r):
    """Check that the lookup found nothing."""
    return r is None


# Each case is (label, thunk producing a result, predicate on that result).
TESTS = [
    # ── Existing tests ──
    ("lookup('home') → exact",
     lambda: address_book.lookup("home"),
     _is_exact_home),

    ("lookup('Home') → exact (case-insensitive)",
     lambda: address_book.lookup("Home"),
     _is_exact_home),

    ("lookup('214 north st') → exact via alias",
     lambda: address_book.lookup("214 north st"),
     _is_exact_home),

    ("lookup('214 North Street') → exact via alias",
     lambda: address_book.lookup("214 North Street"),
     _is_exact_home),

    ("lookup('nonexistent place') → None",
     lambda: address_book.lookup("nonexistent place"),
     _is_none),

    ("list_all() → 1 entry",
     lambda: address_book.list_all(),
     lambda r: isinstance(r, list) and len(r) == 1 and r[0]['id'] == 'home'),

    # ── New prefix+boundary tests ──
    ("lookup('214 north st filer') → exact (query starts with alias)",
     lambda: address_book.lookup("214 north st filer"),
     _is_exact_home),

    ("lookup('214 North St Filer ID') → exact (case + trailing state)",
     lambda: address_book.lookup("214 North St Filer ID"),
     _is_exact_home),

    ("lookup('214 north st, filer, id') → exact (commas stripped)",
     lambda: address_book.lookup("214 north st, filer, id"),
     _is_exact_home),

    ("lookup('home today') → exact (short alias + trailing text)",
     lambda: address_book.lookup("home today"),
     _is_exact_home),

    ("lookup('214') → partial (query is prefix of alias)",
     lambda: address_book.lookup("214"),
     _is_partial),

    ("lookup('214 n') → partial (partial prefix of alias)",
     lambda: address_book.lookup("214 n"),
     _is_partial),

    ("lookup('completely unrelated query') → None",
     lambda: address_book.lookup("completely unrelated query"),
     _is_none),

    ("lookup('214 north streets of filer') → None (no word boundary after st)",
     lambda: address_book.lookup("214 north streets of filer"),
     _is_none),
]


def main():
    """Run every case in TESTS, print one line per case, return an exit code.

    Returns 0 when all cases pass and 1 otherwise. An exception raised by a
    case counts as a failure and its message is printed as the result.
    """
    passed = 0
    failed = 0
    for name, fn, check in TESTS:
        try:
            result = fn()
            ok = check(result)
        except Exception as e:
            ok = False
            result = f"EXCEPTION: {e}"

        status = "PASS" if ok else "FAIL"
        if ok:
            passed += 1
        else:
            failed += 1
        print(f" [{status}] {name}")
        if not ok:
            print(f" got: {result}")

    print(f"\n{passed} passed, {failed} failed")
    return 0 if failed == 0 else 1


# Guarded so that importing this module (for example during test collection
# of files matching *_test.py) does not run the cases or call sys.exit().
if __name__ == "__main__":
    sys.exit(main())
|
||||||
10
lib/api.py
10
lib/api.py
|
|
@ -57,6 +57,16 @@ class _LargeZimRequest(_FlaskRequest):
|
||||||
return super()._get_file_stream(total_content_length, content_type, filename, content_length)
|
return super()._get_file_stream(total_content_length, content_type, filename, content_length)
|
||||||
|
|
||||||
app.request_class = _LargeZimRequest
|
app.request_class = _LargeZimRequest
|
||||||
|
# ── Address Book Blueprint ──
|
||||||
|
from .address_book_api import address_book_bp
|
||||||
|
app.register_blueprint(address_book_bp)
|
||||||
|
|
||||||
|
# ── Netsyms + Geocode Blueprints ──
|
||||||
|
from .netsyms_api import netsyms_bp, geocode_bp
|
||||||
|
app.register_blueprint(netsyms_bp)
|
||||||
|
app.register_blueprint(geocode_bp)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# ── Navigation Constants ──
|
# ── Navigation Constants ──
|
||||||
|
|
||||||
|
|
|
||||||
117
lib/aurora_nav_tool.py
Normal file
117
lib/aurora_nav_tool.py
Normal file
|
|
@ -0,0 +1,117 @@
|
||||||
|
"""
|
||||||
|
title: Navigation
|
||||||
|
author: Echo6
|
||||||
|
version: 1.1.0
|
||||||
|
description: Turn-by-turn directions and geocoding via Photon + Valhalla on recon-vm. Supports driving, walking, cycling, and truck routing with worldwide coverage (281M places).
|
||||||
|
"""
|
||||||
|
|
||||||
|
import re
|
||||||
|
import json
|
||||||
|
import requests
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
_COORD_RE = re.compile(r'^(-?\d+\.?\d*)\s*,\s*(-?\d+\.?\d*)$')
|
||||||
|
|
||||||
|
|
||||||
|
class Tools:
    # Navigation tool: geocodes two free-text locations through Photon and
    # asks Valhalla for a route between them.

    class Valves(BaseModel):
        # Service endpoints; both defaults point at the same host.
        photon_url: str = Field(
            default="http://100.64.0.24:2322",
            description="Photon geocoding service URL (recon-vm)",
        )
        valhalla_url: str = Field(
            default="http://100.64.0.24:8002",
            description="Valhalla routing service URL (recon-vm)",
        )

    def __init__(self):
        self.valves = self.Valves()

    def _geocode(self, query: str):
        # Resolve `query` to (lat, lon, display_name).
        # Returns (None, None, None) when Photon has no match.
        # Raises requests.RequestException on network or HTTP errors.

        # A literal "lat,lon" pair bypasses Photon; the raw query is reused
        # as the display name.
        m = _COORD_RE.match(query.strip())
        if m:
            lat, lon = float(m.group(1)), float(m.group(2))
            return lat, lon, query
        resp = requests.get(
            f"{self.valves.photon_url}/api",
            params={"q": query, "limit": 1},
            timeout=10,
        )
        resp.raise_for_status()
        features = resp.json().get("features", [])
        if not features:
            return None, None, None
        props = features[0]["properties"]
        # The code reads index 1 as latitude and index 0 as longitude.
        coords = features[0]["geometry"]["coordinates"]
        # Build "name, city, state, country", skipping a value that repeats
        # the one just added (e.g. a city whose name equals the place name).
        parts = [props.get("name", "")]
        for key in ("city", "state", "country"):
            v = props.get(key)
            if v and v != parts[-1]:
                parts.append(v)
        return coords[1], coords[0], ", ".join(p for p in parts if p)

    def get_directions(
        self,
        origin: str,
        destination: str,
        mode: str = "auto",
    ) -> str:
        """
        Get turn-by-turn directions between two locations. When this tool returns results, present the directions exactly as returned — do not summarize or rephrase. Include all steps.

        :param origin: Starting location — address, place name, or lat,lon coordinates
        :param destination: Destination — address, place name, or lat,lon coordinates
        :param mode: Travel mode: auto, pedestrian, bicycle, or truck (default: auto)
        :return: Formatted turn-by-turn directions
        """
        # Unknown modes fall back to driving rather than failing the call.
        if mode not in ("auto", "pedestrian", "bicycle", "truck"):
            mode = "auto"

        # Geocoding goes over the network too; report an outage as a message
        # (like the routing call below) instead of letting the exception
        # escape. ValueError covers a non-JSON response body.
        try:
            orig_lat, orig_lon, orig_name = self._geocode(origin)
            if orig_lat is None:
                return f"Could not find location: {origin}"

            dest_lat, dest_lon, dest_name = self._geocode(destination)
            if dest_lat is None:
                return f"Could not find location: {destination}"
        except (requests.RequestException, ValueError):
            return "Navigation service unavailable"

        try:
            resp = requests.post(
                f"{self.valves.valhalla_url}/route",
                json={
                    "locations": [
                        {"lat": orig_lat, "lon": orig_lon},
                        {"lat": dest_lat, "lon": dest_lon},
                    ],
                    "costing": mode,
                    "directions_options": {"units": "miles"},
                },
                timeout=30,
            )
        except requests.RequestException:
            return "Navigation service unavailable"

        if resp.status_code != 200:
            return "No route found between locations"

        trip = resp.json()["trip"]
        summary = trip["summary"]
        # Only the first leg is read; the request sends exactly two locations.
        legs = trip["legs"][0]["maneuvers"]

        miles = round(summary["length"], 1)
        # summary["time"] is divided by 60 to report minutes.
        minutes = round(summary["time"] / 60, 1)

        lines = [
            f"Directions from {orig_name} to {dest_name} ({mode}):",
            f"Distance: {miles} miles | Time: {minutes} minutes",
            "",
        ]
        for i, m in enumerate(legs, 1):
            inst = m["instruction"]
            dist = m.get("length", 0)
            # Zero-length maneuvers are listed without a distance.
            if dist > 0:
                lines.append(f"{i}. {inst} — {round(dist, 1)} mi")
            else:
                lines.append(f"{i}. {inst}")

        return "\n".join(lines)
|
||||||
708
lib/geocode.py
Normal file
708
lib/geocode.py
Normal file
|
|
@ -0,0 +1,708 @@
|
||||||
|
"""
|
||||||
|
RECON geocode — structured preprocessing, multi-source retrieval, reranking.
|
||||||
|
|
||||||
|
Replaces the naive Photon-only search with:
|
||||||
|
1. usaddress parsing + intent classification (ADDRESS / POI / LOCALITY / COORD / POSTCODE)
|
||||||
|
2. Multi-source retrieval: ADDRESS → Netsyms + Photon; POI/LOCALITY → Photon /api
|
||||||
|
3. Python reranker with weighted signals
|
||||||
|
|
||||||
|
Public entry point: geocode(query, limit) → {query, results, count}
|
||||||
|
"""
|
||||||
|
|
||||||
|
import math
|
||||||
|
import re
|
||||||
|
import logging
|
||||||
|
|
||||||
|
import requests
|
||||||
|
import usaddress
|
||||||
|
from rapidfuzz import fuzz
|
||||||
|
|
||||||
|
from .utils import setup_logging
|
||||||
|
|
||||||
|
logger = setup_logging('recon.geocode')
|
||||||
|
|
||||||
|
# ── Trace logger for reranking audit ──
|
||||||
|
_trace_logger = logging.getLogger('recon.geocode.trace')
|
||||||
|
_trace_handler = logging.FileHandler('/tmp/geocode_rerank_trace.log')
|
||||||
|
_trace_handler.setFormatter(logging.Formatter('%(asctime)s %(message)s'))
|
||||||
|
_trace_logger.addHandler(_trace_handler)
|
||||||
|
_trace_logger.setLevel(logging.DEBUG)
|
||||||
|
|
||||||
|
# ── Config constants ──
|
||||||
|
PHOTON_URL = "http://localhost:2322"
|
||||||
|
GEOCODE_BIAS_LAT = 42.5736
|
||||||
|
GEOCODE_BIAS_LON = -114.6066
|
||||||
|
GEOCODE_BIAS_ZOOM = 10
|
||||||
|
ADDRESS_BOOK_ANNOTATION_RADIUS_M = 75
|
||||||
|
|
||||||
|
# ── Reranker weights ──
|
||||||
|
# Derived from research analysis of failure modes:
|
||||||
|
# housenumber_exact is the strongest signal because Photon's soft-boost
|
||||||
|
# lets wrong-number results bubble up. street_name_fuzz and locality_fuzz
|
||||||
|
# handle abbreviation/case variation. source_authority gives Netsyms a
|
||||||
|
# boost for US addresses since it has USPS-verified data.
|
||||||
|
W_HOUSENUMBER_EXACT = 6.0 # exact housenumber match
|
||||||
|
W_HOUSENUMBER_MISMATCH = -5.0 # housenumber present but wrong
|
||||||
|
W_STREET_NAME_FUZZ = 3.0 # fuzzy street name similarity [0..1] * weight
|
||||||
|
W_TOKEN_COVERAGE = 2.0 # fraction of query tokens found in result
|
||||||
|
W_STREET_TYPE_MATCH = 1.5 # "st" matches "street", etc.
|
||||||
|
W_LOCALITY_FUZZ = 2.0 # city/state fuzzy match
|
||||||
|
W_SOURCE_AUTHORITY = 2.0 # Netsyms for US addresses
|
||||||
|
W_LAYER_RANK = 1.0 # type-appropriate results ranked higher
|
||||||
|
W_PHOTON_POSITION_NORM = 1.0 # Photon's native ranking (normalized by position)
|
||||||
|
W_STATE_EXACT = 1.0 # exact state code match
|
||||||
|
|
||||||
|
# ── US abbreviation expansions ──
|
||||||
|
# Applied ONLY to parsed StreetName/StreetNamePostType tokens, NOT to ordinals.
|
||||||
|
_STREET_TYPE_ABBREVS = {
|
||||||
|
'st': 'street', 'ave': 'avenue', 'blvd': 'boulevard', 'dr': 'drive',
|
||||||
|
'rd': 'road', 'ln': 'lane', 'ct': 'court', 'cir': 'circle',
|
||||||
|
'pl': 'place', 'way': 'way', 'pkwy': 'parkway', 'hwy': 'highway',
|
||||||
|
'trl': 'trail', 'ter': 'terrace', 'sq': 'square',
|
||||||
|
}
|
||||||
|
_DIRECTIONAL_ABBREVS = {
|
||||||
|
'n': 'north', 's': 'south', 'e': 'east', 'w': 'west',
|
||||||
|
'ne': 'northeast', 'nw': 'northwest', 'se': 'southeast', 'sw': 'southwest',
|
||||||
|
}
|
||||||
|
_ORDINAL_RE = re.compile(r'^\d+(st|nd|rd|th)$', re.IGNORECASE)
|
||||||
|
|
||||||
|
# ── US state codes ──
|
||||||
|
_STATE_CODES = {
|
||||||
|
'AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA',
|
||||||
|
'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD',
|
||||||
|
'MA', 'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ',
|
||||||
|
'NM', 'NY', 'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC',
|
||||||
|
'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY', 'DC',
|
||||||
|
}
|
||||||
|
|
||||||
|
# Coordinate regex
|
||||||
|
_COORD_RE = re.compile(r'^\s*(-?\d+\.?\d*)\s*[,\s]\s*(-?\d+\.?\d*)\s*$')
|
||||||
|
|
||||||
|
|
||||||
|
# ═══════════════════════════════════════════════════════════════════
|
||||||
|
# STEP 1: PREPROCESSING
|
||||||
|
# ═══════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
|
def _parse_coords(text):
    """Parse *text* as a "lat, lon" pair.

    Returns a (lat, lon) tuple of floats when the text matches the
    coordinate pattern and latitude is within [-90, 90] and longitude within
    [-180, 180]; returns None otherwise.
    """
    match = _COORD_RE.match(text.strip())
    if match is None:
        return None
    lat = float(match.group(1))
    lon = float(match.group(2))
    in_bounds = abs(lat) <= 90 and abs(lon) <= 180
    return (lat, lon) if in_bounds else None
|
||||||
|
|
||||||
|
|
||||||
|
def _classify_and_parse(query):
    """
    Parse query with usaddress, classify intent, expand abbreviations.

    Returns (intent, parsed_dict) where:
      intent: 'ADDRESS' | 'POSTCODE' | 'LOCALITY' | 'COORD' | 'UNKNOWN'
              (this function never returns 'POI')
      parsed_dict: {number, street, street_raw, city, state, zipcode,
                    raw_query, expanded_query}
        street          street with abbreviations expanded
        street_raw      street as typed (original abbreviations)
        expanded_query  number + expanded street + city + state + zipcode

    For 'COORD', and for 'UNKNOWN' caused by a usaddress
    RepeatedLabelError, parsed_dict holds only raw_query/expanded_query
    (both the stripped input); every other field is None.
    """
    q = query.strip()
    parsed = {
        'number': None, 'street': None, 'street_raw': None,
        'city': None, 'state': None,
        'zipcode': None, 'raw_query': q, 'expanded_query': q,
    }

    # Coordinate check first
    if _parse_coords(q):
        return 'COORD', parsed

    # Try usaddress
    try:
        tagged, addr_type = usaddress.tag(q)
    except usaddress.RepeatedLabelError:
        # Ambiguous input — fall back to free-text Photon
        return 'UNKNOWN', parsed

    # Extract components
    number = tagged.get('AddressNumber', '').strip()
    street_name = tagged.get('StreetName', '').strip()
    street_pre_dir = tagged.get('StreetNamePreDirectional', '').strip()
    street_post_type = tagged.get('StreetNamePostType', '').strip()
    place = tagged.get('PlaceName', '').strip()
    state = tagged.get('StateName', '').strip()
    zipcode = tagged.get('ZipCode', '').strip()

    # ── Fix usaddress edge case: "214 N St Filer" ──
    # usaddress tags the short directional as PreDirectional and mashes
    # "St Filer" into StreetName. Detect: PreDirectional is one or two
    # characters, and StreetName has 2+ tokens whose first is a street type.
    if (street_pre_dir and len(street_pre_dir) <= 2
            and not street_name.startswith(street_pre_dir)
            and ' ' in street_name):
        name_tokens = street_name.split()
        first_lower = name_tokens[0].lower()
        if first_lower in _STREET_TYPE_ABBREVS or first_lower in _STREET_TYPE_ABBREVS.values():
            # "N" is actually the street name, "St" is the post-type, and
            # the remaining tokens are the place. This replaces any
            # PlaceName usaddress reported.
            street_name = street_pre_dir
            street_post_type = name_tokens[0]
            place = ' '.join(name_tokens[1:])
            street_pre_dir = ''

    # ── Expand abbreviations (guard ordinals) ──
    expanded_parts = []

    if number:
        parsed['number'] = number
        expanded_parts.append(number)

    if street_pre_dir:
        expanded_parts.append(
            _DIRECTIONAL_ABBREVS.get(street_pre_dir.lower(), street_pre_dir)
        )

    if street_name:
        # Don't expand ordinals: "21st" stays "21st"
        if _ORDINAL_RE.match(street_name):
            expanded_parts.append(street_name)
        else:
            # Expand directional abbreviation if it IS the street name
            expanded_parts.append(
                _DIRECTIONAL_ABBREVS.get(street_name.lower(), street_name)
            )

    if street_post_type:
        if _ORDINAL_RE.match(street_post_type):
            expanded_parts.append(street_post_type)
        else:
            expanded_parts.append(
                _STREET_TYPE_ABBREVS.get(street_post_type.lower(), street_post_type)
            )

    # Raw street keeps the original abbreviations (for Netsyms); the expanded
    # form below is the one sent to Photon.
    raw_street_parts = [
        part for part in (street_pre_dir, street_name, street_post_type) if part
    ]
    parsed['street_raw'] = ' '.join(raw_street_parts)

    # Build the full expanded street: everything collected so far except the
    # leading house number.
    if expanded_parts:
        street_full = ' '.join(expanded_parts[1:] if number else expanded_parts)
        parsed['street'] = street_full

    if place:
        parsed['city'] = place
        expanded_parts.append(place)
    if state:
        parsed['state'] = state.upper()
        expanded_parts.append(state)
    if zipcode:
        parsed['zipcode'] = zipcode
        expanded_parts.append(zipcode)

    parsed['expanded_query'] = ' '.join(expanded_parts)

    # ── Intent classification ──
    if addr_type == 'Street Address' and number:
        return 'ADDRESS', parsed
    elif zipcode and not number and not street_name:
        return 'POSTCODE', parsed
    elif addr_type == 'Ambiguous':
        # Looks like a locality when there are at least two tokens and the
        # last one is a US state code; everything before it is the city.
        tokens = q.replace(',', ' ').split()
        if len(tokens) >= 2 and tokens[-1].upper() in _STATE_CODES:
            parsed['city'] = ' '.join(tokens[:-1])
            parsed['state'] = tokens[-1].upper()
            return 'LOCALITY', parsed
        return 'UNKNOWN', parsed
    else:
        return 'UNKNOWN', parsed
|
||||||
|
|
||||||
|
|
||||||
|
# ═══════════════════════════════════════════════════════════════════
|
||||||
|
# STEP 2: RETRIEVAL
|
||||||
|
# ═══════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
|
def _retrieve_netsyms(parsed, limit=10):
    """Query Netsyms for a structured address lookup.

    Uses a street lookup when both a house number and a street are present,
    a zipcode lookup when only a zipcode is present, and returns an empty
    list otherwise (or when the netsyms module cannot be imported).

    :param parsed: parsed-query dict; reads number, street_raw/street, city,
                   state and zipcode
    :param limit: maximum number of rows requested from Netsyms
    :return: list of candidate dicts with name, lat, lon, source, type, raw
             and the underscore-prefixed fields number/street/city/state
    """
    try:
        from . import netsyms
    except Exception as exc:
        # Netsyms is optional: fall back to no candidates, but leave a trace
        # so a broken backend is not completely invisible.
        logger.debug("Netsyms backend unavailable: %s", exc)
        return []

    # NOTE(review): these keys may be present with value None (the parser in
    # this file initialises them that way), in which case the '' defaults do
    # not apply and None is forwarded to netsyms — confirm netsyms treats
    # None and '' alike before normalising here.
    number = parsed.get('number', '')
    # Prefer the street as typed; fall back to the expanded form.
    street = parsed.get('street_raw') or parsed.get('street', '')
    city = parsed.get('city', '')
    state = parsed.get('state', '')
    zipcode = parsed.get('zipcode', '')

    if number and street:
        rows = netsyms.lookup_by_street(
            number, street, city=city, state=state, zipcode=zipcode, limit=limit
        )
    elif zipcode:
        rows = netsyms.lookup_by_zipcode(zipcode, limit=limit)
    else:
        return []

    results = []
    for row in rows:
        # Display name: "number street [street2] city state zipcode",
        # skipping empty pieces.
        addr_parts = [row['number'], row['street']]
        if row.get('street2'):
            addr_parts.append(row['street2'])
        addr_parts.extend([row['city'], row['state'], row['zipcode']])
        display = ' '.join(p for p in addr_parts if p)
        results.append({
            'name': display,
            'lat': row['lat'],
            'lon': row['lon'],
            'source': 'netsyms',
            'type': 'street_address',
            'raw': row,
            '_number': row.get('number', ''),
            '_street': row.get('street', ''),
            '_city': row.get('city', ''),
            '_state': row.get('state', ''),
        })
    return results
|
||||||
|
|
||||||
|
|
||||||
|
def _retrieve_photon_structured(parsed, limit=10):
    """Look up a parsed address through Photon's /structured endpoint.

    Returns the parsed candidate list, or an empty list when the query has
    no street or the request fails.
    """
    params = {'limit': limit, 'countrycode': 'US'}
    # (key in parsed, Photon parameter name); only non-empty values are sent.
    field_map = (
        ('street', 'street'),
        ('number', 'housenumber'),
        ('city', 'city'),
        ('state', 'state'),
    )
    for parsed_key, photon_key in field_map:
        value = parsed.get(parsed_key)
        if value:
            params[photon_key] = value

    # Without a street there is nothing to send.
    if 'street' not in params:
        return []

    try:
        resp = requests.get(f"{PHOTON_URL}/structured", params=params, timeout=5)
        resp.raise_for_status()
        payload = resp.json()
    except Exception as e:
        logger.debug("Photon /structured failed: %s", e)
        return []

    features = payload.get('features', [])
    return _parse_photon_features(features, 'photon')
|
||||||
|
|
||||||
|
|
||||||
|
def _retrieve_photon_freetext(query, limit=10):
    """Run a free-text search against Photon's /api endpoint.

    The request carries the module's bias latitude, longitude and zoom.
    Returns the parsed candidate list, or an empty list when the request
    fails.
    """
    params = {
        'q': query,
        'limit': limit,
        'lat': GEOCODE_BIAS_LAT,
        'lon': GEOCODE_BIAS_LON,
        'zoom': GEOCODE_BIAS_ZOOM,
    }
    try:
        resp = requests.get(f"{PHOTON_URL}/api", params=params, timeout=5)
        resp.raise_for_status()
        payload = resp.json()
    except Exception as e:
        logger.debug("Photon /api failed: %s", e)
        return []

    features = payload.get('features', [])
    return _parse_photon_features(features, 'photon')
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_photon_features(features, source):
|
||||||
|
"""Convert Photon GeoJSON features to candidate dicts."""
|
||||||
|
results = []
|
||||||
|
for i, feature in enumerate(features):
|
||||||
|
props = feature.get('properties', {})
|
||||||
|
coords = feature.get('geometry', {}).get('coordinates', [0, 0])
|
||||||
|
|
||||||
|
osm_key = props.get('osm_key', '')
|
||||||
|
osm_value = props.get('osm_value', '')
|
||||||
|
feat_type = props.get('type', '')
|
||||||
|
has_hn = bool(props.get('housenumber'))
|
||||||
|
|
||||||
|
if has_hn or osm_value in ('house', 'residential'):
|
||||||
|
rtype = 'street_address'
|
||||||
|
elif feat_type in ('city', 'town', 'village', 'hamlet', 'county', 'state', 'country'):
|
||||||
|
rtype = 'locality'
|
||||||
|
elif osm_key in ('amenity', 'shop', 'tourism', 'leisure'):
|
||||||
|
rtype = 'poi'
|
||||||
|
else:
|
||||||
|
rtype = 'poi'
|
||||||
|
|
||||||
|
# Build display name
|
||||||
|
parts = []
|
||||||
|
hn = props.get('housenumber')
|
||||||
|
street = props.get('street')
|
||||||
|
name = props.get('name', '')
|
||||||
|
if hn and street:
|
||||||
|
parts.append(f"{hn} {street}")
|
||||||
|
if name and name != street:
|
||||||
|
parts.append(name)
|
||||||
|
elif name:
|
||||||
|
parts.append(name)
|
||||||
|
elif street:
|
||||||
|
parts.append(street)
|
||||||
|
for key in ('city', 'county', 'state', 'country'):
|
||||||
|
v = props.get(key)
|
||||||
|
if v and (not parts or v != parts[-1]):
|
||||||
|
parts.append(v)
|
||||||
|
display = ', '.join(p for p in parts if p) or 'Unknown'
|
||||||
|
|
||||||
|
results.append({
|
||||||
|
'name': display,
|
||||||
|
'lat': coords[1],
|
||||||
|
'lon': coords[0],
|
||||||
|
'source': source,
|
||||||
|
'type': rtype,
|
||||||
|
'raw': props,
|
||||||
|
'_photon_rank': i,
|
||||||
|
'_number': props.get('housenumber', ''),
|
||||||
|
'_street': props.get('street', ''),
|
||||||
|
'_city': props.get('city', ''),
|
||||||
|
'_state': props.get('state', ''),
|
||||||
|
})
|
||||||
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
# ═══════════════════════════════════════════════════════════════════
|
||||||
|
# STEP 3: RERANKER
|
||||||
|
# ═══════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
|
def _expand_street_type(s):
    """Lowercase token *s* and expand it if it is a known abbreviation.

    The lowercased token is looked up in ``_STREET_TYPE_ABBREVS``; a token
    that is not a key of that table is returned lowercased and otherwise
    unchanged.
    """
    token = s.lower()
    return _STREET_TYPE_ABBREVS.get(token, token)
|
||||||
|
|
||||||
|
|
||||||
|
def _score_candidate(candidate, parsed, intent):
|
||||||
|
"""
|
||||||
|
Score a candidate against the parsed query.
|
||||||
|
Returns (total_score, signal_breakdown_dict).
|
||||||
|
"""
|
||||||
|
signals = {}
|
||||||
|
total = 0.0
|
||||||
|
|
||||||
|
query_number = (parsed.get('number') or '').strip().upper()
|
||||||
|
query_street = (parsed.get('street') or '').strip().upper()
|
||||||
|
query_city = (parsed.get('city') or '').strip().upper()
|
||||||
|
query_state = (parsed.get('state') or '').strip().upper()
|
||||||
|
|
||||||
|
cand_number = (candidate.get('_number') or '').strip().upper()
|
||||||
|
cand_street = (candidate.get('_street') or '').strip().upper()
|
||||||
|
cand_city = (candidate.get('_city') or '').strip().upper()
|
||||||
|
cand_state = (candidate.get('_state') or '').strip().upper()
|
||||||
|
|
||||||
|
# ── Housenumber ──
|
||||||
|
if intent == 'ADDRESS' and query_number:
|
||||||
|
if cand_number == query_number:
|
||||||
|
signals['housenumber_exact'] = W_HOUSENUMBER_EXACT
|
||||||
|
total += W_HOUSENUMBER_EXACT
|
||||||
|
elif cand_number and cand_number != query_number:
|
||||||
|
signals['housenumber_mismatch'] = W_HOUSENUMBER_MISMATCH
|
||||||
|
total += W_HOUSENUMBER_MISMATCH
|
||||||
|
|
||||||
|
# ── Street name fuzz ──
|
||||||
|
if query_street and cand_street:
|
||||||
|
# Expand both for comparison
|
||||||
|
q_expanded = ' '.join(_expand_street_type(t) for t in query_street.split())
|
||||||
|
c_expanded = ' '.join(_expand_street_type(t) for t in cand_street.split())
|
||||||
|
ratio = fuzz.token_sort_ratio(q_expanded, c_expanded) / 100.0
|
||||||
|
score = ratio * W_STREET_NAME_FUZZ
|
||||||
|
signals['street_name_fuzz'] = round(score, 2)
|
||||||
|
total += score
|
||||||
|
|
||||||
|
# ── Street type match ──
|
||||||
|
if query_street and cand_street:
|
||||||
|
q_tokens = set(_expand_street_type(t) for t in query_street.split())
|
||||||
|
c_tokens = set(_expand_street_type(t) for t in cand_street.split())
|
||||||
|
# Check if the street type words overlap
|
||||||
|
street_types = set(_STREET_TYPE_ABBREVS.values())
|
||||||
|
q_types = q_tokens & street_types
|
||||||
|
c_types = c_tokens & street_types
|
||||||
|
if q_types and q_types & c_types:
|
||||||
|
signals['street_type_match'] = W_STREET_TYPE_MATCH
|
||||||
|
total += W_STREET_TYPE_MATCH
|
||||||
|
|
||||||
|
# ── Token coverage ──
|
||||||
|
raw_q = parsed.get('raw_query', '').upper()
|
||||||
|
q_tokens = set(raw_q.replace(',', ' ').split())
|
||||||
|
if q_tokens:
|
||||||
|
cand_text = candidate.get('name', '').upper()
|
||||||
|
matched = sum(1 for t in q_tokens if t in cand_text)
|
||||||
|
coverage = matched / len(q_tokens)
|
||||||
|
score = coverage * W_TOKEN_COVERAGE
|
||||||
|
signals['token_coverage'] = round(score, 2)
|
||||||
|
total += score
|
||||||
|
|
||||||
|
# ── Locality fuzz ──
|
||||||
|
if query_city and cand_city:
|
||||||
|
ratio = fuzz.ratio(query_city, cand_city) / 100.0
|
||||||
|
score = ratio * W_LOCALITY_FUZZ
|
||||||
|
signals['locality_fuzz'] = round(score, 2)
|
||||||
|
total += score
|
||||||
|
|
||||||
|
# ── State exact ──
|
||||||
|
if query_state and cand_state:
|
||||||
|
if cand_state == query_state:
|
||||||
|
signals['state_exact'] = W_STATE_EXACT
|
||||||
|
total += W_STATE_EXACT
|
||||||
|
|
||||||
|
# ── Source authority ──
|
||||||
|
if candidate.get('source') == 'netsyms' and intent == 'ADDRESS':
|
||||||
|
signals['source_authority'] = W_SOURCE_AUTHORITY
|
||||||
|
total += W_SOURCE_AUTHORITY
|
||||||
|
|
||||||
|
# ── Layer rank (type-appropriate bonus) ──
|
||||||
|
cand_type = candidate.get('type', '')
|
||||||
|
if intent == 'ADDRESS' and cand_type == 'street_address':
|
||||||
|
signals['layer_rank'] = W_LAYER_RANK
|
||||||
|
total += W_LAYER_RANK
|
||||||
|
elif intent == 'LOCALITY' and cand_type == 'locality':
|
||||||
|
signals['layer_rank'] = W_LAYER_RANK
|
||||||
|
total += W_LAYER_RANK
|
||||||
|
elif intent == 'POI' and cand_type == 'poi':
|
||||||
|
signals['layer_rank'] = W_LAYER_RANK
|
||||||
|
total += W_LAYER_RANK
|
||||||
|
|
||||||
|
# ── Photon position normalization ──
|
||||||
|
photon_rank = candidate.get('_photon_rank')
|
||||||
|
if photon_rank is not None:
|
||||||
|
# Top result gets full bonus, decays linearly
|
||||||
|
score = max(0, (1.0 - photon_rank / 10.0)) * W_PHOTON_POSITION_NORM
|
||||||
|
signals['photon_position'] = round(score, 2)
|
||||||
|
total += score
|
||||||
|
|
||||||
|
return round(total, 2), signals
|
||||||
|
|
||||||
|
|
||||||
|
def _build_match_code(candidate, parsed, intent):
|
||||||
|
"""Build a match_code dict indicating match quality for each field."""
|
||||||
|
mc = {}
|
||||||
|
if intent == 'ADDRESS':
|
||||||
|
q_num = (parsed.get('number') or '').strip().upper()
|
||||||
|
c_num = (candidate.get('_number') or '').strip().upper()
|
||||||
|
if q_num and c_num == q_num:
|
||||||
|
mc['housenumber'] = 'matched'
|
||||||
|
elif q_num and c_num:
|
||||||
|
mc['housenumber'] = 'unmatched'
|
||||||
|
elif q_num and not c_num:
|
||||||
|
mc['housenumber'] = 'inferred'
|
||||||
|
|
||||||
|
q_street = (parsed.get('street') or '').strip().upper()
|
||||||
|
c_street = (candidate.get('_street') or '').strip().upper()
|
||||||
|
if q_street and c_street:
|
||||||
|
q_exp = ' '.join(_expand_street_type(t) for t in q_street.split())
|
||||||
|
c_exp = ' '.join(_expand_street_type(t) for t in c_street.split())
|
||||||
|
ratio = fuzz.token_sort_ratio(q_exp, c_exp) / 100.0
|
||||||
|
mc['street'] = 'matched' if ratio > 0.8 else 'unmatched'
|
||||||
|
elif q_street:
|
||||||
|
mc['street'] = 'inferred'
|
||||||
|
|
||||||
|
q_city = (parsed.get('city') or '').strip().upper()
|
||||||
|
c_city = (candidate.get('_city') or '').strip().upper()
|
||||||
|
if q_city and c_city:
|
||||||
|
ratio = fuzz.ratio(q_city, c_city) / 100.0
|
||||||
|
mc['city'] = 'matched' if ratio > 0.8 else 'unmatched'
|
||||||
|
elif q_city:
|
||||||
|
mc['city'] = 'inferred'
|
||||||
|
|
||||||
|
return mc
|
||||||
|
|
||||||
|
|
||||||
|
def _rerank(candidates, parsed, intent, query, limit):
    """Score every candidate, order best-first, and return the top *limit*.

    Side effects: each candidate dict gains ``_score`` and ``_signals`` keys,
    and the three best candidates are written to the trace logger at DEBUG.

    Returns:
        A list of fresh result dicts (name, lat, lon, source, confidence,
        type, raw) without the internal underscore fields; ``match_code`` is
        attached only when it is non-empty.
    """
    for cand in candidates:
        cand['_score'], cand['_signals'] = _score_candidate(cand, parsed, intent)

    # sorted() is stable, so equally-scored candidates keep retrieval order.
    ranked = sorted(candidates, key=lambda item: item['_score'], reverse=True)

    # Trace log for audit
    _trace_logger.debug("─── Query: %r intent=%s ───", query, intent)
    for position, cand in enumerate(ranked[:3]):
        _trace_logger.debug(
            " #%d score=%.2f src=%s name=%s",
            position, cand['_score'], cand.get('source', '?'), cand.get('name', '?')[:60]
        )
        _trace_logger.debug(" signals=%s", cand.get('_signals', {}))

    # Score floors, highest first; anything below the last floor is 'low'.
    confidence_floors = ((10, 'exact'), (5, 'high'), (2, 'medium'))

    output = []
    for cand in ranked[:limit]:
        match_code = _build_match_code(cand, parsed, intent)

        points = cand.get('_score', 0)
        confidence = next(
            (label for floor, label in confidence_floors if points >= floor),
            'low',
        )

        public = {
            'name': cand['name'],
            'lat': cand['lat'],
            'lon': cand['lon'],
            'source': cand['source'],
            'confidence': confidence,
            'type': cand.get('type', 'poi'),
            'raw': cand.get('raw'),
        }
        if match_code:
            public['match_code'] = match_code
        output.append(public)

    return output
|
||||||
|
|
||||||
|
|
||||||
|
# ═══════════════════════════════════════════════════════════════════
|
||||||
|
# STEP 4: ANNOTATION
|
||||||
|
# ═══════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
|
def _haversine_m(lat1, lon1, lat2, lon2):
|
||||||
|
"""Haversine distance in meters."""
|
||||||
|
R = 6_371_000
|
||||||
|
rlat1, rlat2 = math.radians(lat1), math.radians(lat2)
|
||||||
|
dlat = math.radians(lat2 - lat1)
|
||||||
|
dlon = math.radians(lon2 - lon1)
|
||||||
|
a = math.sin(dlat / 2) ** 2 + math.cos(rlat1) * math.cos(rlat2) * math.sin(dlon / 2) ** 2
|
||||||
|
return R * 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
|
||||||
|
|
||||||
|
|
||||||
|
def _annotate_with_address_book(results):
    """Add ``labeled_as`` to results within radius of an address book entry.

    Mutates the dicts in *results* in place and returns None. A result is
    labelled with the name of the first entry (in address-book order) lying
    within ``ADDRESS_BOOK_ANNOTATION_RADIUS_M`` meters of it. Results or
    entries without both ``lat`` and ``lon`` are skipped.

    Annotation is best-effort: if the address book cannot be imported or
    loaded, the results are left untouched.
    """
    try:
        from . import address_book
        entries = address_book.load()
    except Exception as exc:
        # Never fail a geocode over a missing/broken address book, but leave
        # a trace instead of swallowing the error silently.
        logger.debug("geocode: address_book annotation skipped: %s", exc)
        return

    for result in results:
        rlat, rlon = result.get('lat'), result.get('lon')
        if rlat is None or rlon is None:
            continue
        # `or ()` tolerates a loader that returns None instead of a list.
        for entry in entries or ():
            elat, elon = entry.get('lat'), entry.get('lon')
            if elat is None or elon is None:
                continue
            if _haversine_m(rlat, rlon, elat, elon) <= ADDRESS_BOOK_ANNOTATION_RADIUS_M:
                result['labeled_as'] = entry['name']
                break
|
||||||
|
|
||||||
|
|
||||||
|
# ═══════════════════════════════════════════════════════════════════
|
||||||
|
# PUBLIC API
|
||||||
|
# ═══════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
|
def geocode(query, limit=10):
    """
    Structured geocoding with multi-source retrieval and reranking.

    Steps, in order: coordinate literal → single-word address-book nickname →
    intent classification → candidate retrieval → per-source deduplication →
    rerank → address-book annotation.

    Args:
        query: Free-text query. ``None`` or a blank string yields an empty
            result set.
        limit: Maximum number of results; clamped to the range 1..20.

    Returns {query, results: [...], count} — always 200-safe.
    """
    limit = max(1, min(limit, 20))
    q = (query or '').strip()

    if not q:
        return {'query': q, 'results': [], 'count': 0}

    # ── Coordinate detection ──
    coords = _parse_coords(q)
    if coords:
        return {
            'query': q,
            'results': [{
                'name': q,
                'lat': coords[0],
                'lon': coords[1],
                'source': 'coordinates',
                'confidence': 'exact',
                'type': 'coordinates',
                'raw': None,
            }],
            'count': 1,
        }

    # ── Address book nickname short-circuit ──
    # Only single-word queries may short-circuit; multi-word aliases go
    # through the normal retrieval chain.
    normalized_q = ' '.join(q.lower().replace(',', ' ').split())
    is_single_word = ' ' not in normalized_q
    try:
        from . import address_book
        ab_match = address_book.lookup(q)
        if (ab_match
                and ab_match['confidence'] == 'exact'
                # Compare against None: 0.0 is a valid latitude/longitude.
                and ab_match.get('lat') is not None
                and ab_match.get('lon') is not None
                and is_single_word):
            logger.info("geocode: nickname short-circuit %r → %s", q, ab_match['name'])
            return {
                'query': q,
                'results': [{
                    'name': ab_match.get('address') or ab_match['name'],
                    'lat': ab_match['lat'],
                    'lon': ab_match['lon'],
                    'source': 'address_book',
                    'confidence': 'exact',
                    'type': 'nickname',
                    'raw': ab_match,
                }],
                'count': 1,
            }
    except Exception as e:
        # Best-effort: an unusable address book must not break geocoding.
        logger.debug("geocode: address_book lookup failed: %s", e)

    # ── Classify intent + parse ──
    intent, parsed = _classify_and_parse(q)
    logger.debug("geocode: intent=%s parsed=%s", intent, parsed)

    # ── Retrieve candidates ──
    candidates = []

    if intent == 'ADDRESS':
        # Three sources, queried one after another: Netsyms (structured),
        # Photon freetext (expanded query), and Photon /structured.
        netsyms_results = _retrieve_netsyms(parsed, limit=limit)
        photon_results = _retrieve_photon_freetext(
            parsed.get('expanded_query', q), limit=limit
        )
        photon_struct = _retrieve_photon_structured(parsed, limit=5)
        candidates = netsyms_results + photon_results + photon_struct

    elif intent == 'POSTCODE':
        netsyms_results = _retrieve_netsyms(parsed, limit=limit)
        photon_results = _retrieve_photon_freetext(q, limit=limit)
        candidates = netsyms_results + photon_results

    elif intent in ('LOCALITY', 'POI', 'UNKNOWN'):
        candidates = _retrieve_photon_freetext(q, limit=limit)

    # ── Deduplicate by (lat, lon) proximity ──
    # Two candidates are duplicates only when they come from the same source
    # and lie within 50 m of each other; the first one seen is kept.
    deduped = []
    for c in candidates:
        is_dup = any(
            _haversine_m(c['lat'], c['lon'], kept['lat'], kept['lon']) < 50
            and c.get('source') == kept.get('source')
            for kept in deduped
        )
        if not is_dup:
            deduped.append(c)
    candidates = deduped

    # ── Rerank ──
    results = _rerank(candidates, parsed, intent, q, limit)

    # ── Address book annotation ──
    _annotate_with_address_book(results)

    logger.info("geocode: %r → intent=%s, %d results", q, intent, len(results))
    return {'query': q, 'results': results, 'count': len(results)}
|
||||||
157
lib/geocode_test.py
Normal file
157
lib/geocode_test.py
Normal file
|
|
@ -0,0 +1,157 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Tests for RECON Photon-first geocode chain."""
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
import urllib.request
|
||||||
|
import urllib.parse
|
||||||
|
|
||||||
|
BASE = "http://localhost:8420"


def _at_least_one(**fields):
    """Build a check: count >= 1 and the top result has the given field values."""
    def check(r):
        return r["count"] >= 1 and all(
            r["results"][0][key] == want for key, want in fields.items()
        )
    return check


def _exactly_one(**fields):
    """Build a check: count == 1 and the only result has the given field values."""
    def check(r):
        return r["count"] == 1 and all(
            r["results"][0][key] == want for key, want in fields.items()
        )
    return check


def _no_results(r):
    """Check that the response reports zero results."""
    return r["count"] == 0 and r["results"] == []


# Each case: display name, query string sent to /api/geocode, and a predicate
# applied to the decoded JSON body.
TESTS = [
    {
        "name": "home → nickname short-circuit",
        "query": "home",
        "check": _exactly_one(source="address_book", confidence="exact", type="nickname"),
    },
    {
        "name": "214 north st filer → netsyms exact match (multi-word, not nickname)",
        "query": "214 north st filer",
        "check": _at_least_one(source="netsyms", confidence="exact", type="street_address"),
    },
    {
        "name": "214 North St, Filer, ID → netsyms (case/punctuation)",
        "query": "214 North St, Filer, ID",
        "check": _at_least_one(source="netsyms"),
    },
    {
        "name": "214 NORTH ST FILER ID → netsyms (uppercase)",
        "query": "214 NORTH ST FILER ID",
        "check": _at_least_one(source="netsyms"),
    },
    {
        "name": "1600 Pennsylvania Ave Washington DC → White House",
        "query": "1600 Pennsylvania Ave Washington DC",
        "check": _at_least_one(source="photon"),
    },
    {
        "name": "1600 pennsylvania ave washington dc → lowercase",
        "query": "1600 pennsylvania ave washington dc",
        "check": _at_least_one(source="photon"),
    },
    {
        "name": "starbucks filer → POI result",
        "query": "starbucks filer",
        "check": _at_least_one(source="photon"),
    },
    {
        "name": "filer idaho → locality",
        "query": "filer idaho",
        "check": _at_least_one(source="photon", type="locality"),
    },
    {
        "name": "filer → partial query, at least 1 result",
        "query": "filer",
        "check": _at_least_one(source="photon"),
    },
    {
        "name": "42.5736, -114.6066 → coordinates (with space)",
        "query": "42.5736, -114.6066",
        "check": _exactly_one(source="coordinates", confidence="exact", type="coordinates"),
    },
    {
        "name": "42.5736,-114.6066 → coordinates (no space)",
        "query": "42.5736,-114.6066",
        "check": _exactly_one(source="coordinates", confidence="exact"),
    },
    {
        "name": "boise → at least 1 result",
        "query": "boise",
        "check": _at_least_one(source="photon"),
    },
    {
        "name": "toronto → CA canary",
        "query": "toronto",
        "check": _at_least_one(source="photon"),
    },
    {
        "name": "asdfghjklqwerty → empty results, 200 OK",
        "query": "asdfghjklqwerty",
        "check": _no_results,
    },
    {
        "name": "empty query → empty results",
        "query": "",
        "check": _no_results,
    },
]
|
||||||
|
|
||||||
|
# Run every case against the live /api/geocode endpoint and tally the outcome.
passed = 0
failed = 0

for t in TESTS:
    # An empty query is still sent as an explicit "q=" parameter.
    q = urllib.parse.urlencode({"q": t["query"]}) if t["query"] else "q="
    url = f"{BASE}/api/geocode?{q}"
    try:
        with urllib.request.urlopen(urllib.request.Request(url), timeout=10) as resp:
            status = resp.status
            body = json.loads(resp.read())
    except urllib.error.HTTPError as e:
        # Non-2xx answers may still carry a JSON body worth reporting.
        status = e.code
        try:
            body = json.loads(e.read())
        except Exception:
            body = {}
    except Exception as e:
        # Transport-level failure: count it and move on to the next case.
        status = 0
        body = {}
        print(f" [FAIL] {t['name']}")
        print(f" EXCEPTION: {e}")
        failed += 1
        continue

    ok = status == 200 and t["check"](body)
    if ok:
        tag = "PASS"
        passed += 1
    else:
        tag = "FAIL"
        failed += 1

    found = body.get("results")
    top = found[0] if found else {}
    top_summary = f"source={top.get('source','—')} type={top.get('type','—')} conf={top.get('confidence','—')} name={top.get('name','—')[:50]}"
    print(f" [{tag}] {t['name']}")
    if ok:
        labeled = f" labeled_as={top.get('labeled_as')}" if top.get('labeled_as') else ""
        print(f" → {top_summary}{labeled}")
    else:
        print(f" HTTP {status}, count={body.get('count','?')}, top: {top_summary}")

print(f"\n{passed} passed, {failed} failed")
# Exit status mirrors the tally so the script can gate CI.
sys.exit(0 if failed == 0 else 1)
|
||||||
168
lib/nav_tools.py
Normal file
168
lib/nav_tools.py
Normal file
|
|
@ -0,0 +1,168 @@
|
||||||
|
"""Navigation tools: geocoding via Photon and routing via Valhalla."""
|
||||||
|
|
||||||
|
import math
|
||||||
|
import re
|
||||||
|
import requests
|
||||||
|
|
||||||
|
from .utils import setup_logging
|
||||||
|
|
||||||
|
logger = setup_logging('recon.nav_tools')
|
||||||
|
|
||||||
|
PHOTON_URL = "http://localhost:2322"
|
||||||
|
VALHALLA_URL = "http://localhost:8002"
|
||||||
|
|
||||||
|
# Regional bias for Photon searches (Idaho-centric for Matt's use case).
|
||||||
|
# Adjustable — Photon uses these to rank nearby results higher.
|
||||||
|
GEOCODE_BIAS_LAT = 42.5736
|
||||||
|
GEOCODE_BIAS_LON = -114.6066
|
||||||
|
GEOCODE_BIAS_ZOOM = 10
|
||||||
|
|
||||||
|
# Distance threshold (meters) for annotating Photon results with address
|
||||||
|
# book labels. 75m covers GPS jitter + geocoder imprecision.
|
||||||
|
ADDRESS_BOOK_ANNOTATION_RADIUS_M = 75
|
||||||
|
|
||||||
|
# Coordinate regex — handles comma-separated and space-separated forms.
|
||||||
|
_COORD_RE = re.compile(
|
||||||
|
r'^\s*(-?\d+\.\d+)\s*[,\s]\s*(-?\d+\.\d+)\s*$'
|
||||||
|
)
|
||||||
|
|
||||||
|
VALID_MODES = {"auto", "pedestrian", "bicycle", "truck"}
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_coords(text: str):
    """Parse *text* as a coordinate pair.

    Returns:
        ``(lat, lon)`` as floats when *text* matches ``_COORD_RE`` and the
        values lie within -90..90 / -180..180; otherwise ``None``.
    """
    found = _COORD_RE.match(text.strip())
    if found is None:
        return None
    lat, lon = (float(part) for part in found.groups())
    within_bounds = -90 <= lat <= 90 and -180 <= lon <= 180
    return (lat, lon) if within_bounds else None
|
||||||
|
|
||||||
|
|
||||||
|
def _haversine_m(lat1, lon1, lat2, lon2):
|
||||||
|
"""Haversine distance in meters between two (lat, lon) points."""
|
||||||
|
R = 6_371_000 # Earth radius in meters
|
||||||
|
rlat1, rlat2 = math.radians(lat1), math.radians(lat2)
|
||||||
|
dlat = math.radians(lat2 - lat1)
|
||||||
|
dlon = math.radians(lon2 - lon1)
|
||||||
|
a = math.sin(dlat / 2) ** 2 + math.cos(rlat1) * math.cos(rlat2) * math.sin(dlon / 2) ** 2
|
||||||
|
return R * 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
|
||||||
|
|
||||||
|
|
||||||
|
def geocode(query: str, limit: int = 10):
    """Forward to ``geocode()`` in the sibling ``geocode`` module (lib/geocode.py).

    The module is imported inside the function; the result of the delegated
    call is returned unchanged.
    """
    from . import geocode as structured_geocoder
    return structured_geocoder.geocode(query, limit=limit)
|
||||||
|
|
||||||
|
|
||||||
|
def _geocode(query: str):
    """Resolve *query* to its best match, for use by route().

    Returns:
        ``(lat, lon, display_name)`` of the top geocode result.

    Raises:
        ValueError: If geocoding yields no results.
    """
    matches = geocode(query, limit=1).get('results', [])
    if not matches:
        raise ValueError(f"Could not find location: {query}")
    best = matches[0]
    return best['lat'], best['lon'], best['name']
|
||||||
|
|
||||||
|
|
||||||
|
def reverse_geocode(lat: float, lon: float) -> str:
    """Reverse geocode coordinates via Photon. Returns formatted address string.

    Args:
        lat: Latitude in decimal degrees.
        lon: Longitude in decimal degrees.

    Returns:
        The non-empty name/housenumber/street/city/state/country/postcode
        properties of the top Photon feature joined with ", ", or
        ``"<lat>, <lon>"`` when Photon returns no usable feature.

    Raises:
        RuntimeError: If Photon cannot be reached, answers with an HTTP
            error, or returns a body that is not valid JSON.
    """
    fallback = f"{lat}, {lon}"

    try:
        resp = requests.get(
            f"{PHOTON_URL}/reverse",
            params={"lat": lat, "lon": lon, "limit": 1},
            timeout=10,
        )
        resp.raise_for_status()
        data = resp.json()
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers a non-JSON body; chain the cause for debugging.
        raise RuntimeError("Navigation service unavailable") from exc

    features = data.get("features") if isinstance(data, dict) else None
    if not features:
        return fallback

    # Tolerate a feature without a "properties" object.
    props = features[0].get("properties") or {}
    parts = [
        str(props[key])  # str() guards join() against non-string values
        for key in ("name", "housenumber", "street", "city", "state", "country", "postcode")
        if props.get(key)
    ]
    return ", ".join(parts) if parts else fallback
|
||||||
|
|
||||||
|
|
||||||
|
def route(origin: str, destination: str, mode: str = "auto") -> dict:
    """
    Get a route between two locations.

    Args:
        origin: Starting location — address, place name, or "lat,lon"
        destination: Destination — address, place name, or "lat,lon"
        mode: Travel mode — auto, pedestrian, bicycle, truck.
            Any other value falls back to "auto".

    Returns:
        dict with summary, maneuvers, origin/destination info, and raw shape

    Raises:
        ValueError: If either endpoint cannot be geocoded.
        RuntimeError: If Valhalla is unreachable, reports a routing error,
            or returns a response that cannot be interpreted.
    """
    if mode not in VALID_MODES:
        mode = "auto"

    # Geocode both endpoints
    orig_lat, orig_lon, orig_name = _geocode(origin)
    dest_lat, dest_lon, dest_name = _geocode(destination)

    # Query Valhalla
    valhalla_req = {
        "locations": [
            {"lat": orig_lat, "lon": orig_lon},
            {"lat": dest_lat, "lon": dest_lon},
        ],
        "costing": mode,
        "directions_options": {"units": "miles"},
    }

    try:
        resp = requests.post(
            f"{VALHALLA_URL}/route",
            json=valhalla_req,
            timeout=30,
        )
    except requests.RequestException as exc:
        raise RuntimeError("Navigation service unavailable") from exc

    if resp.status_code != 200:
        try:
            msg = resp.json().get("error", "Unknown routing error")
        except Exception:
            # Error body was not JSON (or not an object); report the status.
            msg = f"Routing error (HTTP {resp.status_code})"
        raise RuntimeError(f"No route found between locations: {msg}")

    try:
        trip = resp.json()["trip"]
        summary = trip["summary"]
        leg = trip["legs"][0]

        # Build maneuver list
        maneuvers = []
        for m in leg["maneuvers"]:
            streets = m.get("street_names", [])
            maneuvers.append({
                "instruction": m["instruction"],
                "distance_miles": round(m.get("length", 0), 2),
                "street_name": streets[0] if streets else "",
                "type": m.get("type", 0),
                "verbal_succinct": m.get("verbal_succinct_transition_instruction", ""),
            })

        return {
            "origin": {"name": orig_name, "lat": orig_lat, "lon": orig_lon},
            "destination": {"name": dest_name, "lat": dest_lat, "lon": dest_lon},
            "summary": {
                "distance_miles": round(summary["length"], 1),
                "time_minutes": round(summary["time"] / 60, 1),
                "mode": mode,
            },
            "maneuvers": maneuvers,
            "shape": leg.get("shape", ""),
        }
    except (ValueError, KeyError, IndexError, TypeError, AttributeError) as exc:
        # A 200 with a non-JSON or unexpectedly shaped body should surface as
        # the same error type callers already handle, not a stray KeyError.
        raise RuntimeError("Navigation service returned an unexpected response") from exc
|
||||||
77
lib/nav_tools_test.py
Normal file
77
lib/nav_tools_test.py
Normal file
|
|
@ -0,0 +1,77 @@
|
||||||
|
"""Tests for nav_tools — run against live Photon + Valhalla services."""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import json
|
||||||
|
|
||||||
|
from nav_tools import route, reverse_geocode
|
||||||
|
|
||||||
|
|
||||||
|
def test_route_named():
    """Route between two place names and sanity-check distance, time, steps and shape."""
    print("TEST 1: route('Buhl Idaho', 'Boise Idaho', 'auto')")
    trip = route("Buhl Idaho", "Boise Idaho", "auto")
    summary = trip["summary"]
    assert summary["distance_miles"] > 50, f"Expected >50 mi, got {summary['distance_miles']}"
    assert summary["time_minutes"] > 60, f"Expected >60 min, got {summary['time_minutes']}"
    assert len(trip["maneuvers"]) > 5, f"Expected >5 maneuvers, got {len(trip['maneuvers'])}"
    assert trip["shape"], "Missing polyline shape"
    print(f" OK — {summary['distance_miles']} mi, {summary['time_minutes']} min, {len(trip['maneuvers'])} maneuvers")
    print(f" Origin: {trip['origin']['name']}")
    print(f" Destination: {trip['destination']['name']}")
    print(f" First maneuver: {trip['maneuvers'][0]['instruction']}")
|
||||||
|
|
||||||
|
|
||||||
|
def test_route_coords():
    """Route between two raw "lat,lon" strings and sanity-check distance and step count."""
    print("\nTEST 2: route('42.5991,-114.7636', '43.615,-116.2023', 'auto')")
    trip = route("42.5991,-114.7636", "43.615,-116.2023", "auto")
    summary = trip["summary"]
    assert summary["distance_miles"] > 100, f"Expected >100 mi, got {summary['distance_miles']}"
    assert len(trip["maneuvers"]) > 3, f"Expected >3 maneuvers"
    print(f" OK — {summary['distance_miles']} mi, {summary['time_minutes']} min")
|
||||||
|
|
||||||
|
|
||||||
|
def test_route_pedestrian():
    """Route in pedestrian mode and check the mode is echoed and the pace is plausible."""
    print("\nTEST 3: route('Buhl Idaho', 'Boise Idaho', 'pedestrian')")
    summary = route("Buhl Idaho", "Boise Idaho", "pedestrian")["summary"]
    assert summary["mode"] == "pedestrian"
    # More minutes than miles means slower than 60 mph.
    assert summary["time_minutes"] > summary["distance_miles"], "Walking should take more min than miles"
    print(f" OK — {summary['distance_miles']} mi, {summary['time_minutes']} min (pedestrian)")
|
||||||
|
|
||||||
|
|
||||||
|
def test_reverse_geocode():
    """Reverse geocode a fixed point and expect a Buhl / Twin Falls / Idaho mention."""
    print("\nTEST 4: reverse_geocode(42.5991, -114.7636)")
    result = reverse_geocode(42.5991, -114.7636)
    expected_places = ("Buhl", "Twin Falls", "Idaho")
    assert any(place in result for place in expected_places), f"Expected Buhl/Idaho, got: {result}"
    print(f" OK — {result}")
|
||||||
|
|
||||||
|
|
||||||
|
def test_route_bad_origin():
    """route with nonexistent place returns clean error.

    Passes when route() raises ValueError or RuntimeError. If route()
    unexpectedly succeeds, raises AssertionError so the runner — which counts
    a test as failed only when it raises — records the failure.
    """
    print("\nTEST 5: route('nonexistent place xyz123abc', 'Boise Idaho')")
    try:
        r = route("nonexistent place xyz123abc", "Boise Idaho")
    except ValueError as e:
        print(f" OK — clean error: {e}")
    except RuntimeError as e:
        print(f" OK — runtime error: {e}")
    else:
        # The runner prints " FAIL — <message>" for us.
        raise AssertionError(f"expected error, got result: {r['summary']}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Run each test in order; a test fails when it raises any exception.
    tests = [
        test_route_named,
        test_route_coords,
        test_route_pedestrian,
        test_reverse_geocode,
        test_route_bad_origin,
    ]
    passed = failed = 0

    for test in tests:
        try:
            test()
        except Exception as e:
            print(f" FAIL — {e}")
            failed += 1
        else:
            passed += 1

    print(f"\n{'='*40}")
    print(f"Results: {passed} passed, {failed} failed out of {len(tests)}")
    sys.exit(1 if failed else 0)
|
||||||
228
lib/netsyms.py
Normal file
228
lib/netsyms.py
Normal file
|
|
@ -0,0 +1,228 @@
|
||||||
|
"""
|
||||||
|
RECON Netsyms AddressDatabase2025 — SQLite-backed US+CA address lookup.
|
||||||
|
|
||||||
|
Provides 159.78M geocoded addresses as tier-2 between address book
|
||||||
|
(exact named locations) and Photon (full-text global geocoding).
|
||||||
|
|
||||||
|
Database: /mnt/nav/addresses/AddressDatabase2025.sqlite (read-only)
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sqlite3
|
||||||
|
import threading
|
||||||
|
|
||||||
|
from .utils import setup_logging
|
||||||
|
|
||||||
|
logger = setup_logging('recon.netsyms')
|
||||||
|
|
||||||
|
_DB_PATH = '/mnt/nav/addresses/AddressDatabase2025.sqlite'
|
||||||
|
|
||||||
|
_conn = None
|
||||||
|
_lock = threading.Lock()
|
||||||
|
_cached_row_count = None
|
||||||
|
|
||||||
|
# US states + DC + territories, CA provinces, for free-text parsing
|
||||||
|
_STATE_CODES = {
|
||||||
|
'AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA',
|
||||||
|
'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD',
|
||||||
|
'MA', 'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ',
|
||||||
|
'NM', 'NY', 'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC',
|
||||||
|
'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY',
|
||||||
|
'DC', 'PR', 'VI', 'GU', 'AS', 'MP',
|
||||||
|
# Canadian provinces
|
||||||
|
'AB', 'BC', 'MB', 'NB', 'NL', 'NS', 'NT', 'NU', 'ON', 'PE',
|
||||||
|
'QC', 'SK', 'YT',
|
||||||
|
}
|
||||||
|
|
||||||
|
_NUMBER_RE = re.compile(r'^(\d+[\w-]*)(.*)$')
|
||||||
|
|
||||||
|
|
||||||
|
def _get_conn():
    """Return the shared read-only SQLite connection, opening it on first use.

    The connection is opened at most once: the module-level ``_conn`` is
    checked, then re-checked while holding ``_lock`` before connecting. It is
    opened from a ``file:...?mode=ro`` URI with ``check_same_thread=False``
    and ``sqlite3.Row`` as its row factory.
    """
    global _conn
    if _conn is None:
        with _lock:
            # Another thread may have opened it while we waited for the lock.
            if _conn is None:
                _conn = sqlite3.connect(
                    f'file:{_DB_PATH}?mode=ro', uri=True, check_same_thread=False
                )
                _conn.row_factory = sqlite3.Row
                logger.info("Netsyms DB opened: %s", _DB_PATH)
    return _conn
|
||||||
|
|
||||||
|
|
||||||
|
def _row_to_dict(row):
|
||||||
|
"""Convert a sqlite3.Row to a plain dict with lat/lon keys."""
|
||||||
|
return {
|
||||||
|
'zipcode': row['zipcode'],
|
||||||
|
'number': row['number'],
|
||||||
|
'street': row['street'],
|
||||||
|
'street2': row['street2'],
|
||||||
|
'city': row['city'],
|
||||||
|
'state': row['state'],
|
||||||
|
'plus4': row['plus4'],
|
||||||
|
'country': row['country'],
|
||||||
|
'lat': float(row['latitude']),
|
||||||
|
'lon': float(row['longitude']),
|
||||||
|
'source': row['source'],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def lookup_by_street(number, street, city=None, state=None,
                     zipcode=None, country=None, limit=20):
    """Look up addresses by exact house number and street.

    ``number`` and ``street`` are always part of the filter; ``city``,
    ``state``, ``zipcode`` and ``country`` are added only when truthy.
    Every value except the zipcode is stripped and upper-cased before
    being bound (presumably the table stores upper-case text — confirm
    against the database). All values are bound as SQL parameters.

    Returns a list of address dicts (at most ``limit``), or ``[]`` when
    SQLite reports an error.
    """
    conn = _get_conn()

    # (column, bound value) pairs; every one becomes a "column = ?" clause.
    filters = [
        ('number', str(number).strip().upper()),
        ('street', street.strip().upper()),
    ]
    if city:
        filters.append(('city', city.strip().upper()))
    if state:
        filters.append(('state', state.strip().upper()))
    if zipcode:
        filters.append(('zipcode', zipcode.strip()))
    if country:
        filters.append(('country', country.strip().upper()))

    where = ' AND '.join(f'{column} = ?' for column, _ in filters)
    sql = f"SELECT * FROM addresses WHERE {where} LIMIT ?"
    params = [value for _, value in filters]
    params.append(limit)

    with _lock:
        try:
            rows = conn.execute(sql, params).fetchall()
        except sqlite3.Error as e:
            logger.warning("Netsyms lookup_by_street error: %s", e)
            return []

    results = []
    for r in rows:
        results.append(_row_to_dict(r))
    logger.debug("lookup_by_street(%s, %s, city=%s, state=%s) → %d results",
                 number, street, city, state, len(results))
    return results
|
||||||
|
|
||||||
|
|
||||||
|
def lookup_free_text(query, country_hint=None):
    """Parse a free-text address and look it up.

    Parsing is positional and heuristic:

    1. A trailing 5-digit token is taken as the zipcode.
    2. A trailing token found in ``_STATE_CODES`` is taken as the state,
       but only when at least one other token precedes it.
    3. A leading token that starts with a digit is the house number.
    4. If a state was found and two or more tokens remain, the last one
       is the city (so only single-word cities are recognised).
    5. Whatever is left is joined with spaces and used as the street.

    Args:
        query: Free-text address, e.g. ``"214 North St, Filer, ID 83328"``.
        country_hint: Optional country value forwarded to
            ``lookup_by_street``.

    Returns:
        Street matches when a house number was parsed and the street
        lookup found something; otherwise up to 20 zipcode matches when a
        zipcode was parsed; otherwise ``[]``.
    """
    q = query.strip()
    if not q:
        return []

    # Strip trailing zipcode if present
    zipcode = None
    zip_match = re.search(r'\b(\d{5})\s*$', q)
    if zip_match:
        zipcode = zip_match.group(1)
        q = q[:zip_match.start()].strip().rstrip(',').strip()

    # NOTE: no early return on an empty token list here. A bare zipcode
    # query ("83328") leaves nothing after the zipcode is stripped, and it
    # must reach the zipcode fallback below instead of returning [].
    tokens = [t for t in re.split(r'[,\s]+', q) if t]

    # Strip trailing state
    state = None
    if len(tokens) >= 2 and tokens[-1].upper() in _STATE_CODES:
        state = tokens[-1].upper()
        tokens = tokens[:-1]

    # Leading digits → number
    number = None
    if tokens and re.match(r'^\d', tokens[0]):
        number = tokens[0]
        tokens = tokens[1:]

    if not tokens:
        # Nothing usable as a street (bare zipcode, or number + zipcode):
        # fall back to the zipcode if we have one.
        if zipcode:
            return lookup_by_zipcode(zipcode, limit=20)
        return []

    # If state was found and we have 2+ tokens remaining, last token is city
    city = None
    if state and len(tokens) >= 2:
        city = tokens[-1]
        tokens = tokens[:-1]

    street = ' '.join(tokens)

    if number:
        results = lookup_by_street(number, street, city=city, state=state,
                                   zipcode=zipcode, country=country_hint)
        if results:
            logger.debug("lookup_free_text(%r) → %d results via street match",
                         query, len(results))
            return results

    # Fallback: try zipcode only if available
    if zipcode:
        return lookup_by_zipcode(zipcode, limit=20)

    logger.debug("lookup_free_text(%r) → 0 results", query)
    return []
|
||||||
|
|
||||||
|
|
||||||
|
def lookup_by_zipcode(zipcode, limit=100):
    """Return up to ``limit`` address dicts whose zipcode equals ``zipcode``.

    The zipcode is stripped of surrounding whitespace before being bound.
    Returns ``[]`` when SQLite reports an error.
    """
    conn = _get_conn()
    query_args = [zipcode.strip(), limit]

    with _lock:
        try:
            cursor = conn.execute(
                "SELECT * FROM addresses WHERE zipcode = ? LIMIT ?",
                query_args,
            )
            rows = cursor.fetchall()
        except sqlite3.Error as e:
            logger.warning("Netsyms lookup_by_zipcode error: %s", e)
            return []

    results = list(map(_row_to_dict, rows))
    logger.debug("lookup_by_zipcode(%s) → %d results", zipcode, len(results))
    return results
|
||||||
|
|
||||||
|
|
||||||
|
def health():
    """Report database health.

    Returns a dict with:
        ok: False when the file is missing, the connection cannot be
            opened, or either health query fails; True otherwise.
        row_count: ``COUNT(*)`` of ``addresses``. Cached in
            ``_cached_row_count`` after the first *successful* query;
            0 when unavailable.
        file_size_bytes: Size of the file at ``_DB_PATH`` (0 if unreadable).
        indexed_countries: Sorted distinct ``country`` values
            (queried on every call; ``[]`` on error).
    """
    global _cached_row_count

    try:
        file_size = os.path.getsize(_DB_PATH)
    except OSError:
        return {'ok': False, 'row_count': 0, 'file_size_bytes': 0,
                'indexed_countries': []}

    try:
        conn = _get_conn()
    except Exception as e:
        # Boundary handler: a health check must report, not raise.
        logger.warning("Netsyms health: cannot open DB: %s", e)
        return {'ok': False, 'row_count': 0, 'file_size_bytes': file_size,
                'indexed_countries': []}

    ok = True
    with _lock:
        row_count = _cached_row_count
        if row_count is None:
            try:
                row = conn.execute(
                    "SELECT COUNT(*) AS cnt FROM addresses"
                ).fetchone()
                row_count = _cached_row_count = row['cnt']
            except sqlite3.Error as e:
                # Do NOT cache the failure: leave _cached_row_count as None
                # so the next call retries instead of reporting 0 forever.
                logger.warning("Netsyms health: row count failed: %s", e)
                row_count = 0
                ok = False

        try:
            rows = conn.execute(
                "SELECT DISTINCT country FROM addresses"
            ).fetchall()
            countries = sorted(r['country'] for r in rows)
        except sqlite3.Error as e:
            logger.warning("Netsyms health: country query failed: %s", e)
            countries = []
            ok = False

    return {
        'ok': ok,
        'row_count': row_count,
        'file_size_bytes': file_size,
        'indexed_countries': countries,
    }
|
||||||
108
lib/netsyms_api.py
Normal file
108
lib/netsyms_api.py
Normal file
|
|
@ -0,0 +1,108 @@
|
||||||
|
"""
|
||||||
|
RECON Netsyms API + Geocode — Flask Blueprints.
|
||||||
|
|
||||||
|
GET /api/netsyms/lookup?q=<free text>&country=<optional>
|
||||||
|
GET /api/netsyms/health
|
||||||
|
GET /api/geocode?q=<query>&limit=<N> (Photon-first search with ranked results)
GET /api/reverse?lat=<lat>&lon=<lon> (reverse geocoding via Photon)
|
||||||
|
"""
|
||||||
|
|
||||||
|
from flask import Blueprint, request, jsonify
|
||||||
|
|
||||||
|
from . import netsyms
|
||||||
|
from . import address_book
|
||||||
|
from . import nav_tools
|
||||||
|
from .utils import setup_logging
|
||||||
|
|
||||||
|
logger = setup_logging('recon.netsyms_api')
|
||||||
|
|
||||||
|
netsyms_bp = Blueprint('netsyms', __name__)
|
||||||
|
geocode_bp = Blueprint('geocode', __name__)
|
||||||
|
|
||||||
|
|
||||||
|
@netsyms_bp.route('/api/netsyms/lookup')
def api_netsyms_lookup():
    """Free-text Netsyms lookup.

    GET /api/netsyms/lookup?q=<free text>&country=<optional>

    Responds 400 when ``q`` is missing or blank; otherwise returns the
    matches from ``netsyms.lookup_free_text`` with their count and the
    stripped query.
    """
    query_text = request.args.get('q', '').strip()
    if query_text == '':
        return jsonify({'error': 'Missing q parameter'}), 400

    country_arg = request.args.get('country', '').strip()
    # A blank country parameter means "no hint".
    hint = country_arg if country_arg else None
    matches = netsyms.lookup_free_text(query_text, country_hint=hint)
    payload = {'results': matches, 'count': len(matches), 'query': query_text}
    return jsonify(payload)
|
||||||
|
|
||||||
|
|
||||||
|
@netsyms_bp.route('/api/netsyms/health')
def api_netsyms_health():
    """Return the ``netsyms.health()`` report as JSON."""
    report = netsyms.health()
    return jsonify(report)
|
||||||
|
|
||||||
|
|
||||||
|
@geocode_bp.route('/api/geocode')
def api_geocode():
    """
    Photon-first geocoding with ranked candidates.

    GET /api/geocode?q=<query>&limit=<N>

    ``limit`` is clamped to 1..20 and falls back to 10 when it is not an
    integer. The response body is whatever ``nav_tools.geocode`` returns,
    documented by the original author as always 200 OK with:
    {query, results: [{name, lat, lon, source, confidence, type, raw, ...}], count}

    - source: "address_book" | "coordinates" | "photon"
    - confidence: "exact" | "high" | "medium" | "low"
    - type: "nickname" | "coordinates" | "street_address" | "poi" | "locality"
    - labeled_as: present when result is within 75m of an address book entry
    - Empty results array is valid (no match). No 404s.
    """
    query_text = request.args.get('q', '').strip()
    raw_limit = request.args.get('limit', '10')
    try:
        requested = int(raw_limit)
    except (ValueError, TypeError):
        limit = 10
    else:
        limit = max(1, min(requested, 20))

    return jsonify(nav_tools.geocode(query_text, limit=limit))
|
||||||
|
|
||||||
|
|
||||||
|
@geocode_bp.route('/api/reverse')
def api_reverse():
    """
    Reverse geocode coordinates via Photon.

    GET /api/reverse?lat=X&lon=Y

    Responds 400 when lat/lon are missing, non-numeric, or out of range.
    Otherwise responds with
    {query: "lat,lon", results: [...], count}
    where ``results`` comes from ``_parse_photon_features`` and is empty
    when Photon returns no features or the request to it fails.
    """
    args = request.args
    try:
        lat = float(args.get('lat', ''))
        lon = float(args.get('lon', ''))
    except (ValueError, TypeError):
        return jsonify({'error': 'Missing or invalid lat/lon parameters'}), 400

    # NaN fails both comparisons, so it is rejected here as well.
    in_range = (-90 <= lat <= 90) and (-180 <= lon <= 180)
    if not in_range:
        return jsonify({'error': 'Coordinates out of range'}), 400

    query_str = f"{lat},{lon}"

    def _no_match():
        # Shared "nothing found" body for the failure and empty cases.
        return jsonify({'query': query_str, 'results': [], 'count': 0})

    try:
        # Imported inside the try so a failed import is treated like a
        # failed request.
        import requests as http_requests
        photon_resp = http_requests.get(
            "http://localhost:2322/reverse",
            params={"lat": lat, "lon": lon, "limit": 1},
            timeout=10,
        )
        photon_resp.raise_for_status()
        features = photon_resp.json().get("features", [])
    except Exception:
        logger.warning("Photon reverse geocode failed for %s", query_str)
        return _no_match()

    if not features:
        return _no_match()

    from .geocode import _parse_photon_features
    parsed = _parse_photon_features(features, source='photon_reverse')
    return jsonify({'query': query_str, 'results': parsed, 'count': len(parsed)})
|
||||||
80
lib/netsyms_test.py
Normal file
80
lib/netsyms_test.py
Normal file
|
|
@ -0,0 +1,80 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Tests for Netsyms address database module."""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Ensure the lib directory is importable
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
from lib import netsyms
|
||||||
|
|
||||||
|
|
||||||
|
def test_lookup_by_street_lowercase():
    """A mixed-case street and city should resolve to the expected coordinates."""
    matches = netsyms.lookup_by_street("214", "North St", city="Filer", state="ID")
    assert len(matches) >= 1, f"Expected at least 1 result, got {len(matches)}"
    first = matches[0]
    lat_error = abs(first['lat'] - 42.5736)
    assert lat_error < 0.01, f"Lat mismatch: {first['lat']}"
    lon_error = abs(first['lon'] - (-114.6066))
    assert lon_error < 0.01, f"Lon mismatch: {first['lon']}"
    print(" PASS: lookup_by_street (lowercase)")
|
||||||
|
|
||||||
|
|
||||||
|
def test_lookup_by_street_uppercase():
    """An all-caps street and city should resolve to the expected latitude."""
    matches = netsyms.lookup_by_street("214", "NORTH ST", city="FILER", state="ID")
    assert len(matches) >= 1, f"Expected at least 1 result, got {len(matches)}"
    first = matches[0]
    lat_error = abs(first['lat'] - 42.5736)
    assert lat_error < 0.01, f"Lat mismatch: {first['lat']}"
    print(" PASS: lookup_by_street (uppercase)")
|
||||||
|
|
||||||
|
|
||||||
|
def test_lookup_nonexistent():
    """An address that does not exist should produce an empty list."""
    matches = netsyms.lookup_by_street(
        "999999", "Nonexistent Rd", city="Filer", state="ID",
    )
    assert matches == [], f"Expected empty list, got {len(matches)} results"
    print(" PASS: lookup_by_street (nonexistent)")
|
||||||
|
|
||||||
|
|
||||||
|
def test_free_text_with_commas():
    """A comma-separated free-text address should parse city and state."""
    matches = netsyms.lookup_free_text("214 North St, Filer, ID")
    assert len(matches) >= 1, f"Expected at least 1 result, got {len(matches)}"
    first = matches[0]
    assert r_city_ok(first) if False else first['city'] == 'FILER', f"City mismatch: {first['city']}"
    assert first['state'] == 'ID', f"State mismatch: {first['state']}"
    print(" PASS: lookup_free_text (commas)")
|
||||||
|
|
||||||
|
|
||||||
|
def test_free_text_no_commas():
    """A space-separated free-text address should still parse the state."""
    matches = netsyms.lookup_free_text("214 North St Filer ID")
    assert len(matches) >= 1, f"Expected at least 1 result, got {len(matches)}"
    first = matches[0]
    assert first['state'] == 'ID', f"State mismatch: {first['state']}"
    print(" PASS: lookup_free_text (no commas)")
|
||||||
|
|
||||||
|
|
||||||
|
def test_lookup_by_zipcode():
    """A zipcode lookup with limit=5 should return five rows from that zipcode."""
    matches = netsyms.lookup_by_zipcode("83328", limit=5)
    assert len(matches) == 5, f"Expected 5 results, got {len(matches)}"
    for entry in matches:
        assert entry['zipcode'] == '83328', f"Zipcode mismatch: {entry['zipcode']}"
    print(" PASS: lookup_by_zipcode")
|
||||||
|
|
||||||
|
|
||||||
|
def test_health():
    """The health report should be OK, large enough, and list US and CA."""
    report = netsyms.health()
    assert report['ok'] is True, f"Health not OK: {report}"
    assert report['row_count'] >= 159_000_000, f"Row count too low: {report['row_count']}"
    assert 'US' in report['indexed_countries'], f"US not in countries: {report['indexed_countries']}"
    assert 'CA' in report['indexed_countries'], f"CA not in countries: {report['indexed_countries']}"
    print(" PASS: health")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Run every test in order; the first failing assertion aborts the run.
    print("Running Netsyms tests...")
    for case in (
        test_lookup_by_street_lowercase,
        test_lookup_by_street_uppercase,
        test_lookup_nonexistent,
        test_free_text_with_commas,
        test_free_text_no_commas,
        test_lookup_by_zipcode,
        test_health,
    ):
        case()
    print("All tests passed.")
|
||||||
161
lib/query_router.py
Normal file
161
lib/query_router.py
Normal file
|
|
@ -0,0 +1,161 @@
|
||||||
|
"""Semantic query router for Aurora.
|
||||||
|
|
||||||
|
Classifies user queries into routes (nav_route, nav_reverse_geocode,
|
||||||
|
direct_answer, rag_search) by comparing query embeddings against
|
||||||
|
pre-computed route centroids from example queries.
|
||||||
|
|
||||||
|
TEI endpoint: http://100.64.0.14:8090/embed (cortex via Tailscale)
|
||||||
|
"""
|
||||||
|
|
||||||
|
import math
|
||||||
|
import threading
|
||||||
|
import requests
|
||||||
|
|
||||||
|
# ── Route examples ────────────────────────────────────────────────────────────
|
||||||
|
ROUTE_EXAMPLES = {
|
||||||
|
"nav_route": [
|
||||||
|
"how do I get to Boise",
|
||||||
|
"directions to Twin Falls",
|
||||||
|
"how do I get from Buhl to Boise",
|
||||||
|
"drive from Jerome to Sun Valley",
|
||||||
|
"route from Boise to McCall",
|
||||||
|
"what's the fastest way to Sun Valley",
|
||||||
|
"how far is it to Twin Falls",
|
||||||
|
"take me to Shoshone",
|
||||||
|
"navigate to the airport",
|
||||||
|
"how do I drive to Salt Lake City",
|
||||||
|
"walking directions to the park",
|
||||||
|
"bike route to downtown",
|
||||||
|
],
|
||||||
|
"nav_reverse_geocode": [
|
||||||
|
"what town is at 42.5, -114.7",
|
||||||
|
"where am I right now",
|
||||||
|
"what is at coordinates 43.6, -116.2",
|
||||||
|
"what location is 42.574, -114.607",
|
||||||
|
"where is this place 44.0, -114.3",
|
||||||
|
"what city is near 42.7, -114.5",
|
||||||
|
"reverse geocode 43.0, -115.0",
|
||||||
|
"what's at this location 42.9, -114.8",
|
||||||
|
],
|
||||||
|
"direct_answer": [
|
||||||
|
"hello",
|
||||||
|
"hey aurora",
|
||||||
|
"good morning",
|
||||||
|
"thanks",
|
||||||
|
"thank you",
|
||||||
|
"what's your name",
|
||||||
|
"who are you",
|
||||||
|
"tell me a joke",
|
||||||
|
"how are you",
|
||||||
|
"hi there",
|
||||||
|
],
|
||||||
|
"rag_search": [
|
||||||
|
"what does the survival manual say about water",
|
||||||
|
"how to purify water in the field",
|
||||||
|
"how to treat a gunshot wound",
|
||||||
|
"what is the ranger handbook chapter on patrolling",
|
||||||
|
"field manual water purification",
|
||||||
|
"how to build a shelter in the wilderness",
|
||||||
|
"tactical combat casualty care procedures",
|
||||||
|
"what does FM 21-76 say about fire starting",
|
||||||
|
],
|
||||||
|
}
|
||||||
|
|
||||||
|
# ── Module-level cache ────────────────────────────────────────────────────────
|
||||||
|
_ROUTE_CENTROIDS: dict | None = None
|
||||||
|
_LOCK = threading.Lock()
|
||||||
|
|
||||||
|
|
||||||
|
def _embed_batch(texts: list[str], tei_url: str) -> list[list[float]]:
    """POST ``texts`` to the TEI endpoint and return the decoded JSON body.

    Raises whatever ``requests`` raises on a transport failure or a
    non-success HTTP status. The request times out after 30 seconds.
    """
    payload = {"inputs": texts}
    reply = requests.post(tei_url, json=payload, timeout=30)
    reply.raise_for_status()
    return reply.json()
|
||||||
|
|
||||||
|
|
||||||
|
def _compute_centroid(vectors: list[list[float]]) -> list[float]:
|
||||||
|
"""Element-wise mean of vectors."""
|
||||||
|
n = len(vectors)
|
||||||
|
dim = len(vectors[0])
|
||||||
|
centroid = [0.0] * dim
|
||||||
|
for vec in vectors:
|
||||||
|
for i in range(dim):
|
||||||
|
centroid[i] += vec[i]
|
||||||
|
for i in range(dim):
|
||||||
|
centroid[i] /= n
|
||||||
|
return centroid
|
||||||
|
|
||||||
|
|
||||||
|
def _cosine_similarity(a: list[float], b: list[float]) -> float:
|
||||||
|
"""Cosine similarity between two vectors (pure Python)."""
|
||||||
|
dot = 0.0
|
||||||
|
norm_a = 0.0
|
||||||
|
norm_b = 0.0
|
||||||
|
for i in range(len(a)):
|
||||||
|
dot += a[i] * b[i]
|
||||||
|
norm_a += a[i] * a[i]
|
||||||
|
norm_b += b[i] * b[i]
|
||||||
|
denom = math.sqrt(norm_a) * math.sqrt(norm_b)
|
||||||
|
if denom == 0:
|
||||||
|
return 0.0
|
||||||
|
return dot / denom
|
||||||
|
|
||||||
|
|
||||||
|
def _ensure_centroids(tei_url: str) -> dict[str, list[float]]:
    """Return the cached route centroids, computing them on first use.

    On the first call every example in ``ROUTE_EXAMPLES`` is embedded in
    a single ``_embed_batch`` request and averaged per route; the result
    is stored in ``_ROUTE_CENTROIDS``. ``tei_url`` is only used by the
    call that performs the computation.
    """
    global _ROUTE_CENTROIDS
    if _ROUTE_CENTROIDS is None:
        with _LOCK:
            # Re-check under the lock in case another thread finished first.
            if _ROUTE_CENTROIDS is None:
                routes = list(ROUTE_EXAMPLES)
                # One flat batch, in route order, so slices map back to routes.
                flat_texts = [
                    text for route in routes for text in ROUTE_EXAMPLES[route]
                ]
                vectors = _embed_batch(flat_texts, tei_url)

                built = {}
                cursor = 0
                for route in routes:
                    span = len(ROUTE_EXAMPLES[route])
                    built[route] = _compute_centroid(
                        vectors[cursor:cursor + span])
                    cursor += span

                _ROUTE_CENTROIDS = built
    return _ROUTE_CENTROIDS
|
||||||
|
|
||||||
|
|
||||||
|
def classify(
    query: str,
    tei_url: str = "http://100.64.0.14:8090/embed",
    threshold: float = 0.45,
) -> tuple[str, float]:
    """Pick the route whose centroid is most similar to ``query``.

    The query is embedded via ``tei_url`` and compared by cosine
    similarity against every cached route centroid.

    Returns (route_name, confidence). When the best similarity is below
    ``threshold`` (or no centroid scores above 0.0) the route is
    "rag_search", the safe default.
    """
    centroids = _ensure_centroids(tei_url)
    query_vec = _embed_batch([query], tei_url)[0]

    # Start from the default route with a 0.0 floor; only a strictly
    # higher similarity replaces it.
    winner, top_score = "rag_search", 0.0
    for route, centroid in centroids.items():
        score = _cosine_similarity(query_vec, centroid)
        if score > top_score:
            winner, top_score = route, score

    if top_score < threshold:
        winner = "rag_search"
    return (winner, top_score)
|
||||||
49
lib/query_router_test.py
Normal file
49
lib/query_router_test.py
Normal file
|
|
@ -0,0 +1,49 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Test suite for the semantic query router."""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
from lib.query_router import classify
|
||||||
|
|
||||||
|
TEST_QUERIES = [
|
||||||
|
("how do I get from Buhl to Boise", "nav_route"),
|
||||||
|
("what does the survival manual say about water", "rag_search"),
|
||||||
|
("what town is at 42.5, -114.7", "nav_reverse_geocode"),
|
||||||
|
("hey aurora", "direct_answer"),
|
||||||
|
("what's the fastest way to Sun Valley", "nav_route"),
|
||||||
|
("how to purify water in the field", "rag_search"),
|
||||||
|
("good morning", "direct_answer"),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Classify every entry in TEST_QUERIES and print a pass/fail report.

    Exits with status 1 when any query is routed somewhere other than
    its expected route.
    """
    print("Query Router Test Suite")
    print("=" * 70)

    outcomes = []
    for query, expected in TEST_QUERIES:
        route, confidence = classify(query)
        matched = route == expected
        outcomes.append(matched)
        status = "PASS" if matched else "FAIL"
        print(f" [{status}] {query!r}")
        print(f" → {route} ({confidence:.3f}) expected={expected}")

    passed = outcomes.count(True)
    failed = len(outcomes) - passed

    print("=" * 70)
    print(f"Results: {passed}/{passed + failed} passed")
    if not failed:
        print(" All tests passed!")
        return
    print(f" {failed} FAILED")
    sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
Loading…
Add table
Add a link
Reference in a new issue