diff --git a/config/address_book.yaml b/config/address_book.yaml new file mode 100644 index 0000000..24bc81c --- /dev/null +++ b/config/address_book.yaml @@ -0,0 +1,18 @@ +# RECON Address Book — saved locations for navigation shortcuts. +# Entries are matched by name and aliases (case-insensitive). +# Add new entries by appending to the list below. + +entries: + - id: home + name: Home + aliases: + - home + - matt's house + - 214 north st + - 214 north street + address: "214 North St, Filer, ID 83328" + lat: 42.5735833 + lon: -114.6066389 + tags: + - residence + - primary diff --git a/lib/address_book.py b/lib/address_book.py new file mode 100644 index 0000000..f9827f6 --- /dev/null +++ b/lib/address_book.py @@ -0,0 +1,160 @@ +""" +RECON Address Book — YAML-backed saved-location lookup. + +Provides named locations (home, work, etc.) that short-circuit Photon +geocoding when an exact alias match is found. + +Config: /opt/recon/config/address_book.yaml +""" + +import os +import re +import threading + +import yaml + +from .utils import setup_logging + +logger = setup_logging('recon.address_book') + +_CONFIG_PATH = os.path.join( + os.path.dirname(os.path.dirname(os.path.abspath(__file__))), + 'config', 'address_book.yaml', +) + +_lock = threading.Lock() +_entries: list[dict] = [] +_mtime: float = 0.0 + + +def _reload_if_changed(): + """Reload the YAML file if its mtime has changed.""" + global _entries, _mtime + try: + st = os.stat(_CONFIG_PATH) + except FileNotFoundError: + logger.warning("Address book not found: %s", _CONFIG_PATH) + _entries = [] + _mtime = 0.0 + return + + if st.st_mtime == _mtime: + return + + with _lock: + # Double-check after acquiring lock + try: + st = os.stat(_CONFIG_PATH) + except FileNotFoundError: + _entries = [] + _mtime = 0.0 + return + if st.st_mtime == _mtime: + return + + with open(_CONFIG_PATH, 'r') as f: + data = yaml.safe_load(f) or {} + + raw = data.get('entries', []) + loaded = [] + for entry in raw: + # Normalise aliases to 
lowercase for matching + aliases = [a.lower() for a in entry.get('aliases', [])] + loaded.append({ + 'id': entry.get('id', ''), + 'name': entry.get('name', ''), + 'aliases': aliases, + 'address': entry.get('address', ''), + 'lat': entry.get('lat'), + 'lon': entry.get('lon'), + 'tags': entry.get('tags', []), + }) + _entries = loaded + _mtime = st.st_mtime + logger.info("Address book loaded: %d entries from %s", len(_entries), _CONFIG_PATH) + + +def load(): + """Ensure the address book is loaded (and refreshed if the file changed).""" + _reload_if_changed() + return _entries + + +def _normalize(text: str) -> str: + """Lowercase, strip, remove commas, collapse whitespace.""" + t = text.strip().lower() + t = t.replace(',', ' ') + return ' '.join(t.split()) + + +def lookup(query: str): + """ + Look up a query against name and aliases. + + Returns dict with the matching entry plus a 'confidence' field: + - "exact": full name/alias match, OR query starts with alias + word boundary + - "partial": alias starts with query + word boundary, or alias appears + as a contiguous token sequence inside the query + - None if no match + + Matching order (first exact wins, else first partial): + 1. normalized(query) == normalized(name or alias) → exact + 2. normalized(query) starts with normalized(alias) + " " → exact + 3. normalized(alias) starts with normalized(query) + " " → partial + 4. 
normalized(alias) is a contiguous token sub-sequence → partial + """ + _reload_if_changed() + q = _normalize(query) + if not q: + return None + + first_exact = None + first_partial = None + + for entry in _entries: + norm_name = _normalize(entry['name']) + check_aliases = [_normalize(a) for a in entry.get('aliases', [])] + all_forms = [norm_name] + check_aliases + + for form in all_forms: + if not form: + continue + + # Rule 1: exact match + if q == form: + return {**entry, 'confidence': 'exact'} + + # Rule 2: query starts with alias + word boundary + if q.startswith(form + ' '): + if first_exact is None: + first_exact = entry + continue + + # Rule 3: alias starts with query (user still typing) + if form.startswith(q) and len(q) < len(form): + if first_partial is None: + first_partial = entry + continue + + # Rule 4: alias is contiguous token sub-sequence in query + # Build regex: token1\s+token2\s+...tokenN + tokens = form.split() + if len(tokens) >= 1: + pattern = r'(?:^|\s)' + r'\s+'.join(re.escape(t) for t in tokens) + r'(?:\s|$)' + if re.search(pattern, q): + if first_partial is None: + first_partial = entry + + if first_exact is not None: + return {**first_exact, 'confidence': 'exact'} + + if first_partial is not None: + return {**first_partial, 'confidence': 'partial'} + + return None + + +def list_all(): + """Return all address book entries.""" + _reload_if_changed() + return list(_entries) diff --git a/lib/address_book_api.py b/lib/address_book_api.py new file mode 100644 index 0000000..020828b --- /dev/null +++ b/lib/address_book_api.py @@ -0,0 +1,31 @@ +""" +RECON Address Book API — Flask Blueprint. + +GET /api/address_book/lookup?q= — best match or 404 +GET /api/address_book/list — all entries +""" + +from flask import Blueprint, request, jsonify + +from . 
@address_book_bp.route('/api/address_book/lookup')
def api_address_book_lookup():
    """Return the best address-book match for ``?q=``.

    Responds 400 with a JSON error when ``q`` is missing or blank, 404 with
    an empty body when nothing matches, and the matched entry as JSON
    otherwise.
    """
    query = request.args.get('q', '').strip()
    if query:
        match = address_book.lookup(query)
        if match is not None:
            return jsonify(match)
        return '', 404
    return jsonify({'error': 'Missing q parameter'}), 400


@address_book_bp.route('/api/address_book/list')
def api_address_book_list():
    """Return every address-book entry as a JSON array."""
    return jsonify(address_book.list_all())
alias)", + lambda: address_book.lookup("214 north st filer"), + lambda r: r is not None and r['confidence'] == 'exact' and r['id'] == 'home'), + + ("lookup('214 North St Filer ID') → exact (case + trailing state)", + lambda: address_book.lookup("214 North St Filer ID"), + lambda r: r is not None and r['confidence'] == 'exact' and r['id'] == 'home'), + + ("lookup('214 north st, filer, id') → exact (commas stripped)", + lambda: address_book.lookup("214 north st, filer, id"), + lambda r: r is not None and r['confidence'] == 'exact' and r['id'] == 'home'), + + ("lookup('home today') → exact (short alias + trailing text)", + lambda: address_book.lookup("home today"), + lambda r: r is not None and r['confidence'] == 'exact' and r['id'] == 'home'), + + ("lookup('214') → partial (query is prefix of alias)", + lambda: address_book.lookup("214"), + lambda r: r is not None and r['confidence'] == 'partial'), + + ("lookup('214 n') → partial (partial prefix of alias)", + lambda: address_book.lookup("214 n"), + lambda r: r is not None and r['confidence'] == 'partial'), + + ("lookup('completely unrelated query') → None", + lambda: address_book.lookup("completely unrelated query"), + lambda r: r is None), + + ("lookup('214 north streets of filer') → None (no word boundary after st)", + lambda: address_book.lookup("214 north streets of filer"), + lambda r: r is None), +] + +passed = 0 +failed = 0 +for name, fn, check in TESTS: + try: + result = fn() + ok = check(result) + except Exception as e: + ok = False + result = f"EXCEPTION: {e}" + + status = "PASS" if ok else "FAIL" + if ok: + passed += 1 + else: + failed += 1 + print(f" [{status}] {name}") + if not ok: + print(f" got: {result}") + +print(f"\n{passed} passed, {failed} failed") +sys.exit(0 if failed == 0 else 1) diff --git a/lib/api.py b/lib/api.py index 6a3d627..7c54fe8 100644 --- a/lib/api.py +++ b/lib/api.py @@ -57,6 +57,16 @@ class _LargeZimRequest(_FlaskRequest): return super()._get_file_stream(total_content_length, 
class Tools:
    """Navigation tool: geocodes via Photon, routes via Valhalla."""

    class Valves(BaseModel):
        photon_url: str = Field(
            default="http://100.64.0.24:2322",
            description="Photon geocoding service URL (recon-vm)",
        )
        valhalla_url: str = Field(
            default="http://100.64.0.24:8002",
            description="Valhalla routing service URL (recon-vm)",
        )

    def __init__(self):
        self.valves = self.Valves()

    def _geocode(self, query: str):
        """Resolve *query* to ``(lat, lon, display_name)``.

        A literal ``lat,lon`` string is returned as-is without a network
        call.  Anything else is sent to Photon and the top hit is used.

        :return: ``(lat, lon, name)``, or ``(None, None, None)`` when Photon
            has no usable result
        :raises requests.RequestException: on network or HTTP failure
        :raises ValueError: if Photon answers with a non-JSON body
        """
        m = _COORD_RE.match(query.strip())
        if m:
            lat, lon = float(m.group(1)), float(m.group(2))
            return lat, lon, query

        resp = requests.get(
            f"{self.valves.photon_url}/api",
            params={"q": query, "limit": 1},
            timeout=10,
        )
        resp.raise_for_status()
        features = resp.json().get("features", [])
        if not features:
            return None, None, None

        top = features[0]
        # GeoJSON order is [lon, lat]; a hit without a point is unusable.
        coords = (top.get("geometry") or {}).get("coordinates") or []
        if len(coords) < 2:
            return None, None, None

        props = top.get("properties") or {}
        parts = [props.get("name", "")]
        for key in ("city", "state", "country"):
            v = props.get(key)
            # Skip a level that merely repeats the previous one
            # (e.g. a city whose name equals the place name).
            if v and v != parts[-1]:
                parts.append(v)
        return coords[1], coords[0], ", ".join(p for p in parts if p)

    def get_directions(
        self,
        origin: str,
        destination: str,
        mode: str = "auto",
    ) -> str:
        """
        Get turn-by-turn directions between two locations. When this tool returns results, present the directions exactly as returned — do not summarize or rephrase. Include all steps.

        :param origin: Starting location — address, place name, or lat,lon coordinates
        :param destination: Destination — address, place name, or lat,lon coordinates
        :param mode: Travel mode: auto, pedestrian, bicycle, or truck (default: auto)
        :return: Formatted turn-by-turn directions
        """
        if mode not in ("auto", "pedestrian", "bicycle", "truck"):
            mode = "auto"

        # Geocoding talks to a remote service too; report its failures as a
        # message instead of letting the exception escape the tool call.
        try:
            orig_lat, orig_lon, orig_name = self._geocode(origin)
            if orig_lat is None:
                return f"Could not find location: {origin}"

            dest_lat, dest_lon, dest_name = self._geocode(destination)
            if dest_lat is None:
                return f"Could not find location: {destination}"
        except (requests.RequestException, ValueError):
            return "Geocoding service unavailable"

        try:
            resp = requests.post(
                f"{self.valves.valhalla_url}/route",
                json={
                    "locations": [
                        {"lat": orig_lat, "lon": orig_lon},
                        {"lat": dest_lat, "lon": dest_lon},
                    ],
                    "costing": mode,
                    "directions_options": {"units": "miles"},
                },
                timeout=30,
            )
        except requests.RequestException:
            return "Navigation service unavailable"

        if resp.status_code != 200:
            return "No route found between locations"

        # A 200 with an unexpected body is treated like "no route".
        try:
            trip = resp.json()["trip"]
            summary = trip["summary"]
            # Two locations produce a single leg.
            maneuvers = trip["legs"][0]["maneuvers"]
            miles = round(summary["length"], 1)
            minutes = round(summary["time"] / 60, 1)
        except (ValueError, KeyError, IndexError, TypeError):
            return "No route found between locations"

        lines = [
            f"Directions from {orig_name} to {dest_name} ({mode}):",
            f"Distance: {miles} miles | Time: {minutes} minutes",
            "",
        ]
        for i, maneuver in enumerate(maneuvers, 1):
            inst = maneuver.get("instruction", "")
            dist = maneuver.get("length") or 0
            if dist > 0:
                lines.append(f"{i}. {inst} — {round(dist, 1)} mi")
            else:
                lines.append(f"{i}. {inst}")

        return "\n".join(lines)
+W_LOCALITY_FUZZ = 2.0 # city/state fuzzy match +W_SOURCE_AUTHORITY = 2.0 # Netsyms for US addresses +W_LAYER_RANK = 1.0 # type-appropriate results ranked higher +W_PHOTON_POSITION_NORM = 1.0 # Photon's native ranking (normalized by position) +W_STATE_EXACT = 1.0 # exact state code match + +# ── US abbreviation expansions ── +# Applied ONLY to parsed StreetName/StreetNamePostType tokens, NOT to ordinals. +_STREET_TYPE_ABBREVS = { + 'st': 'street', 'ave': 'avenue', 'blvd': 'boulevard', 'dr': 'drive', + 'rd': 'road', 'ln': 'lane', 'ct': 'court', 'cir': 'circle', + 'pl': 'place', 'way': 'way', 'pkwy': 'parkway', 'hwy': 'highway', + 'trl': 'trail', 'ter': 'terrace', 'sq': 'square', +} +_DIRECTIONAL_ABBREVS = { + 'n': 'north', 's': 'south', 'e': 'east', 'w': 'west', + 'ne': 'northeast', 'nw': 'northwest', 'se': 'southeast', 'sw': 'southwest', +} +_ORDINAL_RE = re.compile(r'^\d+(st|nd|rd|th)$', re.IGNORECASE) + +# ── US state codes ── +_STATE_CODES = { + 'AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA', + 'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD', + 'MA', 'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ', + 'NM', 'NY', 'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', + 'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY', 'DC', +} + +# Coordinate regex +_COORD_RE = re.compile(r'^\s*(-?\d+\.?\d*)\s*[,\s]\s*(-?\d+\.?\d*)\s*$') + + +# ═══════════════════════════════════════════════════════════════════ +# STEP 1: PREPROCESSING +# ═══════════════════════════════════════════════════════════════════ + +def _parse_coords(text): + """Return (lat, lon) if text looks like coordinates with valid bounds, else None.""" + m = _COORD_RE.match(text.strip()) + if not m: + return None + lat, lon = float(m.group(1)), float(m.group(2)) + if -90 <= lat <= 90 and -180 <= lon <= 180: + return lat, lon + return None + + +def _classify_and_parse(query): + """ + Parse query with usaddress, classify intent, expand abbreviations. 
+ + Returns (intent, parsed_dict) where: + intent: 'ADDRESS' | 'POI' | 'LOCALITY' | 'POSTCODE' | 'COORD' | 'UNKNOWN' + parsed_dict: {number, street, city, state, zipcode, raw_query, expanded_query} + """ + q = query.strip() + parsed = { + 'number': None, 'street': None, 'street_raw': None, + 'city': None, 'state': None, + 'zipcode': None, 'raw_query': q, 'expanded_query': q, + } + + # Coordinate check first + if _parse_coords(q): + return 'COORD', parsed + + # Try usaddress + try: + tagged, addr_type = usaddress.tag(q) + except usaddress.RepeatedLabelError: + # Ambiguous input — fall back to free-text Photon + return 'UNKNOWN', parsed + + # Extract components + number = tagged.get('AddressNumber', '').strip() + street_name = tagged.get('StreetName', '').strip() + street_pre_dir = tagged.get('StreetNamePreDirectional', '').strip() + street_post_type = tagged.get('StreetNamePostType', '').strip() + place = tagged.get('PlaceName', '').strip() + state = tagged.get('StateName', '').strip() + zipcode = tagged.get('ZipCode', '').strip() + + # ── Fix usaddress edge case: "214 N St Filer" ── + # usaddress reads single-letter directional + "St" as PreDirectional + empty, + # mashing "St Filer" into StreetName. Detect: PreDirectional is single letter, + # StreetName has 2+ tokens where the first is a street type. 
+ if (street_pre_dir and len(street_pre_dir) <= 2 + and not street_name.strip().startswith(street_pre_dir) + and ' ' in street_name): + name_tokens = street_name.split() + first_lower = name_tokens[0].lower() + if first_lower in _STREET_TYPE_ABBREVS or first_lower in _STREET_TYPE_ABBREVS.values(): + # "N" is actually the street name, "St" is the post-type + street_name = street_pre_dir + street_post_type = name_tokens[0] + if len(name_tokens) > 1: + place = ' '.join(name_tokens[1:]) + street_pre_dir = '' + + # ── Expand abbreviations (guard ordinals) ── + expanded_parts = [] + + if number: + parsed['number'] = number + expanded_parts.append(number) + + if street_pre_dir: + exp = _DIRECTIONAL_ABBREVS.get(street_pre_dir.lower(), street_pre_dir) + expanded_parts.append(exp) + + if street_name: + # Don't expand ordinals: "21st" stays "21st" + if _ORDINAL_RE.match(street_name): + expanded_parts.append(street_name) + else: + # Expand directional abbreviation if it IS the street name + exp = _DIRECTIONAL_ABBREVS.get(street_name.lower(), street_name) + expanded_parts.append(exp) + parsed['street'] = street_name + + if street_post_type: + if _ORDINAL_RE.match(street_post_type): + expanded_parts.append(street_post_type) + else: + exp = _STREET_TYPE_ABBREVS.get(street_post_type.lower(), street_post_type) + expanded_parts.append(exp) + + # Build raw street (original abbreviations, for Netsyms) and expanded (for Photon) + raw_street_parts = [] + if street_pre_dir: + raw_street_parts.append(street_pre_dir) + if street_name: + raw_street_parts.append(street_name) + if street_post_type: + raw_street_parts.append(street_post_type) + parsed['street_raw'] = ' '.join(raw_street_parts) + + # Build the full expanded street + if expanded_parts: + # The street is everything after the number + street_full = ' '.join(expanded_parts[1:] if number else expanded_parts) + parsed['street'] = street_full + + if place: + parsed['city'] = place + expanded_parts.append(place) + if state: + 
parsed['state'] = state.upper() + expanded_parts.append(state) + if zipcode: + parsed['zipcode'] = zipcode + expanded_parts.append(zipcode) + + parsed['expanded_query'] = ' '.join(expanded_parts) + + # ── Intent classification ── + if addr_type == 'Street Address' and number: + return 'ADDRESS', parsed + elif zipcode and not number and not street_name: + return 'POSTCODE', parsed + elif addr_type == 'Ambiguous': + # Check if it looks like a locality: 2 tokens, second is a state code + tokens = q.replace(',', ' ').split() + if len(tokens) >= 2 and tokens[-1].upper() in _STATE_CODES: + parsed['city'] = ' '.join(tokens[:-1]) + parsed['state'] = tokens[-1].upper() + return 'LOCALITY', parsed + return 'UNKNOWN', parsed + else: + return 'UNKNOWN', parsed + + +# ═══════════════════════════════════════════════════════════════════ +# STEP 2: RETRIEVAL +# ═══════════════════════════════════════════════════════════════════ + +def _retrieve_netsyms(parsed, limit=10): + """Query Netsyms for structured address lookup. Returns list of candidate dicts.""" + try: + from . 
def _retrieve_photon_structured(parsed, limit=10):
    """Look up a parsed address through Photon's /structured endpoint.

    Needs at least a street in *parsed*; without one no request is made and
    an empty list is returned.  Any request or decoding failure is logged at
    debug level and also yields an empty list.
    """
    street = parsed.get('street')
    if not street:
        return []

    params = {'limit': limit, 'countrycode': 'US', 'street': street}
    optional_fields = (
        ('housenumber', 'number'),
        ('city', 'city'),
        ('state', 'state'),
    )
    for param_name, parsed_key in optional_fields:
        value = parsed.get(parsed_key)
        if value:
            params[param_name] = value

    try:
        response = requests.get(f"{PHOTON_URL}/structured", params=params, timeout=5)
        response.raise_for_status()
        payload = response.json()
    except Exception as e:
        logger.debug("Photon /structured failed: %s", e)
        return []

    return _parse_photon_features(payload.get('features', []), 'photon')
+ 'lon': GEOCODE_BIAS_LON, + 'zoom': GEOCODE_BIAS_ZOOM, + } + resp = requests.get(f"{PHOTON_URL}/api", params=params, timeout=5) + resp.raise_for_status() + data = resp.json() + except Exception as e: + logger.debug("Photon /api failed: %s", e) + return [] + + return _parse_photon_features(data.get('features', []), 'photon') + + +def _parse_photon_features(features, source): + """Convert Photon GeoJSON features to candidate dicts.""" + results = [] + for i, feature in enumerate(features): + props = feature.get('properties', {}) + coords = feature.get('geometry', {}).get('coordinates', [0, 0]) + + osm_key = props.get('osm_key', '') + osm_value = props.get('osm_value', '') + feat_type = props.get('type', '') + has_hn = bool(props.get('housenumber')) + + if has_hn or osm_value in ('house', 'residential'): + rtype = 'street_address' + elif feat_type in ('city', 'town', 'village', 'hamlet', 'county', 'state', 'country'): + rtype = 'locality' + elif osm_key in ('amenity', 'shop', 'tourism', 'leisure'): + rtype = 'poi' + else: + rtype = 'poi' + + # Build display name + parts = [] + hn = props.get('housenumber') + street = props.get('street') + name = props.get('name', '') + if hn and street: + parts.append(f"{hn} {street}") + if name and name != street: + parts.append(name) + elif name: + parts.append(name) + elif street: + parts.append(street) + for key in ('city', 'county', 'state', 'country'): + v = props.get(key) + if v and (not parts or v != parts[-1]): + parts.append(v) + display = ', '.join(p for p in parts if p) or 'Unknown' + + results.append({ + 'name': display, + 'lat': coords[1], + 'lon': coords[0], + 'source': source, + 'type': rtype, + 'raw': props, + '_photon_rank': i, + '_number': props.get('housenumber', ''), + '_street': props.get('street', ''), + '_city': props.get('city', ''), + '_state': props.get('state', ''), + }) + return results + + +# ═══════════════════════════════════════════════════════════════════ +# STEP 3: RERANKER +# 
def _expand_street_type(s):
    """Expand a street type abbreviation for comparison."""
    return _STREET_TYPE_ABBREVS.get(s.lower(), s.lower())


def _coverage_tokens(text):
    """Split *text* into a set of lowercase tokens with abbreviations expanded.

    Commas count as separators.  Directional and street-type abbreviations
    are expanded so that "N St" and "North Street" produce the same tokens.
    """
    tokens = set()
    for tok in (text or '').lower().replace(',', ' ').split():
        tok = _DIRECTIONAL_ABBREVS.get(tok, tok)
        tokens.add(_STREET_TYPE_ABBREVS.get(tok, tok))
    return tokens


def _token_covered(token, cand_tokens):
    """Return True if a query token is accounted for by the candidate tokens.

    Whole-token equality always counts.  Tokens of three or more characters
    also count as a prefix of a candidate token, so a word the user is still
    typing ("fil" for "filer") is credited; shorter tokens must match
    exactly, otherwise a lone "n" or "e" would match almost anything.
    """
    if token in cand_tokens:
        return True
    return len(token) >= 3 and any(c.startswith(token) for c in cand_tokens)


def _score_candidate(candidate, parsed, intent):
    """
    Score a candidate against the parsed query.

    Each signal that fires adds its weight (one of the module's ``W_*``
    constants) to the total and is recorded by name in the breakdown.

    :param candidate: candidate dict from one of the retrieval functions
    :param parsed: parsed-query dict from ``_classify_and_parse``
    :param intent: intent label for the query (e.g. ``'ADDRESS'``)
    :return: ``(total_score, signal_breakdown_dict)``, total rounded to 2 dp
    """
    signals = {}
    total = 0.0

    def field(mapping, key):
        # str() guards against numeric house numbers from structured sources.
        return str(mapping.get(key) or '').strip().upper()

    query_number = field(parsed, 'number')
    query_street = field(parsed, 'street')
    query_city = field(parsed, 'city')
    query_state = field(parsed, 'state')

    cand_number = field(candidate, '_number')
    cand_street = field(candidate, '_street')
    cand_city = field(candidate, '_city')
    cand_state = field(candidate, '_state')

    # ── Housenumber ──
    if intent == 'ADDRESS' and query_number:
        if cand_number == query_number:
            signals['housenumber_exact'] = W_HOUSENUMBER_EXACT
            total += W_HOUSENUMBER_EXACT
        elif cand_number:
            # Candidate has a number and it is the wrong one; a candidate
            # with no number at all is neither rewarded nor penalised.
            signals['housenumber_mismatch'] = W_HOUSENUMBER_MISMATCH
            total += W_HOUSENUMBER_MISMATCH

    if query_street and cand_street:
        q_street_tokens = [_expand_street_type(t) for t in query_street.split()]
        c_street_tokens = [_expand_street_type(t) for t in cand_street.split()]

        # ── Street name fuzz ──
        ratio = fuzz.token_sort_ratio(
            ' '.join(q_street_tokens), ' '.join(c_street_tokens)
        ) / 100.0
        score = ratio * W_STREET_NAME_FUZZ
        signals['street_name_fuzz'] = round(score, 2)
        total += score

        # ── Street type match ──
        # Fires when query and candidate share at least one expanded
        # street-type word ("street", "avenue", ...).
        street_types = set(_STREET_TYPE_ABBREVS.values())
        q_types = set(q_street_tokens) & street_types
        c_types = set(c_street_tokens) & street_types
        if q_types & c_types:
            signals['street_type_match'] = W_STREET_TYPE_MATCH
            total += W_STREET_TYPE_MATCH

    # ── Token coverage ──
    # Compared token-by-token: substring containment would let "ST" match
    # inside "WEST" and a lone "E" match nearly every candidate.
    q_tokens = _coverage_tokens(parsed.get('raw_query', ''))
    if q_tokens:
        c_tokens = _coverage_tokens(candidate.get('name', ''))
        matched = sum(1 for t in q_tokens if _token_covered(t, c_tokens))
        score = (matched / len(q_tokens)) * W_TOKEN_COVERAGE
        signals['token_coverage'] = round(score, 2)
        total += score

    # ── Locality fuzz ──
    if query_city and cand_city:
        ratio = fuzz.ratio(query_city, cand_city) / 100.0
        score = ratio * W_LOCALITY_FUZZ
        signals['locality_fuzz'] = round(score, 2)
        total += score

    # ── State exact ──
    if query_state and cand_state == query_state:
        signals['state_exact'] = W_STATE_EXACT
        total += W_STATE_EXACT

    # ── Source authority ──
    if candidate.get('source') == 'netsyms' and intent == 'ADDRESS':
        signals['source_authority'] = W_SOURCE_AUTHORITY
        total += W_SOURCE_AUTHORITY

    # ── Layer rank (type-appropriate bonus) ──
    expected_type = {
        'ADDRESS': 'street_address',
        'LOCALITY': 'locality',
        'POI': 'poi',
    }.get(intent)
    if expected_type is not None and candidate.get('type', '') == expected_type:
        signals['layer_rank'] = W_LAYER_RANK
        total += W_LAYER_RANK

    # ── Photon position normalization ──
    photon_rank = candidate.get('_photon_rank')
    if photon_rank is not None:
        # Top result gets the full bonus; it decays linearly to zero at rank 10.
        score = max(0, (1.0 - photon_rank / 10.0)) * W_PHOTON_POSITION_NORM
        signals['photon_position'] = round(score, 2)
        total += score

    return round(total, 2), signals
'ADDRESS': + q_num = (parsed.get('number') or '').strip().upper() + c_num = (candidate.get('_number') or '').strip().upper() + if q_num and c_num == q_num: + mc['housenumber'] = 'matched' + elif q_num and c_num: + mc['housenumber'] = 'unmatched' + elif q_num and not c_num: + mc['housenumber'] = 'inferred' + + q_street = (parsed.get('street') or '').strip().upper() + c_street = (candidate.get('_street') or '').strip().upper() + if q_street and c_street: + q_exp = ' '.join(_expand_street_type(t) for t in q_street.split()) + c_exp = ' '.join(_expand_street_type(t) for t in c_street.split()) + ratio = fuzz.token_sort_ratio(q_exp, c_exp) / 100.0 + mc['street'] = 'matched' if ratio > 0.8 else 'unmatched' + elif q_street: + mc['street'] = 'inferred' + + q_city = (parsed.get('city') or '').strip().upper() + c_city = (candidate.get('_city') or '').strip().upper() + if q_city and c_city: + ratio = fuzz.ratio(q_city, c_city) / 100.0 + mc['city'] = 'matched' if ratio > 0.8 else 'unmatched' + elif q_city: + mc['city'] = 'inferred' + + return mc + + +def _rerank(candidates, parsed, intent, query, limit): + """Score, sort, and trim candidates. 
Trace-log top 3.""" + scored = [] + for c in candidates: + total, signals = _score_candidate(c, parsed, intent) + c['_score'] = total + c['_signals'] = signals + scored.append(c) + + scored.sort(key=lambda c: c['_score'], reverse=True) + + # Trace log for audit + _trace_logger.debug("─── Query: %r intent=%s ───", query, intent) + for i, c in enumerate(scored[:3]): + _trace_logger.debug( + " #%d score=%.2f src=%s name=%s", + i, c['_score'], c.get('source', '?'), c.get('name', '?')[:60] + ) + _trace_logger.debug(" signals=%s", c.get('_signals', {})) + + # Clean internal fields and add match_code + result = [] + for c in scored[:limit]: + mc = _build_match_code(c, parsed, intent) + + # Assign confidence from score + score = c.get('_score', 0) + if score >= 10: + confidence = 'exact' + elif score >= 5: + confidence = 'high' + elif score >= 2: + confidence = 'medium' + else: + confidence = 'low' + + entry = { + 'name': c['name'], + 'lat': c['lat'], + 'lon': c['lon'], + 'source': c['source'], + 'confidence': confidence, + 'type': c.get('type', 'poi'), + 'raw': c.get('raw'), + } + if mc: + entry['match_code'] = mc + result.append(entry) + + return result + + +# ═══════════════════════════════════════════════════════════════════ +# STEP 4: ANNOTATION +# ═══════════════════════════════════════════════════════════════════ + +def _haversine_m(lat1, lon1, lat2, lon2): + """Haversine distance in meters.""" + R = 6_371_000 + rlat1, rlat2 = math.radians(lat1), math.radians(lat2) + dlat = math.radians(lat2 - lat1) + dlon = math.radians(lon2 - lon1) + a = math.sin(dlat / 2) ** 2 + math.cos(rlat1) * math.cos(rlat2) * math.sin(dlon / 2) ** 2 + return R * 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a)) + + +def _annotate_with_address_book(results): + """Add labeled_as to results within radius of an address book entry.""" + try: + from . 
def geocode(query, limit=10):
    """
    Structured geocoding with multi-source retrieval and reranking.

    Resolution order:
      1. A literal "lat, lon" query is returned directly as a single
         'coordinates' result.
      2. A single-word query that exactly matches an address book entry
         with coordinates short-circuits to that entry. Multi-word
         queries never short-circuit; they go through normal retrieval
         and are only annotated with address book labels afterwards.
      3. Otherwise the query is classified, candidates are retrieved from
         Netsyms and/or Photon depending on intent, de-duplicated,
         reranked, and annotated.

    Args:
        query: Free-text query. None is treated as an empty string.
        limit: Maximum number of results, clamped to 1..20. A value that
            cannot be converted to int falls back to 10.

    Returns {query, results: [...], count} — always 200-safe.
    """
    # Coerce before clamping so None or a numeric string cannot raise
    # TypeError inside min()/max().
    try:
        limit = max(1, min(int(limit), 20))
    except (TypeError, ValueError):
        limit = 10
    q = (query or '').strip()
    empty = {'query': q, 'results': [], 'count': 0}

    if not q:
        return empty

    # ── Coordinate detection ──
    coords = _parse_coords(q)
    if coords:
        return {
            'query': q,
            'results': [{
                'name': q,
                'lat': coords[0],
                'lon': coords[1],
                'source': 'coordinates',
                'confidence': 'exact',
                'type': 'coordinates',
                'raw': None,
            }],
            'count': 1,
        }

    # ── Address book nickname short-circuit ──
    normalized_q = ' '.join(q.lower().replace(',', ' ').split())
    is_single_word = ' ' not in normalized_q
    try:
        from . import address_book
        ab_match = address_book.lookup(q)
        if (ab_match
                and ab_match['confidence'] == 'exact'
                # Compare against None: 0.0 is a valid latitude/longitude
                # and must not be rejected by a truthiness test.
                and ab_match.get('lat') is not None
                and ab_match.get('lon') is not None
                and is_single_word):
            logger.info("geocode: nickname short-circuit %r → %s", q, ab_match['name'])
            return {
                'query': q,
                'results': [{
                    'name': ab_match.get('address') or ab_match['name'],
                    'lat': ab_match['lat'],
                    'lon': ab_match['lon'],
                    'source': 'address_book',
                    'confidence': 'exact',
                    'type': 'nickname',
                    'raw': ab_match,
                }],
                'count': 1,
            }
    except Exception as e:
        # Best effort: an address book failure must not break geocoding.
        logger.debug("geocode: address_book lookup failed: %s", e)

    # ── Classify intent + parse ──
    intent, parsed = _classify_and_parse(q)
    logger.debug("geocode: intent=%s parsed=%s", intent, parsed)

    # ── Retrieve candidates ──
    candidates = []

    if intent == 'ADDRESS':
        # Netsyms (structured) + Photon (freetext with expanded query)
        netsyms_results = _retrieve_netsyms(parsed, limit=limit)
        photon_results = _retrieve_photon_freetext(
            parsed.get('expanded_query', q), limit=limit
        )
        # Also try Photon /structured for addresses
        photon_struct = _retrieve_photon_structured(parsed, limit=5)
        candidates = netsyms_results + photon_results + photon_struct

    elif intent == 'POSTCODE':
        netsyms_results = _retrieve_netsyms(parsed, limit=limit)
        photon_results = _retrieve_photon_freetext(q, limit=limit)
        candidates = netsyms_results + photon_results

    elif intent in ('LOCALITY', 'POI', 'UNKNOWN'):
        candidates = _retrieve_photon_freetext(q, limit=limit)

    # ── Deduplicate by (lat, lon) proximity ──
    # Only candidates from the SAME source within 50 m are treated as
    # duplicates; results from different sources are kept side by side.
    deduped = []
    for c in candidates:
        is_dup = any(
            _haversine_m(c['lat'], c['lon'], existing['lat'], existing['lon']) < 50
            and c.get('source') == existing.get('source')
            for existing in deduped
        )
        if not is_dup:
            deduped.append(c)
    candidates = deduped

    # ── Rerank ──
    results = _rerank(candidates, parsed, intent, q, limit)

    # ── Address book annotation ──
    _annotate_with_address_book(results)

    logger.info("geocode: %r → intent=%s, %d results", q, intent, len(results))
    return {'query': q, 'results': results, 'count': len(results)}
r["count"] >= 1 and r["results"][0]["source"] == "photon", + }, + { + "name": "filer idaho → locality", + "query": "filer idaho", + "check": lambda r: ( + r["count"] >= 1 + and r["results"][0]["source"] == "photon" + and r["results"][0]["type"] == "locality" + ), + }, + { + "name": "filer → partial query, at least 1 result", + "query": "filer", + "check": lambda r: r["count"] >= 1 and r["results"][0]["source"] == "photon", + }, + { + "name": "42.5736, -114.6066 → coordinates (with space)", + "query": "42.5736, -114.6066", + "check": lambda r: ( + r["count"] == 1 + and r["results"][0]["source"] == "coordinates" + and r["results"][0]["confidence"] == "exact" + and r["results"][0]["type"] == "coordinates" + ), + }, + { + "name": "42.5736,-114.6066 → coordinates (no space)", + "query": "42.5736,-114.6066", + "check": lambda r: ( + r["count"] == 1 + and r["results"][0]["source"] == "coordinates" + and r["results"][0]["confidence"] == "exact" + ), + }, + { + "name": "boise → at least 1 result", + "query": "boise", + "check": lambda r: r["count"] >= 1 and r["results"][0]["source"] == "photon", + }, + { + "name": "toronto → CA canary", + "query": "toronto", + "check": lambda r: r["count"] >= 1 and r["results"][0]["source"] == "photon", + }, + { + "name": "asdfghjklqwerty → empty results, 200 OK", + "query": "asdfghjklqwerty", + "check": lambda r: r["count"] == 0 and r["results"] == [], + }, + { + "name": "empty query → empty results", + "query": "", + "check": lambda r: r["count"] == 0 and r["results"] == [], + }, +] + +passed = 0 +failed = 0 + +for t in TESTS: + q = urllib.parse.urlencode({"q": t["query"]}) if t["query"] else "q=" + url = f"{BASE}/api/geocode?{q}" + try: + req = urllib.request.Request(url) + with urllib.request.urlopen(req, timeout=10) as resp: + status = resp.status + body = json.loads(resp.read()) + except urllib.error.HTTPError as e: + status = e.code + try: + body = json.loads(e.read()) + except Exception: + body = {} + except Exception as e: + status 
= 0 + body = {} + print(f" [FAIL] {t['name']}") + print(f" EXCEPTION: {e}") + failed += 1 + continue + + ok = status == 200 and t["check"](body) + tag = "PASS" if ok else "FAIL" + if ok: + passed += 1 + else: + failed += 1 + + top = body.get("results", [{}])[0] if body.get("results") else {} + top_summary = f"source={top.get('source','—')} type={top.get('type','—')} conf={top.get('confidence','—')} name={top.get('name','—')[:50]}" + print(f" [{tag}] {t['name']}") + if not ok: + print(f" HTTP {status}, count={body.get('count','?')}, top: {top_summary}") + else: + labeled = f" labeled_as={top.get('labeled_as')}" if top.get('labeled_as') else "" + print(f" → {top_summary}{labeled}") + +print(f"\n{passed} passed, {failed} failed") +sys.exit(0 if failed == 0 else 1) diff --git a/lib/nav_tools.py b/lib/nav_tools.py new file mode 100644 index 0000000..2f91616 --- /dev/null +++ b/lib/nav_tools.py @@ -0,0 +1,168 @@ +"""Navigation tools: geocoding via Photon and routing via Valhalla.""" + +import math +import re +import requests + +from .utils import setup_logging + +logger = setup_logging('recon.nav_tools') + +PHOTON_URL = "http://localhost:2322" +VALHALLA_URL = "http://localhost:8002" + +# Regional bias for Photon searches (Idaho-centric for Matt's use case). +# Adjustable — Photon uses these to rank nearby results higher. +GEOCODE_BIAS_LAT = 42.5736 +GEOCODE_BIAS_LON = -114.6066 +GEOCODE_BIAS_ZOOM = 10 + +# Distance threshold (meters) for annotating Photon results with address +# book labels. 75m covers GPS jitter + geocoder imprecision. +ADDRESS_BOOK_ANNOTATION_RADIUS_M = 75 + +# Coordinate regex — handles comma-separated and space-separated forms. 
def _parse_coords(text: str):
    """Parse a "lat, lon" style string into a coordinate pair.

    Returns a (lat, lon) tuple of floats when the stripped text matches
    _COORD_RE and both values lie within -90..90 / -180..180; otherwise
    returns None.
    """
    found = _COORD_RE.match(text.strip())
    if found is None:
        return None
    latitude, longitude = (float(part) for part in found.groups())
    in_range = -90 <= latitude <= 90 and -180 <= longitude <= 180
    return (latitude, longitude) if in_range else None
def route(origin: str, destination: str, mode: str = "auto") -> dict:
    """
    Get a route between two locations.

    Args:
        origin: Starting location — address, place name, or "lat,lon"
        destination: Destination — address, place name, or "lat,lon"
        mode: Travel mode — auto, pedestrian, bicycle, truck. Any other
            value silently falls back to "auto".

    Returns:
        dict with summary, maneuvers, origin/destination info, and raw shape

    Raises:
        ValueError: If either endpoint cannot be geocoded (from _geocode).
        RuntimeError: If Valhalla is unreachable, reports a routing error,
            or returns a response that cannot be parsed.
    """
    if mode not in VALID_MODES:
        mode = "auto"

    # Geocode both endpoints
    orig_lat, orig_lon, orig_name = _geocode(origin)
    dest_lat, dest_lon, dest_name = _geocode(destination)

    # Query Valhalla; distances are requested in miles.
    valhalla_req = {
        "locations": [
            {"lat": orig_lat, "lon": orig_lon},
            {"lat": dest_lat, "lon": dest_lon},
        ],
        "costing": mode,
        "directions_options": {"units": "miles"},
    }

    try:
        resp = requests.post(
            f"{VALHALLA_URL}/route",
            json=valhalla_req,
            timeout=30,
        )
    except requests.RequestException as exc:
        raise RuntimeError("Navigation service unavailable") from exc

    if resp.status_code != 200:
        try:
            err = resp.json()
            msg = err.get("error", "Unknown routing error")
        except Exception:
            msg = f"Routing error (HTTP {resp.status_code})"
        raise RuntimeError(f"No route found between locations: {msg}")

    # A 200 with a malformed body (invalid JSON, missing keys, no legs)
    # is reported as RuntimeError so callers see one failure type for
    # every routing-service problem.
    try:
        data = resp.json()
        trip = data["trip"]
        summary = trip["summary"]
        leg = trip["legs"][0]

        # Build maneuver list
        maneuvers = []
        for m in leg["maneuvers"]:
            streets = m.get("street_names", [])
            maneuvers.append({
                "instruction": m["instruction"],
                "distance_miles": round(m.get("length", 0), 2),
                "street_name": streets[0] if streets else "",
                "type": m.get("type", 0),
                "verbal_succinct": m.get("verbal_succinct_transition_instruction", ""),
            })

        distance_miles = round(summary["length"], 1)
        # presumably summary["time"] is in seconds — TODO confirm
        time_minutes = round(summary["time"] / 60, 1)
        shape = leg.get("shape", "")
    except (KeyError, IndexError, TypeError, ValueError, AttributeError) as exc:
        raise RuntimeError(
            "Navigation service returned an unexpected response"
        ) from exc

    return {
        "origin": {"name": orig_name, "lat": orig_lat, "lon": orig_lon},
        "destination": {"name": dest_name, "lat": dest_lat, "lon": dest_lon},
        "summary": {
            "distance_miles": distance_miles,
            "time_minutes": time_minutes,
            "mode": mode,
        },
        "maneuvers": maneuvers,
        "shape": shape,
    }
"43.615,-116.2023", "auto") + assert r["summary"]["distance_miles"] > 100, f"Expected >100 mi, got {r['summary']['distance_miles']}" + assert len(r["maneuvers"]) > 3, f"Expected >3 maneuvers" + print(f" OK — {r['summary']['distance_miles']} mi, {r['summary']['time_minutes']} min") + + +def test_route_pedestrian(): + """route with pedestrian mode.""" + print("\nTEST 3: route('Buhl Idaho', 'Boise Idaho', 'pedestrian')") + r = route("Buhl Idaho", "Boise Idaho", "pedestrian") + assert r["summary"]["mode"] == "pedestrian" + assert r["summary"]["time_minutes"] > r["summary"]["distance_miles"], "Walking should take more min than miles" + print(f" OK — {r['summary']['distance_miles']} mi, {r['summary']['time_minutes']} min (pedestrian)") + + +def test_reverse_geocode(): + """reverse_geocode near Buhl, Idaho.""" + print("\nTEST 4: reverse_geocode(42.5991, -114.7636)") + result = reverse_geocode(42.5991, -114.7636) + assert "Buhl" in result or "Twin Falls" in result or "Idaho" in result, f"Expected Buhl/Idaho, got: {result}" + print(f" OK — {result}") + + +def test_route_bad_origin(): + """route with nonexistent place returns clean error.""" + print("\nTEST 5: route('nonexistent place xyz123abc', 'Boise Idaho')") + try: + r = route("nonexistent place xyz123abc", "Boise Idaho") + print(f" FAIL — expected error, got result: {r['summary']}") + return False + except ValueError as e: + print(f" OK — clean error: {e}") + except RuntimeError as e: + print(f" OK — runtime error: {e}") + + +if __name__ == "__main__": + passed = 0 + failed = 0 + tests = [test_route_named, test_route_coords, test_route_pedestrian, test_reverse_geocode, test_route_bad_origin] + + for test in tests: + try: + test() + passed += 1 + except Exception as e: + print(f" FAIL — {e}") + failed += 1 + + print(f"\n{'='*40}") + print(f"Results: {passed} passed, {failed} failed out of {len(tests)}") + sys.exit(1 if failed else 0) diff --git a/lib/netsyms.py b/lib/netsyms.py new file mode 100644 index 
def _get_conn():
    """Return the shared read-only SQLite connection, opening it on first use.

    The connection is opened via a `file:...?mode=ro` URI with
    check_same_thread=False and cached in the module-level `_conn`.

    Raises:
        sqlite3.Error: If the database cannot be opened.
    """
    global _conn
    if _conn is not None:
        return _conn
    with _lock:
        # Re-check under the lock: another thread may have opened the
        # connection while this one was waiting.
        if _conn is None:
            uri = f'file:{_DB_PATH}?mode=ro'
            conn = sqlite3.connect(uri, uri=True, check_same_thread=False)
            conn.row_factory = sqlite3.Row
            # Publish only after configuration, so the unlocked fast path
            # above never returns a connection without row_factory set.
            _conn = conn
            logger.info("Netsyms DB opened: %s", _DB_PATH)
    return _conn
def lookup_by_street(number, street, city=None, state=None,
                     zipcode=None, country=None, limit=20):
    """Match on number + street, with optional qualifiers.

    Number, street, city, state and country are stripped and upper-cased
    before comparison (presumably the database stores upper-case values —
    verify against the data). The zipcode is only stripped.

    Args:
        number: House number; converted with str().
        street: Street name.
        city, state, zipcode, country: Optional equality filters; a falsy
            value disables that filter.
        limit: Maximum number of rows to return.

    Returns:
        List of dicts built by _row_to_dict. Empty when nothing matches
        or when the database cannot be opened or queried (a warning is
        logged in those cases).
    """
    clauses = ['number = ?', 'street = ?']
    params = [str(number).strip().upper(), street.strip().upper()]

    if city:
        clauses.append('city = ?')
        params.append(city.strip().upper())
    if state:
        clauses.append('state = ?')
        params.append(state.strip().upper())
    if zipcode:
        clauses.append('zipcode = ?')
        # str() mirrors the handling of `number`, so an int zipcode works.
        params.append(str(zipcode).strip())
    if country:
        clauses.append('country = ?')
        params.append(country.strip().upper())

    # Only fixed clause strings are interpolated; every value is bound.
    sql = f"SELECT * FROM addresses WHERE {' AND '.join(clauses)} LIMIT ?"
    params.append(limit)

    # Treat a failure to open the database like a failed query.
    try:
        conn = _get_conn()
    except sqlite3.Error as e:
        logger.warning("Netsyms lookup_by_street error: %s", e)
        return []

    with _lock:
        try:
            rows = conn.execute(sql, params).fetchall()
        except sqlite3.Error as e:
            logger.warning("Netsyms lookup_by_street error: %s", e)
            return []

    results = [_row_to_dict(r) for r in rows]
    logger.debug("lookup_by_street(%s, %s, city=%s, state=%s) → %d results",
                 number, street, city, state, len(results))
    return results
def health():
    """Health check with cached row count.

    Returns:
        dict with keys:
          - ok: False when the database file is missing or cannot be
            opened, True otherwise.
          - row_count: COUNT(*) of the addresses table, cached after the
            first successful query; 0 while it is unavailable.
          - file_size_bytes: Size of the database file.
          - indexed_countries: Sorted distinct non-NULL country values
            (queried on every call).
    """
    global _cached_row_count

    try:
        file_size = os.path.getsize(_DB_PATH)
    except OSError:
        return {'ok': False, 'row_count': 0, 'file_size_bytes': 0,
                'indexed_countries': []}

    try:
        conn = _get_conn()
    except Exception:
        return {'ok': False, 'row_count': 0, 'file_size_bytes': file_size,
                'indexed_countries': []}

    if _cached_row_count is None:
        with _lock:
            if _cached_row_count is None:
                try:
                    row = conn.execute(
                        "SELECT COUNT(*) AS cnt FROM addresses"
                    ).fetchone()
                    _cached_row_count = row['cnt']
                except sqlite3.Error as e:
                    # Leave the cache empty so a transient failure is
                    # retried on the next call instead of pinning 0.
                    logger.warning("Netsyms health row count failed: %s", e)
    row_count = _cached_row_count if _cached_row_count is not None else 0

    with _lock:
        try:
            rows = conn.execute(
                "SELECT DISTINCT country FROM addresses"
            ).fetchall()
            # Skip NULLs: sorting None together with str raises TypeError.
            countries = sorted(
                r['country'] for r in rows if r['country'] is not None
            )
        except sqlite3.Error:
            countries = []

    return {
        'ok': True,
        'row_count': row_count,
        'file_size_bytes': file_size,
        'indexed_countries': countries,
    }
@geocode_bp.route('/api/reverse')
def api_reverse():
    """
    Reverse geocode coordinates via Photon.

    GET /api/reverse?lat=X&lon=Y

    Returns same shape as /api/geocode:
      {query: "lat,lon", results: [{name, lat, lon, source, type, raw, ...}], count}

    Returns 200 OK with empty results on no match or when Photon fails.
    400 on missing, non-numeric, or out-of-range coords.
    """
    try:
        lat = float(request.args.get('lat', ''))
        lon = float(request.args.get('lon', ''))
    except (ValueError, TypeError):
        return jsonify({'error': 'Missing or invalid lat/lon parameters'}), 400

    # NaN fails both chained comparisons, so it is rejected here too.
    if not (-90 <= lat <= 90) or not (-180 <= lon <= 180):
        return jsonify({'error': 'Coordinates out of range'}), 400

    query_str = f"{lat},{lon}"

    try:
        import requests as http_requests
        resp = http_requests.get(
            # Use the shared Photon base URL rather than a second literal.
            f"{nav_tools.PHOTON_URL}/reverse",
            params={"lat": lat, "lon": lon, "limit": 1},
            timeout=10,
        )
        resp.raise_for_status()
        data = resp.json()
        features = data.get("features", [])
    except Exception as exc:
        # Best effort: the endpoint returns empty results on any failure.
        logger.warning("Photon reverse geocode failed for %s: %s", query_str, exc)
        return jsonify({'query': query_str, 'results': [], 'count': 0})

    if not features:
        return jsonify({'query': query_str, 'results': [], 'count': 0})

    from .geocode import _parse_photon_features
    results = _parse_photon_features(features, source='photon_reverse')

    return jsonify({'query': query_str, 'results': results, 'count': len(results)})
os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from lib import netsyms + + +def test_lookup_by_street_lowercase(): + results = netsyms.lookup_by_street("214", "North St", city="Filer", state="ID") + assert len(results) >= 1, f"Expected at least 1 result, got {len(results)}" + r = results[0] + assert abs(r['lat'] - 42.5736) < 0.01, f"Lat mismatch: {r['lat']}" + assert abs(r['lon'] - (-114.6066)) < 0.01, f"Lon mismatch: {r['lon']}" + print(" PASS: lookup_by_street (lowercase)") + + +def test_lookup_by_street_uppercase(): + results = netsyms.lookup_by_street("214", "NORTH ST", city="FILER", state="ID") + assert len(results) >= 1, f"Expected at least 1 result, got {len(results)}" + r = results[0] + assert abs(r['lat'] - 42.5736) < 0.01, f"Lat mismatch: {r['lat']}" + print(" PASS: lookup_by_street (uppercase)") + + +def test_lookup_nonexistent(): + results = netsyms.lookup_by_street("999999", "Nonexistent Rd", + city="Filer", state="ID") + assert results == [], f"Expected empty list, got {len(results)} results" + print(" PASS: lookup_by_street (nonexistent)") + + +def test_free_text_with_commas(): + results = netsyms.lookup_free_text("214 North St, Filer, ID") + assert len(results) >= 1, f"Expected at least 1 result, got {len(results)}" + r = results[0] + assert r['city'] == 'FILER', f"City mismatch: {r['city']}" + assert r['state'] == 'ID', f"State mismatch: {r['state']}" + print(" PASS: lookup_free_text (commas)") + + +def test_free_text_no_commas(): + results = netsyms.lookup_free_text("214 North St Filer ID") + assert len(results) >= 1, f"Expected at least 1 result, got {len(results)}" + r = results[0] + assert r['state'] == 'ID', f"State mismatch: {r['state']}" + print(" PASS: lookup_free_text (no commas)") + + +def test_lookup_by_zipcode(): + results = netsyms.lookup_by_zipcode("83328", limit=5) + assert len(results) == 5, f"Expected 5 results, got {len(results)}" + for r in results: + assert r['zipcode'] == '83328', f"Zipcode mismatch: 
{r['zipcode']}" + print(" PASS: lookup_by_zipcode") + + +def test_health(): + h = netsyms.health() + assert h['ok'] is True, f"Health not OK: {h}" + assert h['row_count'] >= 159_000_000, f"Row count too low: {h['row_count']}" + assert 'US' in h['indexed_countries'], f"US not in countries: {h['indexed_countries']}" + assert 'CA' in h['indexed_countries'], f"CA not in countries: {h['indexed_countries']}" + print(" PASS: health") + + +if __name__ == '__main__': + print("Running Netsyms tests...") + test_lookup_by_street_lowercase() + test_lookup_by_street_uppercase() + test_lookup_nonexistent() + test_free_text_with_commas() + test_free_text_no_commas() + test_lookup_by_zipcode() + test_health() + print("All tests passed.") diff --git a/lib/query_router.py b/lib/query_router.py new file mode 100644 index 0000000..dda14a2 --- /dev/null +++ b/lib/query_router.py @@ -0,0 +1,161 @@ +"""Semantic query router for Aurora. + +Classifies user queries into routes (nav_route, nav_reverse_geocode, +direct_answer, rag_search) by comparing query embeddings against +pre-computed route centroids from example queries. 
def _compute_centroid(vectors: list[list[float]]) -> list[float]:
    """Return the element-wise mean of equal-length vectors.

    Args:
        vectors: Non-empty list of vectors that all have the same length.

    Returns:
        A new list whose i-th element is the mean of the i-th elements
        of every input vector.

    Raises:
        ValueError: If `vectors` is empty or the vectors differ in length.
    """
    if not vectors:
        raise ValueError("cannot compute centroid of an empty vector list")
    dim = len(vectors[0])
    # Reject ragged input explicitly instead of truncating silently or
    # failing with a bare IndexError.
    if any(len(vec) != dim for vec in vectors):
        raise ValueError("all vectors must have the same dimension")
    n = len(vectors)
    return [sum(column) / n for column in zip(*vectors)]
+ """ + centroids = _ensure_centroids(tei_url) + + # Embed the query + vecs = _embed_batch([query], tei_url) + query_vec = vecs[0] + + # Compare against all centroids + best_route = "rag_search" + best_score = 0.0 + for route, centroid in centroids.items(): + sim = _cosine_similarity(query_vec, centroid) + if sim > best_score: + best_score = sim + best_route = route + + if best_score < threshold: + return ("rag_search", best_score) + + return (best_route, best_score) diff --git a/lib/query_router_test.py b/lib/query_router_test.py new file mode 100644 index 0000000..27ccefd --- /dev/null +++ b/lib/query_router_test.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python3 +"""Test suite for the semantic query router.""" + +import sys +import os + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from lib.query_router import classify + +TEST_QUERIES = [ + ("how do I get from Buhl to Boise", "nav_route"), + ("what does the survival manual say about water", "rag_search"), + ("what town is at 42.5, -114.7", "nav_reverse_geocode"), + ("hey aurora", "direct_answer"), + ("what's the fastest way to Sun Valley", "nav_route"), + ("how to purify water in the field", "rag_search"), + ("good morning", "direct_answer"), +] + + +def main(): + print("Query Router Test Suite") + print("=" * 70) + + passed = 0 + failed = 0 + + for query, expected in TEST_QUERIES: + route, confidence = classify(query) + status = "PASS" if route == expected else "FAIL" + if status == "PASS": + passed += 1 + else: + failed += 1 + print(f" [{status}] {query!r}") + print(f" → {route} ({confidence:.3f}) expected={expected}") + + print("=" * 70) + print(f"Results: {passed}/{passed + failed} passed") + if failed: + print(f" {failed} FAILED") + sys.exit(1) + else: + print(" All tests passed!") + + +if __name__ == "__main__": + main()