diff --git a/config/profiles/home.yaml b/config/profiles/home.yaml index de704d9..5269812 100644 --- a/config/profiles/home.yaml +++ b/config/profiles/home.yaml @@ -6,13 +6,13 @@ profile: home region_name: "North America" tileset: - url: "/tiles/planet/current.pmtiles" + url: "/tiles/na.pmtiles" bounds: [-168, 14, -52, 72] max_zoom: 15 attribution: "Protomaps © OSM" tileset_hillshade: - url: "/tiles/planet-dem.pmtiles" + url: "/tiles/hillshade-na.pmtiles" encoding: "terrarium" max_zoom: 12 @@ -31,20 +31,16 @@ services: address_book: "/api/address_book" valhalla: "/valhalla" -auth: - login_url: "/outpost.goauthentik.io/start?rd=%2F" - logout_url: "https://auth.echo6.co/if/flow/default-invalidation-flow/?next=https://navi.echo6.co/" - features: has_nominatim_details: true - has_kiwix_wiki: true + has_kiwix_wiki: false has_hillshade: true has_3d_terrain: false has_traffic_overlay: true has_landclass: true has_public_lands_layer: true has_contours: true - has_contours_test: false + has_contours_test: true has_contours_test_10ft: false has_address_book_write: false has_overture_enrichment: true @@ -52,16 +48,7 @@ features: has_contacts: true has_wiki_rewriting: true has_wiki_discovery: false - has_usfs_trails: true - has_blm_trails: true defaults: center: [42.5736, -114.6066] zoom: 10 - -# Offroute wilderness routing -offroute: - osm_pbf_path: "/mnt/nav/sources/idaho-latest.osm.pbf" - densify_interval_m: 100 - postgis_dsn: "dbname=padus" - diff --git a/config/profiles/minimal_pi.yaml b/config/profiles/minimal_pi.yaml index c2fd90a..e3ae0fd 100644 --- a/config/profiles/minimal_pi.yaml +++ b/config/profiles/minimal_pi.yaml @@ -26,11 +26,6 @@ services: address_book: "/api/address_book" valhalla: "/valhalla" -# TODO(matt): confirm logout next= host for this profile -auth: - login_url: "/outpost.goauthentik.io/start?rd=%2F" - logout_url: "https://auth.echo6.co/if/flow/default-invalidation-flow/?next=https://navi.echo6.co/" - features: has_nominatim_details: false 
def _reload_if_changed():
    """Reload the address book from YAML when the file's mtime has changed.

    Updates the module-level ``_entries`` / ``_mtime`` cache.  A missing file
    empties the cache.  An unreadable or syntactically invalid file is logged
    and the previously loaded entries are kept, so a bad edit of the YAML
    does not turn every lookup into an unhandled exception.

    Malformed content is tolerated: a non-mapping document, a missing or
    null ``entries`` list, non-mapping entries, null/scalar ``aliases`` and
    non-string alias values (e.g. a bare house number) are all handled.
    """
    global _entries, _mtime
    try:
        st = os.stat(_CONFIG_PATH)
    except FileNotFoundError:
        logger.warning("Address book not found: %s", _CONFIG_PATH)
        # Mutate the shared cache only while holding the lock.
        with _lock:
            _entries = []
            _mtime = 0.0
        return

    # Cheap check without the lock: nothing to do if the file is unchanged.
    if st.st_mtime == _mtime:
        return

    with _lock:
        # Re-check under the lock; another thread may already have reloaded.
        try:
            st = os.stat(_CONFIG_PATH)
        except FileNotFoundError:
            _entries = []
            _mtime = 0.0
            return
        if st.st_mtime == _mtime:
            return

        try:
            with open(_CONFIG_PATH, 'r', encoding='utf-8') as f:
                data = yaml.safe_load(f)
        except (OSError, yaml.YAMLError) as exc:
            logger.error(
                "Address book unreadable, keeping %d cached entries: %s",
                len(_entries), exc,
            )
            # Remember this mtime so the broken file is not re-parsed on
            # every call; the next edit changes the mtime and retries.
            _mtime = st.st_mtime
            return

        raw = data.get('entries') if isinstance(data, dict) else None
        if not isinstance(raw, list):
            raw = []

        loaded = []
        for entry in raw:
            if not isinstance(entry, dict):
                logger.warning("Skipping malformed address book entry: %r", entry)
                continue
            raw_aliases = entry.get('aliases') or []
            if isinstance(raw_aliases, (str, int, float)):
                # A single scalar alias instead of a YAML list.
                raw_aliases = [raw_aliases]
            # Normalise aliases to lowercase strings for matching.
            aliases = [str(a).lower() for a in raw_aliases if a is not None]
            loaded.append({
                'id': entry.get('id', ''),
                # lookup() normalises the name as text, so never store None.
                'name': str(entry.get('name') or ''),
                'aliases': aliases,
                'address': entry.get('address', ''),
                'lat': entry.get('lat'),
                'lon': entry.get('lon'),
                'tags': entry.get('tags') or [],
            })
        _entries = loaded
        _mtime = st.st_mtime
        logger.info("Address book loaded: %d entries from %s", len(_entries), _CONFIG_PATH)
normalized(alias) is a contiguous token sub-sequence → partial + """ + _reload_if_changed() + q = _normalize(query) + if not q: + return None + + first_exact = None + first_partial = None + + for entry in _entries: + norm_name = _normalize(entry['name']) + check_aliases = [_normalize(a) for a in entry.get('aliases', [])] + all_forms = [norm_name] + check_aliases + + for form in all_forms: + if not form: + continue + + # Rule 1: exact match + if q == form: + return {**entry, 'confidence': 'exact'} + + # Rule 2: query starts with alias + word boundary + if q.startswith(form + ' '): + if first_exact is None: + first_exact = entry + continue + + # Rule 3: alias starts with query (user still typing) + if form.startswith(q) and len(q) < len(form): + if first_partial is None: + first_partial = entry + continue + + # Rule 4: alias is contiguous token sub-sequence in query + # Build regex: token1\s+token2\s+...tokenN + tokens = form.split() + if len(tokens) >= 1: + pattern = r'(?:^|\s)' + r'\s+'.join(re.escape(t) for t in tokens) + r'(?:\s|$)' + if re.search(pattern, q): + if first_partial is None: + first_partial = entry + + if first_exact is not None: + return {**first_exact, 'confidence': 'exact'} + + if first_partial is not None: + return {**first_partial, 'confidence': 'partial'} + + return None + + +def list_all(): + """Return all address book entries.""" + _reload_if_changed() + return list(_entries) diff --git a/lib/address_book_api.py b/lib/address_book_api.py new file mode 100644 index 0000000..020828b --- /dev/null +++ b/lib/address_book_api.py @@ -0,0 +1,31 @@ +""" +RECON Address Book API — Flask Blueprint. + +GET /api/address_book/lookup?q= — best match or 404 +GET /api/address_book/list — all entries +""" + +from flask import Blueprint, request, jsonify + +from . 
import address_book + +address_book_bp = Blueprint('address_book', __name__) + + +@address_book_bp.route('/api/address_book/lookup') +def api_address_book_lookup(): + q = request.args.get('q', '').strip() + if not q: + return jsonify({'error': 'Missing q parameter'}), 400 + + result = address_book.lookup(q) + if result is None: + return '', 404 + + return jsonify(result) + + +@address_book_bp.route('/api/address_book/list') +def api_address_book_list(): + entries = address_book.list_all() + return jsonify(entries) diff --git a/lib/address_book_test.py b/lib/address_book_test.py new file mode 100644 index 0000000..75905f0 --- /dev/null +++ b/lib/address_book_test.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python3 +"""Tests for RECON address book module.""" +import sys +import os + +# Add project root to path +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from lib import address_book + +TESTS = [ + # ── Existing tests ── + ("lookup('home') → exact", + lambda: address_book.lookup("home"), + lambda r: r is not None and r['confidence'] == 'exact' and r['id'] == 'home'), + + ("lookup('Home') → exact (case-insensitive)", + lambda: address_book.lookup("Home"), + lambda r: r is not None and r['confidence'] == 'exact' and r['id'] == 'home'), + + ("lookup('214 north st') → exact via alias", + lambda: address_book.lookup("214 north st"), + lambda r: r is not None and r['confidence'] == 'exact' and r['id'] == 'home'), + + ("lookup('214 North Street') → exact via alias", + lambda: address_book.lookup("214 North Street"), + lambda r: r is not None and r['confidence'] == 'exact' and r['id'] == 'home'), + + ("lookup('nonexistent place') → None", + lambda: address_book.lookup("nonexistent place"), + lambda r: r is None), + + ("list_all() → 1 entry", + lambda: address_book.list_all(), + lambda r: isinstance(r, list) and len(r) == 1 and r[0]['id'] == 'home'), + + # ── New prefix+boundary tests ── + ("lookup('214 north st filer') → exact (query starts with 
alias)", + lambda: address_book.lookup("214 north st filer"), + lambda r: r is not None and r['confidence'] == 'exact' and r['id'] == 'home'), + + ("lookup('214 North St Filer ID') → exact (case + trailing state)", + lambda: address_book.lookup("214 North St Filer ID"), + lambda r: r is not None and r['confidence'] == 'exact' and r['id'] == 'home'), + + ("lookup('214 north st, filer, id') → exact (commas stripped)", + lambda: address_book.lookup("214 north st, filer, id"), + lambda r: r is not None and r['confidence'] == 'exact' and r['id'] == 'home'), + + ("lookup('home today') → exact (short alias + trailing text)", + lambda: address_book.lookup("home today"), + lambda r: r is not None and r['confidence'] == 'exact' and r['id'] == 'home'), + + ("lookup('214') → partial (query is prefix of alias)", + lambda: address_book.lookup("214"), + lambda r: r is not None and r['confidence'] == 'partial'), + + ("lookup('214 n') → partial (partial prefix of alias)", + lambda: address_book.lookup("214 n"), + lambda r: r is not None and r['confidence'] == 'partial'), + + ("lookup('completely unrelated query') → None", + lambda: address_book.lookup("completely unrelated query"), + lambda r: r is None), + + ("lookup('214 north streets of filer') → None (no word boundary after st)", + lambda: address_book.lookup("214 north streets of filer"), + lambda r: r is None), +] + +passed = 0 +failed = 0 +for name, fn, check in TESTS: + try: + result = fn() + ok = check(result) + except Exception as e: + ok = False + result = f"EXCEPTION: {e}" + + status = "PASS" if ok else "FAIL" + if ok: + passed += 1 + else: + failed += 1 + print(f" [{status}] {name}") + if not ok: + print(f" got: {result}") + +print(f"\n{passed} passed, {failed} failed") +sys.exit(0 if failed == 0 else 1) diff --git a/lib/api.py b/lib/api.py index a0697bf..8a1f383 100644 --- a/lib/api.py +++ b/lib/api.py @@ -17,13 +17,16 @@ import shutil import tempfile import requests as http_requests -from flask import Flask, request, 
jsonify, redirect, render_template +from flask import Flask, request, jsonify, redirect, render_template, make_response from qdrant_client import QdrantClient from qdrant_client.models import Filter, FieldCondition, MatchValue from werkzeug.utils import secure_filename from .utils import get_config, content_hash, clean_filename_to_title, derive_source_and_category, generate_download_url, setup_logging from .status import StatusDB +from .deployment_config import get_deployment_config +from .place_detail import get_place_detail, get_place_by_wikidata +from .landclass import lookup_landclass, format_summary logger = setup_logging('recon.api') @@ -57,9 +60,19 @@ class _LargeZimRequest(_FlaskRequest): return super()._get_file_stream(total_content_length, content_type, filename, content_length) app.request_class = _LargeZimRequest -# ── Netsyms Blueprint ── -from .netsyms_api import netsyms_bp +# ── Address Book Blueprint ── +from .address_book_api import address_book_bp +app.register_blueprint(address_book_bp) + +# ── Contacts Blueprint ── +from .contacts_api import contacts_bp +app.register_blueprint(contacts_bp) + +# ── Netsyms + Geocode Blueprints ── +from .netsyms_api import netsyms_bp, geocode_bp app.register_blueprint(netsyms_bp) +app.register_blueprint(geocode_bp) + # ── Navigation Constants ── @@ -89,6 +102,12 @@ SETTINGS_SUBNAV = [ {'href': '/settings/health', 'label': 'Service Health'}, ] +NAVI_SUBNAV = [ + {'href': '/nav-i', 'label': 'Overview'}, + {'href': '/deleted-contacts', 'label': 'Deleted Contacts'}, + {'href': '/nav-i/api-keys', 'label': 'API Keys'}, +] + def _format_source_citation(payload): """Format a human-readable citation from a search result payload.""" @@ -315,6 +334,36 @@ def failures_page(): failures=failures) +@app.route("/deleted-contacts") +def deleted_contacts_page(): + from .auth import get_user_id + from .contacts import ContactsDB + user_id = get_user_id() or "anonymous" + db = ContactsDB() + contacts = db.list_deleted(user_id) + 
@app.route('/api/traffic/flow/<int:z>/<int:x>/<int:y>.png')
def api_traffic_flow(z, x, y):
    """Proxy a TomTom traffic-flow tile so the API key stays server-side.

    Args:
        z, x, y: Tile coordinates taken from the URL (integers).

    Returns:
        The PNG tile with a short public cache lifetime, or a plain-text
        error: 503 when no key is configured, 504 when the upstream request
        times out, 502 for any other upstream failure or non-200 reply.
    """
    key = os.environ.get('TOMTOM_API_KEY')
    if not key:
        return 'Traffic service not configured', 503
    url = f'https://api.tomtom.com/traffic/map/4/tile/flow/relative/{z}/{x}/{y}.png'
    try:
        resp = http_requests.get(url, params={'key': key}, timeout=10)
    except http_requests.exceptions.Timeout:
        return 'Upstream timeout', 504
    except http_requests.exceptions.RequestException as exc:
        # Log only the exception type: its message embeds the request URL,
        # which carries the API key.
        logger.warning("Traffic tile proxy failed: %s", type(exc).__name__)
        return 'Upstream error', 502
    if resp.status_code != 200:
        return 'Upstream error', 502
    r = make_response(resp.content)
    r.headers['Content-Type'] = 'image/png'
    r.headers['Cache-Control'] = 'public, max-age=120'
    return r


# NOTE(review): osm_id is passed through as a string; switch to
# <int:osm_id> if get_place_detail expects an integer — confirm.
@app.route('/api/place/<osm_type>/<osm_id>')
def api_place_detail(osm_type, osm_id):
    """Return place details for an OSM object as JSON.

    Delegates to ``get_place_detail`` and relays its ``(payload, status)``
    pair unchanged.
    """
    result, status = get_place_detail(osm_type, osm_id)
    return jsonify(result), status


@app.route("/api/place/wikidata/<wikidata_id>")
def api_place_wikidata(wikidata_id):
    """Return place details for a Wikidata entity as JSON.

    Delegates to ``get_place_by_wikidata`` and relays its
    ``(payload, status)`` pair unchanged.
    """
    result, status = get_place_by_wikidata(wikidata_id)
    return jsonify(result), status
@app.route('/api/nav-i/api-keys/update', methods=['POST'])
def navi_api_keys_update():
    """Update a managed API key from a JSON request body.

    Expected body: ``{"name": str, "new_value": str, "index": int?}``.
    ``index`` is only meaningful for ``GEMINI_KEY`` and selects which Gemini
    key to replace; without it the value is handed to ``update_key``.

    Returns:
        The result dict from the admin helper with status 200 on success,
        or status 400 for a malformed request or a failed update.
    """
    # NOTE(review): the original imported `require_auth` from .auth here but
    # never called it, so no access check is performed in this handler.
    # Confirm that authentication is enforced in front of this route.
    from .api_keys_admin import update_key, update_gemini_key

    # silent=True: a malformed body yields None instead of an exception, so
    # the client always gets the JSON error below.
    data = request.get_json(force=True, silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'JSON object body required'}), 400

    name = data.get('name', '')
    new_value = data.get('new_value', '')
    index = data.get('index')  # optional, for Gemini key replacement
    if not isinstance(name, str) or not isinstance(new_value, str):
        return jsonify({'error': 'name and new_value must be strings'}), 400
    if not name or not new_value:
        return jsonify({'error': 'name and new_value required'}), 400

    if name == 'GEMINI_KEY' and index is not None:
        try:
            idx = int(index)
        except (TypeError, ValueError):
            return jsonify({'error': 'index must be an integer'}), 400
        if idx < 0:
            # A negative index would address keys from the end of the list.
            return jsonify({'error': 'index must be non-negative'}), 400
        result = update_gemini_key(idx, new_value)
    else:
        result = update_key(name, new_value)

    if result.get('success'):
        return jsonify(result)
    return jsonify(result), 400
Behind forward_auth, so headers are present when authenticated.""" + username = request.headers.get('X-Authentik-Username') + if username: + return jsonify({ + 'authenticated': True, + 'username': username, + }) + return jsonify({ + 'authenticated': False, + 'username': None, + }) diff --git a/lib/api_keys_admin.py b/lib/api_keys_admin.py new file mode 100644 index 0000000..3c63565 --- /dev/null +++ b/lib/api_keys_admin.py @@ -0,0 +1,358 @@ +""" +Nav-I API Keys Admin — unified view/update/test for third-party API keys. + +Manages three provider categories: + - Gemini (multiple keys via KeyManager singleton) + - TomTom (single key in .env) + - Google Places (single key in .env) + +All key values are masked in responses. Full values never leave the server +except as user-supplied input on update. +""" +import os +import re +import shutil +import tempfile +import time + +import requests as http_requests + +from .utils import setup_logging + +logger = setup_logging('recon.api_keys_admin') + +ENV_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), '.env') + +# Key definitions: env_name → display metadata +_KEY_DEFS = { + 'TOMTOM_API_KEY': { + 'display_name': 'TomTom', + 'provider': 'tomtom', + }, + 'GOOGLE_PLACES_API_KEY': { + 'display_name': 'Google Places', + 'provider': 'google_places', + }, +} + + +# ── .env read/write helpers ───────────────────────────────────────────── + +def _read_env(): + """Read .env file into a dict of key=value pairs, preserving order.""" + entries = [] # list of (key, value, raw_line) — preserves order and comments + if not os.path.exists(ENV_PATH): + return entries + with open(ENV_PATH, 'r') as f: + for line in f: + raw = line.rstrip('\n') + stripped = raw.strip() + if not stripped or stripped.startswith('#'): + entries.append((None, None, raw)) + continue + m = re.match(r'^([A-Za-z_][A-Za-z0-9_]*)=(.*)$', stripped) + if m: + entries.append((m.group(1), m.group(2).strip().strip('"').strip("'"), raw)) + else: + 
def _env_line(key, value, raw):
    """Return the text line to write for one parsed ``.env`` entry."""
    if key is None:
        # Comment, blank or unparsed line: keep verbatim.
        return raw
    if isinstance(raw, str):
        name, sep, rhs = raw.strip().partition('=')
        # Same normalisation the reader applies; if the raw line still
        # encodes this key/value, keep it so quoting is preserved.
        if sep and name == key and rhs.strip().strip('"').strip("'") == value:
            return raw
    return f'{key}={value}'


def _write_env(entries):
    """Atomically write .env from entries list. Backs up to .env.bak first.

    Args:
        entries: Iterable of ``(key, value, raw_line)`` tuples as produced by
            the reader; ``key`` is ``None`` for comments and blank lines.

    Entries whose raw line still matches their parsed value are written back
    verbatim, so quoted values (which may contain spaces or ``#``) are not
    silently unquoted when an unrelated key is changed.  The replacement
    file takes over the existing file's permission bits; the temp file alone
    would be owner-only.

    Raises:
        OSError: If the backup, write or replace fails.  The temp file is
            removed and the existing .env is left untouched.
    """
    env_exists = os.path.exists(ENV_PATH)
    if env_exists:
        shutil.copy2(ENV_PATH, ENV_PATH + '.bak')

    lines = []
    key_count = 0
    for key, value, raw in entries:
        if key:
            key_count += 1
        lines.append(_env_line(key, value, raw))

    # Write to a temp file in the same directory, then replace (atomic on
    # the same filesystem).
    fd, tmp_path = tempfile.mkstemp(dir=os.path.dirname(ENV_PATH), prefix='.env.', suffix='.tmp')
    try:
        with os.fdopen(fd, 'w') as f:
            f.writelines(line + '\n' for line in lines)
        if env_exists:
            shutil.copymode(ENV_PATH, tmp_path)
        # os.replace overwrites an existing target on every platform.
        os.replace(tmp_path, ENV_PATH)
    except Exception:
        # Clean up temp file on failure
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise

    logger.info("Wrote .env atomically (%d keys)", key_count)
def update_key(name, new_value):
    """
    Update a key value.

    For ``GEMINI_KEY`` the value is added through the KeyManager; use
    ``update_gemini_key`` to replace a specific key by index.  For keys
    listed in ``_KEY_DEFS`` (TomTom, Google Places) the value is written to
    .env.

    Args:
        name: Managed key name (``GEMINI_KEY`` or an entry of ``_KEY_DEFS``).
        new_value: The new secret.  Surrounding whitespace is stripped.

    Returns:
        dict with ``success`` and, on success, ``name``, ``masked_value`` and
        ``action``; on failure an ``error`` message.  The full value is never
        echoed back.
    """
    if not isinstance(new_value, str):
        return {'success': False, 'error': 'Key value must be a string'}
    new_value = new_value.strip()
    if not new_value:
        return {'success': False, 'error': 'Key value cannot be empty'}
    # The value is written verbatim as one KEY=value line, so an embedded
    # line break or other control character would inject extra variables.
    if any(not ch.isprintable() for ch in new_value):
        return {'success': False,
                'error': 'Key value must not contain control characters or line breaks'}

    if name == 'GEMINI_KEY':
        # Use KeyManager for Gemini
        from .key_manager import get_key_manager
        km = get_key_manager()
        try:
            idx = km.add_gemini_key(new_value)
        except ValueError as e:
            return {'success': False, 'error': str(e)}
        return {
            'success': True,
            'name': name,
            'masked_value': _mask_key(new_value),
            'action': 'added',
            'index': idx,
        }

    if name in _KEY_DEFS:
        _set_env_value(name, new_value)
        return {
            'success': True,
            'name': name,
            'masked_value': _mask_key(new_value),
            'action': 'updated',
        }

    return {'success': False, 'error': f'Unknown key: {name}'}
def _scrub_secret(message, secret):
    """Return ``str(message)`` with every occurrence of *secret* masked."""
    from urllib.parse import quote, quote_plus

    text = str(message)
    if not secret:
        return text
    # Cover the raw key and the percent-encoded forms a URL may contain.
    for form in {secret, quote(secret, safe=''), quote_plus(secret)}:
        text = text.replace(form, '****')
    return text


def _test_gemini(index=None):
    """Test a Gemini key by listing models.

    Args:
        index: Position of the key to test; the first key when ``None``.

    Returns:
        dict with ``success``, ``latency_ms``, ``error`` and optionally
        ``note``.  The key is sent in a request header rather than the URL,
        and error text is scrubbed, so it cannot leak through an exception
        message.
    """
    from .key_manager import get_key_manager
    km = get_key_manager()

    key = km.get_gemini_key(0 if index is None else index)
    if not key:
        if index is not None:
            return {'success': False, 'error': f'Gemini key index {index} not found', 'latency_ms': 0}
        return {'success': False, 'error': 'No Gemini keys configured', 'latency_ms': 0}

    t0 = time.time()
    try:
        resp = http_requests.get(
            "https://generativelanguage.googleapis.com/v1beta/models",
            headers={'x-goog-api-key': key},
            timeout=10
        )
        latency = int((time.time() - t0) * 1000)

        if resp.status_code == 200 and 'models' in resp.text:
            return {'success': True, 'latency_ms': latency, 'error': None,
                    'note': 'Models list returned successfully'}
        elif resp.status_code == 403:
            return {'success': False, 'latency_ms': latency,
                    'error': 'Key disabled or quota exhausted'}
        elif resp.status_code == 429:
            return {'success': True, 'latency_ms': latency, 'error': None,
                    'note': 'Valid key — currently rate-limited'}
        else:
            return {'success': False, 'latency_ms': latency,
                    'error': f'HTTP {resp.status_code}'}
    except Exception as e:
        latency = int((time.time() - t0) * 1000)
        return {'success': False, 'latency_ms': latency, 'error': _scrub_secret(e, key)}


def _test_tomtom():
    """Test the TomTom key with a minimal geocode request.

    Returns:
        dict with ``success``, ``latency_ms``, ``error`` and optionally
        ``note``.  TomTom takes the key as a query parameter, so exception
        text (which embeds the request URL) is scrubbed before it is
        returned.
    """
    key = _get_env_value('TOMTOM_API_KEY')
    if not key:
        return {'success': False, 'error': 'TOMTOM_API_KEY not set', 'latency_ms': 0}

    t0 = time.time()
    try:
        resp = http_requests.get(
            "https://api.tomtom.com/search/2/geocode/Boise.json",
            params={'key': key, 'limit': 1},
            timeout=10
        )
        latency = int((time.time() - t0) * 1000)

        if resp.status_code == 200:
            data = resp.json()
            count = data.get('summary', {}).get('totalResults', 0)
            return {'success': True, 'latency_ms': latency, 'error': None,
                    'note': f'Geocode returned {count} result(s)'}
        elif resp.status_code == 403:
            return {'success': False, 'latency_ms': latency,
                    'error': 'Invalid or expired key'}
        else:
            return {'success': False, 'latency_ms': latency,
                    'error': f'HTTP {resp.status_code}'}
    except Exception as e:
        latency = int((time.time() - t0) * 1000)
        return {'success': False, 'latency_ms': latency, 'error': _scrub_secret(e, key)}


def _test_google_places():
    """Test the Google Places (New) API key with a minimal searchText request.

    Returns:
        dict with ``success``, ``latency_ms``, ``error`` and optionally
        ``note``.
    """
    key = _get_env_value('GOOGLE_PLACES_API_KEY')
    if not key:
        return {'success': False, 'error': 'GOOGLE_PLACES_API_KEY not set', 'latency_ms': 0}

    t0 = time.time()
    try:
        resp = http_requests.post(
            "https://places.googleapis.com/v1/places:searchText",
            json={'textQuery': 'Boise Idaho', 'maxResultCount': 1},
            headers={
                'X-Goog-Api-Key': key,
                'X-Goog-FieldMask': 'places.displayName',
            },
            timeout=10
        )
        latency = int((time.time() - t0) * 1000)

        if resp.status_code == 200:
            data = resp.json()
            count = len(data.get('places', []))
            return {'success': True, 'latency_ms': latency, 'error': None,
                    'note': f'searchText returned {count} place(s)'}
        elif resp.status_code == 403:
            return {'success': False, 'latency_ms': latency,
                    'error': 'Key not authorized for Places API (New)'}
        elif resp.status_code == 429:
            return {'success': True, 'latency_ms': latency, 'error': None,
                    'note': 'Valid key — quota exceeded'}
        else:
            body = resp.text[:200]
            return {'success': False, 'latency_ms': latency,
                    'error': f'HTTP {resp.status_code}: {body}'}
    except Exception as e:
        latency = int((time.time() - t0) * 1000)
        return {'success': False, 'latency_ms': latency, 'error': _scrub_secret(e, key)}
"""
RECON Contacts Database — per-user phone book with soft delete and proximity queries.

Separate DB at data/contacts.db. Thread-local connections with WAL mode (StatusDB pattern).
"""
import math
import os
import sqlite3
import threading
from datetime import datetime, timezone

_local = threading.local()

_SCHEMA = """
CREATE TABLE IF NOT EXISTS contacts (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    user_id TEXT NOT NULL,
    label TEXT NOT NULL,
    name TEXT,
    call_sign TEXT,
    phone TEXT,
    email TEXT,
    category TEXT,
    notes TEXT,
    lat REAL,
    lon REAL,
    osm_type TEXT,
    osm_id INTEGER,
    address TEXT,
    show_proximity INTEGER DEFAULT 0,
    created_at TEXT DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ', 'now')),
    updated_at TEXT DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ', 'now')),
    deleted_at TEXT,
    deleted_by TEXT
);

CREATE INDEX IF NOT EXISTS idx_contacts_user ON contacts(user_id);
CREATE INDEX IF NOT EXISTS idx_contacts_user_category ON contacts(user_id, category);
CREATE INDEX IF NOT EXISTS idx_contacts_user_deleted ON contacts(user_id, deleted_at);
CREATE INDEX IF NOT EXISTS idx_contacts_geo ON contacts(lat, lon);
CREATE UNIQUE INDEX IF NOT EXISTS idx_contacts_home_work
    ON contacts(user_id, label)
    WHERE label IN ('Home', 'Work') AND deleted_at IS NULL;
"""

# Columns a caller may set through create()/update(). Field names are spliced
# into the SQL text as identifiers, so they must come from this fixed list and
# never straight from request data.
_WRITABLE_COLUMNS = frozenset({
    'label', 'name', 'call_sign', 'phone', 'email', 'category', 'notes',
    'lat', 'lon', 'osm_type', 'osm_id', 'address', 'show_proximity',
})


def _utc_now_iso():
    """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SS.ffffffZ``."""
    # Python's %f is microseconds only (unlike SQLite's %f, which is SS.SSS),
    # so the seconds field has to be spelled out explicitly.
    return datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%S.%fZ')


def _clean_fields(fields):
    """Keep only writable columns and coerce show_proximity to 0/1."""
    cleaned = {k: v for k, v in fields.items() if k in _WRITABLE_COLUMNS}
    if 'show_proximity' in cleaned:
        cleaned['show_proximity'] = 1 if cleaned['show_proximity'] else 0
    return cleaned


def _escape_like(term):
    """Escape LIKE wildcards so *term* is matched literally (ESCAPE '\\')."""
    return term.replace('\\', '\\\\').replace('%', '\\%').replace('_', '\\_')


def _haversine_m(lat1, lon1, lat2, lon2):
    """Haversine distance in meters."""
    R = 6_371_000
    rlat1, rlat2 = math.radians(lat1), math.radians(lat2)
    dlat = math.radians(lat2 - lat1)
    dlon = math.radians(lon2 - lon1)
    a = math.sin(dlat / 2) ** 2 + math.cos(rlat1) * math.cos(rlat2) * math.sin(dlon / 2) ** 2
    # Rounding can push `a` marginally outside [0, 1] for (near-)antipodal
    # points, which would make sqrt(1 - a) raise ValueError.
    a = min(1.0, max(0.0, a))
    return R * 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))


def _row_to_dict(row):
    """Convert sqlite3.Row to dict, casting show_proximity to bool."""
    d = dict(row)
    d['show_proximity'] = bool(d.get('show_proximity', 0))
    return d


class ContactsDB:
    """SQLite-backed contact store; every query is scoped to a ``user_id``.

    Deleting is soft (``deleted_at`` is set); ``purge`` removes a soft-deleted
    row for good. Each thread gets its own connection via the module-level
    thread-local.
    """

    def __init__(self, db_path=None):
        """Open (creating if needed) the database at *db_path*.

        Defaults to ``<package parent>/data/contacts.db``.
        """
        if db_path is None:
            db_path = os.path.join(
                os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
                'data', 'contacts.db',
            )
        self.db_path = db_path
        parent = os.path.dirname(db_path)
        if parent:  # a bare filename has no directory to create
            os.makedirs(parent, exist_ok=True)
        self._init_db()

    def _get_conn(self):
        """Return this thread's connection, (re)opening it for ``self.db_path``."""
        conn = getattr(_local, 'contacts_conn', None)
        # The thread-local slot is shared by all instances; make sure the
        # cached connection actually points at this instance's database.
        if conn is not None and getattr(_local, 'contacts_conn_path', None) != self.db_path:
            conn.close()
            conn = None
        if conn is None:
            conn = sqlite3.connect(self.db_path, timeout=30)
            conn.row_factory = sqlite3.Row
            conn.execute("PRAGMA journal_mode=WAL")
            conn.execute("PRAGMA busy_timeout=5000")
            _local.contacts_conn = conn
            _local.contacts_conn_path = self.db_path
        return conn

    def _init_db(self):
        """Create the table and indexes if they do not exist yet."""
        conn = self._get_conn()
        conn.executescript(_SCHEMA)
        conn.commit()

    def list_all(self, user_id, category=None, search=None):
        """Return the user's live contacts ordered by label.

        *category* filters by exact match; *search* is a literal substring
        matched against label, name, call_sign and phone.
        """
        conn = self._get_conn()
        sql = "SELECT * FROM contacts WHERE user_id = ? AND deleted_at IS NULL"
        params = [user_id]
        if category:
            sql += " AND category = ?"
            params.append(category)
        if search:
            sql += (" AND (label LIKE ? ESCAPE '\\' OR name LIKE ? ESCAPE '\\'"
                    " OR call_sign LIKE ? ESCAPE '\\' OR phone LIKE ? ESCAPE '\\')")
            like = f"%{_escape_like(search)}%"
            params.extend([like, like, like, like])
        sql += " ORDER BY label"
        return [_row_to_dict(r) for r in conn.execute(sql, params).fetchall()]

    def list_deleted(self, user_id):
        """Return the user's soft-deleted contacts, most recently deleted first."""
        conn = self._get_conn()
        rows = conn.execute(
            "SELECT * FROM contacts WHERE user_id = ? AND deleted_at IS NOT NULL ORDER BY deleted_at DESC",
            (user_id,)
        ).fetchall()
        return [_row_to_dict(r) for r in rows]

    def get(self, user_id, contact_id, include_deleted=False):
        """Return one contact as a dict, or None if it is not the user's / not found."""
        conn = self._get_conn()
        sql = "SELECT * FROM contacts WHERE id = ? AND user_id = ?"
        if not include_deleted:
            sql += " AND deleted_at IS NULL"
        row = conn.execute(sql, (contact_id, user_id)).fetchone()
        return _row_to_dict(row) if row else None

    def create(self, user_id, **fields):
        """Insert a contact. Returns ``(contact, None)`` or ``(None, 'conflict')``.

        Keys that are not writable columns (including server-managed ones such
        as id, user_id and the timestamps) are ignored. Any constraint
        violation is reported as ``'conflict'``.
        """
        conn = self._get_conn()
        fields = _clean_fields(fields)
        columns = ['user_id'] + list(fields)
        placeholders = ', '.join(['?'] * len(columns))
        col_str = ', '.join(columns)
        values = [user_id] + list(fields.values())
        try:
            cur = conn.execute(f"INSERT INTO contacts ({col_str}) VALUES ({placeholders})", values)
            conn.commit()
        except sqlite3.IntegrityError:
            conn.rollback()  # do not leave the failed transaction open
            return None, 'conflict'
        return self.get(user_id, cur.lastrowid), None

    def update(self, user_id, contact_id, **fields):
        """Update a live contact and return it (None if it does not exist).

        Non-writable keys are ignored; ``updated_at`` is always refreshed.
        Raises sqlite3.IntegrityError if the change violates a constraint.
        """
        conn = self._get_conn()
        fields = _clean_fields(fields)
        fields['updated_at'] = _utc_now_iso()
        sets = ', '.join(f"{k} = ?" for k in fields)
        values = list(fields.values()) + [contact_id, user_id]
        try:
            conn.execute(
                f"UPDATE contacts SET {sets} WHERE id = ? AND user_id = ? AND deleted_at IS NULL",
                values,
            )
            conn.commit()
        except sqlite3.IntegrityError:
            conn.rollback()
            raise
        return self.get(user_id, contact_id)

    def soft_delete(self, user_id, contact_id):
        """Mark a live contact deleted; return the row (None if not found)."""
        conn = self._get_conn()
        now = _utc_now_iso()
        conn.execute(
            "UPDATE contacts SET deleted_at = ?, deleted_by = ? WHERE id = ? AND user_id = ? AND deleted_at IS NULL",
            (now, user_id, contact_id, user_id)
        )
        conn.commit()
        return self.get(user_id, contact_id, include_deleted=True)

    def restore(self, user_id, contact_id):
        """Undelete a contact. Returns ``(contact, None)`` or ``(None, error)``.

        *error* is ``'not_found'`` or ``'conflict'`` (a live Home/Work contact
        with the same label already exists).
        """
        conn = self._get_conn()
        row = self.get(user_id, contact_id, include_deleted=True)
        if not row or not row.get('deleted_at'):
            return None, 'not_found'
        if row.get('label') in ('Home', 'Work'):
            existing = conn.execute(
                "SELECT id FROM contacts WHERE user_id = ? AND label = ? AND deleted_at IS NULL AND id != ?",
                (user_id, row['label'], contact_id)
            ).fetchone()
            if existing:
                return None, 'conflict'
        try:
            conn.execute(
                "UPDATE contacts SET deleted_at = NULL, deleted_by = NULL WHERE id = ? AND user_id = ?",
                (contact_id, user_id)
            )
            conn.commit()
        except sqlite3.IntegrityError:
            # Lost a race with another writer that created the same label.
            conn.rollback()
            return None, 'conflict'
        return self.get(user_id, contact_id), None

    def restore_as(self, user_id, contact_id, new_label):
        """Restore a soft-deleted contact with a new label (for Home/Work conflict resolution).

        Returns ``(contact, None)`` or ``(None, error)`` where *error* is
        ``'not_found'``, ``'invalid_label'`` or ``'conflict'``.
        """
        conn = self._get_conn()
        row = self.get(user_id, contact_id, include_deleted=True)
        if not row or not row.get('deleted_at'):
            return None, 'not_found'
        if not new_label or not new_label.strip():
            return None, 'invalid_label'
        now = _utc_now_iso()
        try:
            conn.execute(
                "UPDATE contacts SET deleted_at = NULL, deleted_by = NULL, label = ?, updated_at = ? WHERE id = ? AND user_id = ?",
                (new_label.strip(), now, contact_id, user_id)
            )
            conn.commit()
        except sqlite3.IntegrityError:
            conn.rollback()
            return None, 'conflict'
        return self.get(user_id, contact_id), None

    def purge(self, user_id, contact_id):
        """Permanently delete a soft-deleted contact.

        Returns ``(True, None)``, ``(False, 'not_found')`` or
        ``(False, 'not_deleted')``.
        """
        conn = self._get_conn()
        row = self.get(user_id, contact_id, include_deleted=True)
        if not row:
            return False, 'not_found'
        if not row.get('deleted_at'):
            return False, 'not_deleted'
        conn.execute("DELETE FROM contacts WHERE id = ? AND user_id = ?", (contact_id, user_id))
        conn.commit()
        return True, None

    def find_nearby(self, user_id, lat, lon, radius_m=75):
        """Return live ``show_proximity`` contacts within *radius_m* metres.

        Each result carries ``distance_m``; results are sorted nearest first.
        """
        conn = self._get_conn()
        # Bounding box pre-filter (~111km per degree lat)
        dlat = radius_m / 111_000
        # Floor the cosine so the longitude span stays finite near the poles.
        cos_lat = max(math.cos(math.radians(lat)), 1e-9)
        dlon = radius_m / (111_000 * cos_lat)
        rows = conn.execute(
            """SELECT * FROM contacts
               WHERE user_id = ? AND deleted_at IS NULL AND show_proximity = 1
               AND lat BETWEEN ? AND ? AND lon BETWEEN ? AND ?""",
            (user_id, lat - dlat, lat + dlat, lon - dlon, lon + dlon)
        ).fetchall()
        results = []
        for r in rows:
            dist = _haversine_m(lat, lon, r['lat'], r['lon'])
            if dist <= radius_m:
                d = _row_to_dict(r)
                d['distance_m'] = round(dist, 1)
                results.append(d)
        results.sort(key=lambda x: x['distance_m'])
        return results
"""
RECON Contacts API — Flask Blueprint.

Per-user phone book with soft delete, restore, purge, and proximity queries.
All endpoints require Authentik forward-auth (X-Authentik-Username header).
"""
import sqlite3

from flask import Blueprint, request, jsonify

from .auth import require_auth
from .contacts import ContactsDB

contacts_bp = Blueprint('contacts', __name__)

_db = None


def _get_db():
    """Return the process-wide ContactsDB, creating it on first use."""
    global _db
    if _db is None:
        _db = ContactsDB()
    return _db


def _json_object():
    """Return the request body as a dict, or None if it is not a JSON object."""
    data = request.get_json(force=True, silent=True)
    return data if isinstance(data, dict) else None


def _bad_request(message):
    """Build a 400 response carrying *message*."""
    return jsonify({'error': message}), 400


@contacts_bp.route('/api/contacts', methods=['GET'])
@require_auth
def list_contacts():
    """List the caller's contacts, optionally filtered by category/search."""
    db = _get_db()
    category = request.args.get('category')
    search = request.args.get('search')
    return jsonify(db.list_all(request.user_id, category=category, search=search))


@contacts_bp.route('/api/contacts', methods=['POST'])
@require_auth
def create_contact():
    """Create a contact from a JSON object; ``label`` is required."""
    db = _get_db()
    data = _json_object()
    if data is None:
        return _bad_request('JSON object required')
    label = data.get('label')
    # Without this check a missing label trips the NOT NULL constraint and is
    # misreported as a Home/Work conflict.
    if not isinstance(label, str) or not label.strip():
        return _bad_request('label is required')
    contact, err = db.create(request.user_id, **data)
    if err == 'conflict':
        return jsonify({'error': 'You already have a Home/Work contact'}), 409
    return jsonify(contact), 201


@contacts_bp.route('/api/contacts/nearby', methods=['GET'])
@require_auth
def nearby_contacts():
    """Return proximity-enabled contacts around ``lat``/``lon``."""
    db = _get_db()
    lat = request.args.get('lat', type=float)
    lon = request.args.get('lon', type=float)
    radius_m = request.args.get('radius_m', 75, type=float)
    if lat is None or lon is None:
        return jsonify({'error': 'lat and lon required'}), 400
    # Also rejects NaN, for which every comparison is False.
    if not (-90 <= lat <= 90 and -180 <= lon <= 180):
        return _bad_request('lat/lon out of range')
    return jsonify(db.find_nearby(request.user_id, lat, lon, radius_m))


@contacts_bp.route('/api/contacts/deleted', methods=['GET'])
@require_auth
def list_deleted():
    """List the caller's soft-deleted contacts."""
    db = _get_db()
    return jsonify(db.list_deleted(request.user_id))


# The item routes need the <int:contact_id> converter: each view takes a
# contact_id argument, and without a URL variable Flask has nothing to pass.
@contacts_bp.route('/api/contacts/<int:contact_id>', methods=['GET'])
@require_auth
def get_contact(contact_id):
    """Return one live contact or 404."""
    db = _get_db()
    contact = db.get(request.user_id, contact_id)
    if not contact:
        return jsonify({'error': 'Not found'}), 404
    return jsonify(contact)


@contacts_bp.route('/api/contacts/<int:contact_id>', methods=['PATCH'])
@require_auth
def update_contact(contact_id):
    """Apply a partial update from a JSON object; 404 if the contact is gone."""
    db = _get_db()
    data = _json_object()
    if data is None:
        return _bad_request('JSON object required')
    try:
        contact = db.update(request.user_id, contact_id, **data)
    except sqlite3.IntegrityError:
        # e.g. relabelling to Home/Work while another live one exists
        return jsonify({'error': 'You already have a Home/Work contact'}), 409
    if not contact:
        return jsonify({'error': 'Not found'}), 404
    return jsonify(contact)


@contacts_bp.route('/api/contacts/<int:contact_id>', methods=['DELETE'])
@require_auth
def delete_contact(contact_id):
    """Soft-delete a contact and return the resulting row."""
    db = _get_db()
    contact = db.soft_delete(request.user_id, contact_id)
    if not contact:
        return jsonify({'error': 'Not found'}), 404
    return jsonify(contact)


@contacts_bp.route('/api/contacts/<int:contact_id>/restore', methods=['POST'])
@require_auth
def restore_contact(contact_id):
    """Undelete a contact; 409 when its Home/Work label is already taken."""
    db = _get_db()
    contact, err = db.restore(request.user_id, contact_id)
    if err == 'not_found':
        return jsonify({'error': 'Not found'}), 404
    if err == 'conflict':
        return jsonify({'error': 'You already have a Home/Work contact'}), 409
    return jsonify(contact)


@contacts_bp.route('/api/contacts/<int:contact_id>/restore-as', methods=['POST'])
@require_auth
def restore_as_contact(contact_id):
    """Undelete a contact under a new ``label`` taken from the JSON body."""
    db = _get_db()
    data = _json_object()
    if data is None:
        return _bad_request('JSON object required')
    new_label = data.get('label')
    if not isinstance(new_label, str) or not new_label.strip():
        return jsonify({'error': 'label is required'}), 400
    contact, err = db.restore_as(request.user_id, contact_id, new_label.strip())
    if err == 'not_found':
        return jsonify({'error': 'Not found'}), 404
    if err == 'invalid_label':
        return jsonify({'error': 'Invalid label'}), 400
    if err == 'conflict':
        return jsonify({'error': 'Label conflict'}), 409
    return jsonify(contact)


@contacts_bp.route('/api/contacts/<int:contact_id>/purge', methods=['DELETE'])
@require_auth
def purge_contact(contact_id):
    """Permanently remove a contact that has already been soft-deleted."""
    db = _get_db()
    ok, err = db.purge(request.user_id, contact_id)
    if err == 'not_found':
        return jsonify({'error': 'Not found'}), 404
    if err == 'not_deleted':
        return jsonify({'error': 'Contact must be deleted before purging'}), 400
    return jsonify({'ok': True})
Reads RECON_PROFILE env var (default: "home"), loads the matching YAML from config/profiles/.yaml, and caches the parsed dict in memory. - -Exposes get_deployment_config() as the in-process accessor for the profile. - -Note: its former consumers (the /api/landclass gate, google_places, -place_detail, offroute/router) were all extracted to navi-* services or removed -across cleanups #4–#6/#27 — recon has no remaining caller of -get_deployment_config() today; the module is retained per cleanup #1. -(The former /api/config HTTP endpoint that served this dict to the frontend was -removed once navi-config (:8422) took over that route.) +Provides get_deployment_config() for use by the /api/config endpoint. """ import os import yaml diff --git a/lib/geocode.py b/lib/geocode.py new file mode 100644 index 0000000..aabd37e --- /dev/null +++ b/lib/geocode.py @@ -0,0 +1,774 @@ +""" +RECON geocode — structured preprocessing, multi-source retrieval, reranking. + +Replaces the naive Photon-only search with: + 1. usaddress parsing + intent classification (ADDRESS / POI / LOCALITY / COORD / POSTCODE) + 2. Multi-source retrieval: ADDRESS → Netsyms + Photon; POI/LOCALITY → Photon /api + 3. 
Python reranker with weighted signals + +Public entry point: geocode(query, limit) → {query, results, count} +""" + +import math +import re +import logging + +import requests +import usaddress +from rapidfuzz import fuzz + +from .utils import setup_logging + +logger = setup_logging('recon.geocode') + +# ── Trace logger for reranking audit ── +_trace_logger = logging.getLogger('recon.geocode.trace') +_trace_handler = logging.FileHandler('/tmp/geocode_rerank_trace.log') +_trace_handler.setFormatter(logging.Formatter('%(asctime)s %(message)s')) +_trace_logger.addHandler(_trace_handler) +_trace_logger.setLevel(logging.DEBUG) + +# ── Config constants ── +PHOTON_URL = "http://localhost:2322" +GEOCODE_BIAS_LAT = 42.5736 +GEOCODE_BIAS_LON = -114.6066 +GEOCODE_BIAS_ZOOM = 10 +ADDRESS_BOOK_ANNOTATION_RADIUS_M = 75 + +# ── Reranker weights ── +# Derived from research analysis of failure modes: +# housenumber_exact is the strongest signal because Photon's soft-boost +# lets wrong-number results bubble up. street_name_fuzz and locality_fuzz +# handle abbreviation/case variation. source_authority gives Netsyms a +# boost for US addresses since it has USPS-verified data. +W_HOUSENUMBER_EXACT = 6.0 # exact housenumber match +W_HOUSENUMBER_MISMATCH = -5.0 # housenumber present but wrong +W_STREET_NAME_FUZZ = 3.0 # fuzzy street name similarity [0..1] * weight +W_TOKEN_COVERAGE = 2.0 # fraction of query tokens found in result +W_STREET_TYPE_MATCH = 1.5 # "st" matches "street", etc. 
+W_LOCALITY_FUZZ = 2.0 # city/state fuzzy match +W_SOURCE_AUTHORITY = 2.0 # Netsyms for US addresses +W_LAYER_RANK = 1.0 # type-appropriate results ranked higher +W_PHOTON_POSITION_NORM = 1.0 # Photon's native ranking (normalized by position) +W_STATE_EXACT = 1.0 # exact state code match +W_POI_CLASS_BOOST = 3.0 # amenity/shop/etc boost for business-name queries +W_HIGHWAY_CLASS_PENALTY = -4.0 # highway/route penalty for business-name queries + +# ── US abbreviation expansions ── +# Applied ONLY to parsed StreetName/StreetNamePostType tokens, NOT to ordinals. +_STREET_TYPE_ABBREVS = { + 'st': 'street', 'ave': 'avenue', 'blvd': 'boulevard', 'dr': 'drive', + 'rd': 'road', 'ln': 'lane', 'ct': 'court', 'cir': 'circle', + 'pl': 'place', 'way': 'way', 'pkwy': 'parkway', 'hwy': 'highway', + 'trl': 'trail', 'ter': 'terrace', 'sq': 'square', +} +_DIRECTIONAL_ABBREVS = { + 'n': 'north', 's': 'south', 'e': 'east', 'w': 'west', + 'ne': 'northeast', 'nw': 'northwest', 'se': 'southeast', 'sw': 'southwest', +} +_ORDINAL_RE = re.compile(r'^\d+(st|nd|rd|th)$', re.IGNORECASE) + +# ── Road keywords (for detecting when query is about a road vs a business) ── +_ROAD_KEYWORDS = ( + set(_STREET_TYPE_ABBREVS.keys()) + | set(_STREET_TYPE_ABBREVS.values()) + | {'route', 'rte', 'pass'} +) + +# ── US state codes ── +_STATE_CODES = { + 'AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA', + 'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD', + 'MA', 'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ', + 'NM', 'NY', 'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', + 'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY', 'DC', +} + +# ── Full state name → code (for intent classifier) ── +_STATE_NAME_TO_CODE = { + 'alabama': 'AL', 'alaska': 'AK', 'arizona': 'AZ', 'arkansas': 'AR', + 'california': 'CA', 'colorado': 'CO', 'connecticut': 'CT', 'delaware': 'DE', + 'florida': 'FL', 'georgia': 'GA', 'hawaii': 'HI', 'idaho': 'ID', + 'illinois': 'IL', 'indiana': 'IN', 'iowa': 'IA', 
def _trailing_state(tokens):
    """Return (city, state_code) if *tokens* end with a US state, else None."""
    if len(tokens) < 2:
        return None
    last_upper = tokens[-1].upper()
    if last_upper in _STATE_CODES:
        return ' '.join(tokens[:-1]), last_upper
    last_lower = tokens[-1].lower()
    # Two-word names must be tried before single-word ones; otherwise
    # "... west virginia" is claimed by "virginia" and the city keeps "west".
    if len(tokens) >= 3:
        two_word = f"{tokens[-2].lower()} {last_lower}"
        if two_word in _STATE_NAME_TO_CODE:
            return ' '.join(tokens[:-2]), _STATE_NAME_TO_CODE[two_word]
    if last_lower in _STATE_NAME_TO_CODE:
        return ' '.join(tokens[:-1]), _STATE_NAME_TO_CODE[last_lower]
    return None


def _classify_and_parse(query):
    """
    Parse query with usaddress, classify intent, expand abbreviations.

    Returns (intent, parsed_dict) where:
      intent: 'ADDRESS' | 'LOCALITY' | 'POSTCODE' | 'COORD' | 'UNKNOWN'
              ('POI' is never produced here; such queries come back UNKNOWN)
      parsed_dict: {number, street, street_raw, city, state, zipcode,
                    raw_query, expanded_query}
        street      — street with abbreviations expanded (for Photon)
        street_raw  — street as typed (for Netsyms)
    """
    q = query.strip()
    parsed = {
        'number': None, 'street': None, 'street_raw': None,
        'city': None, 'state': None,
        'zipcode': None, 'raw_query': q, 'expanded_query': q,
    }

    # Coordinate check first
    if _parse_coords(q):
        return 'COORD', parsed

    # Try usaddress
    try:
        tagged, addr_type = usaddress.tag(q)
    except usaddress.RepeatedLabelError:
        # Ambiguous input — fall back to free-text Photon
        return 'UNKNOWN', parsed

    # Extract components
    number = tagged.get('AddressNumber', '').strip()
    street_name = tagged.get('StreetName', '').strip()
    street_pre_dir = tagged.get('StreetNamePreDirectional', '').strip()
    street_post_type = tagged.get('StreetNamePostType', '').strip()
    place = tagged.get('PlaceName', '').strip()
    state = tagged.get('StateName', '').strip()
    zipcode = tagged.get('ZipCode', '').strip()

    # ── Fix usaddress edge case: "214 N St Filer" ──
    # usaddress reads a short directional + "St" as PreDirectional + empty,
    # mashing "St Filer" into StreetName. Detect: PreDirectional is at most two
    # letters, StreetName has 2+ tokens where the first is a street type.
    if (street_pre_dir and len(street_pre_dir) <= 2
            and not street_name.strip().startswith(street_pre_dir)
            and ' ' in street_name):
        name_tokens = street_name.split()
        first_lower = name_tokens[0].lower()
        if first_lower in _STREET_TYPE_ABBREVS or first_lower in _STREET_TYPE_ABBREVS.values():
            # "N" is actually the street name, "St" is the post-type
            street_name = street_pre_dir
            street_post_type = name_tokens[0]
            if len(name_tokens) > 1:
                place = ' '.join(name_tokens[1:])
            street_pre_dir = ''

    # ── Expand abbreviations (guard ordinals) ──
    expanded_parts = []

    if number:
        parsed['number'] = number
        expanded_parts.append(number)

    if street_pre_dir:
        expanded_parts.append(_DIRECTIONAL_ABBREVS.get(street_pre_dir.lower(), street_pre_dir))

    if street_name:
        # Don't expand ordinals: "21st" stays "21st"
        if _ORDINAL_RE.match(street_name):
            expanded_parts.append(street_name)
        else:
            # Expand directional abbreviation if it IS the street name
            expanded_parts.append(_DIRECTIONAL_ABBREVS.get(street_name.lower(), street_name))

    if street_post_type:
        if _ORDINAL_RE.match(street_post_type):
            expanded_parts.append(street_post_type)
        else:
            expanded_parts.append(_STREET_TYPE_ABBREVS.get(street_post_type.lower(), street_post_type))

    # Build raw street (original abbreviations, for Netsyms) and expanded (for Photon)
    raw_street_parts = [p for p in (street_pre_dir, street_name, street_post_type) if p]
    parsed['street_raw'] = ' '.join(raw_street_parts)

    # Build the full expanded street
    if expanded_parts:
        # The street is everything after the number
        parsed['street'] = ' '.join(expanded_parts[1:] if number else expanded_parts)

    if place:
        parsed['city'] = place
        expanded_parts.append(place)
    if state:
        parsed['state'] = state.upper()
        expanded_parts.append(state)
    if zipcode:
        parsed['zipcode'] = zipcode
        expanded_parts.append(zipcode)

    # Keep the raw query when nothing was tagged, rather than blanking it.
    if expanded_parts:
        parsed['expanded_query'] = ' '.join(expanded_parts)

    # ── Intent classification ──
    if addr_type == 'Street Address' and number:
        return 'ADDRESS', parsed
    if zipcode and not number and not street_name:
        return 'POSTCODE', parsed
    if addr_type == 'Ambiguous':
        # Looks like a locality when the last token(s) are a state code or name
        locality = _trailing_state(q.replace(',', ' ').split())
        if locality:
            parsed['city'], parsed['state'] = locality
            return 'LOCALITY', parsed
    return 'UNKNOWN', parsed
import netsyms + except Exception: + return [] + + results = [] + number = parsed.get('number', '') + street = parsed.get('street_raw') or parsed.get('street', '') + city = parsed.get('city', '') + state = parsed.get('state', '') + zipcode = parsed.get('zipcode', '') + + # When viewport provided, fetch more results to sort from + fetch_limit = 200 if (lat is not None and lon is not None) else limit + + if number and street: + rows = netsyms.lookup_by_street( + number, street, city=city, state=state, zipcode=zipcode, limit=fetch_limit + ) + elif zipcode: + rows = netsyms.lookup_by_zipcode(zipcode, limit=fetch_limit) + else: + return [] + + for row in rows: + addr_parts = [row['number'], row['street']] + if row.get('street2'): + addr_parts.append(row['street2']) + addr_parts.extend([row['city'], row['state'], row['zipcode']]) + display = ' '.join(p for p in addr_parts if p) + results.append({ + 'name': display, + 'lat': row['lat'], + 'lon': row['lon'], + 'source': 'netsyms', + 'type': 'street_address', + 'raw': row, + '_number': row.get('number', ''), + '_street': row.get('street', ''), + '_city': row.get('city', ''), + '_state': row.get('state', ''), + }) + # Sort by viewport distance if lat/lon provided, then limit + if lat is not None and lon is not None and results: + results.sort(key=lambda r: (r["lat"] - lat)**2 + (r["lon"] - lon)**2) + results = results[:limit] + return results + + +def _retrieve_photon_structured(parsed, limit=10): + """Query Photon /structured endpoint for address lookup.""" + params = {'limit': limit, 'countrycode': 'US'} + if parsed.get('street'): + params['street'] = parsed['street'] + if parsed.get('number'): + params['housenumber'] = parsed['number'] + if parsed.get('city'): + params['city'] = parsed['city'] + if parsed.get('state'): + params['state'] = parsed['state'] + + if 'street' not in params: + return [] + + try: + resp = requests.get(f"{PHOTON_URL}/structured", params=params, timeout=5) + resp.raise_for_status() + data = 
resp.json() + except Exception as e: + logger.debug("Photon /structured failed: %s", e) + return [] + + return _parse_photon_features(data.get('features', []), 'photon') + + +def _retrieve_photon_freetext(query, limit=10, lat=None, lon=None, zoom=None): + """Query Photon /api for free-text search with location bias.""" + try: + params = { + 'q': query, + 'limit': limit, + 'lat': lat if lat is not None else GEOCODE_BIAS_LAT, + 'lon': lon if lon is not None else GEOCODE_BIAS_LON, + 'zoom': int(zoom) if zoom is not None else GEOCODE_BIAS_ZOOM, + } + resp = requests.get(f"{PHOTON_URL}/api", params=params, timeout=5) + resp.raise_for_status() + data = resp.json() + except Exception as e: + return [] + + return _parse_photon_features(data.get('features', []), 'photon') + + +def _parse_photon_features(features, source): + """Convert Photon GeoJSON features to candidate dicts.""" + results = [] + for i, feature in enumerate(features): + props = feature.get('properties', {}) + coords = feature.get('geometry', {}).get('coordinates', [0, 0]) + + osm_key = props.get('osm_key', '') + osm_value = props.get('osm_value', '') + feat_type = props.get('type', '') + has_hn = bool(props.get('housenumber')) + + if osm_key in ('amenity', 'shop', 'tourism', 'leisure', 'office'): + rtype = 'poi' + elif has_hn or osm_value in ('house', 'residential'): + rtype = 'street_address' + elif feat_type in ('city', 'town', 'village', 'hamlet', 'county', 'state', 'country'): + rtype = 'locality' + else: + rtype = 'poi' + + # Build display name + parts = [] + hn = props.get('housenumber') + street = props.get('street') + name = props.get('name', '') + if hn and street: + parts.append(f"{hn} {street}") + if name and name != street: + parts.append(name) + elif name: + parts.append(name) + elif street: + parts.append(street) + for key in ('city', 'county', 'state', 'country'): + v = props.get(key) + if v and (not parts or v != parts[-1]): + parts.append(v) + display = ', '.join(p for p in parts if p) or 
'Unknown' + + results.append({ + 'name': display, + 'lat': coords[1], + 'lon': coords[0], + 'source': source, + 'type': rtype, + 'raw': props, + '_photon_rank': i, + '_number': props.get('housenumber', ''), + '_street': props.get('street', ''), + # For locality results, the name IS the city (Photon omits 'city' on city-type features) + '_city': props.get('city', '') or (props.get('name', '') if rtype == 'locality' else ''), + '_state': props.get('state', ''), + }) + return results + + +# ═══════════════════════════════════════════════════════════════════ +# STEP 3: RERANKER +# ═══════════════════════════════════════════════════════════════════ + +def _expand_street_type(s): + """Expand a street type abbreviation for comparison.""" + return _STREET_TYPE_ABBREVS.get(s.lower(), s.lower()) + + +def _score_candidate(candidate, parsed, intent): + """ + Score a candidate against the parsed query. + Returns (total_score, signal_breakdown_dict). + """ + signals = {} + total = 0.0 + + query_number = (parsed.get('number') or '').strip().upper() + query_street = (parsed.get('street') or '').strip().upper() + query_city = (parsed.get('city') or '').strip().upper() + query_state = (parsed.get('state') or '').strip().upper() + + cand_number = (candidate.get('_number') or '').strip().upper() + cand_street = (candidate.get('_street') or '').strip().upper() + cand_city = (candidate.get('_city') or '').strip().upper() + cand_state = (candidate.get('_state') or '').strip().upper() + + # ── Housenumber ── + if intent == 'ADDRESS' and query_number: + if cand_number == query_number: + signals['housenumber_exact'] = W_HOUSENUMBER_EXACT + total += W_HOUSENUMBER_EXACT + elif cand_number and cand_number != query_number: + signals['housenumber_mismatch'] = W_HOUSENUMBER_MISMATCH + total += W_HOUSENUMBER_MISMATCH + + # ── Street name fuzz ── + if query_street and cand_street: + # Expand both for comparison + q_expanded = ' '.join(_expand_street_type(t) for t in query_street.split()) + 
c_expanded = ' '.join(_expand_street_type(t) for t in cand_street.split()) + ratio = fuzz.token_sort_ratio(q_expanded, c_expanded) / 100.0 + score = ratio * W_STREET_NAME_FUZZ + signals['street_name_fuzz'] = round(score, 2) + total += score + + # ── Street type match ── + if query_street and cand_street: + q_tokens = set(_expand_street_type(t) for t in query_street.split()) + c_tokens = set(_expand_street_type(t) for t in cand_street.split()) + # Check if the street type words overlap + street_types = set(_STREET_TYPE_ABBREVS.values()) + q_types = q_tokens & street_types + c_types = c_tokens & street_types + if q_types and q_types & c_types: + signals['street_type_match'] = W_STREET_TYPE_MATCH + total += W_STREET_TYPE_MATCH + + # ── Token coverage ── + raw_q = parsed.get('raw_query', '').upper() + q_tokens = set(raw_q.replace(',', ' ').split()) + if q_tokens: + cand_text = candidate.get('name', '').upper() + matched = sum(1 for t in q_tokens if t in cand_text) + coverage = matched / len(q_tokens) + score = coverage * W_TOKEN_COVERAGE + signals['token_coverage'] = round(score, 2) + total += score + + # ── Locality fuzz ── + if query_city and cand_city: + ratio = fuzz.ratio(query_city, cand_city) / 100.0 + score = ratio * W_LOCALITY_FUZZ + signals['locality_fuzz'] = round(score, 2) + total += score + + # ── State exact ── + if query_state and cand_state: + if cand_state == query_state: + signals['state_exact'] = W_STATE_EXACT + total += W_STATE_EXACT + + # ── Source authority ── + if candidate.get('source') == 'netsyms' and intent == 'ADDRESS': + signals['source_authority'] = W_SOURCE_AUTHORITY + total += W_SOURCE_AUTHORITY + + # ── Layer rank (type-appropriate bonus) ── + cand_type = candidate.get('type', '') + if intent == 'ADDRESS' and cand_type == 'street_address': + signals['layer_rank'] = W_LAYER_RANK + total += W_LAYER_RANK + elif intent == 'LOCALITY' and cand_type == 'locality': + signals['layer_rank'] = W_LAYER_RANK + total += W_LAYER_RANK + elif intent == 
'POI' and cand_type == 'poi': + signals['layer_rank'] = W_LAYER_RANK + total += W_LAYER_RANK + + # ── Photon position normalization ── + photon_rank = candidate.get('_photon_rank') + if photon_rank is not None: + # Top result gets full bonus, decays linearly + score = max(0, (1.0 - photon_rank / 10.0)) * W_PHOTON_POSITION_NORM + signals['photon_position'] = round(score, 2) + total += score + + # ── Business intent POI boost ── + # When the query has no road keywords (likely a business/POI search), + # boost amenity/shop/etc results and penalize highway/route results. + # Skipped for LOCALITY, POSTCODE, COORD queries where class is irrelevant. + if intent not in ('LOCALITY', 'POSTCODE', 'COORD'): + q_tokens_lower = set(parsed.get('raw_query', '').lower().replace(',', ' ').split()) + if not (q_tokens_lower & _ROAD_KEYWORDS): + osm_key = (candidate.get('raw') or {}).get('osm_key', '') + if osm_key in ('amenity', 'shop', 'tourism', 'leisure', 'office', 'craft'): + signals['poi_class_boost'] = W_POI_CLASS_BOOST + total += W_POI_CLASS_BOOST + elif osm_key in ('highway', 'route'): + signals['highway_class_penalty'] = W_HIGHWAY_CLASS_PENALTY + total += W_HIGHWAY_CLASS_PENALTY + + return round(total, 2), signals + + +def _build_match_code(candidate, parsed, intent): + """Build a match_code dict indicating match quality for each field.""" + mc = {} + if intent == 'ADDRESS': + q_num = (parsed.get('number') or '').strip().upper() + c_num = (candidate.get('_number') or '').strip().upper() + if q_num and c_num == q_num: + mc['housenumber'] = 'matched' + elif q_num and c_num: + mc['housenumber'] = 'unmatched' + elif q_num and not c_num: + mc['housenumber'] = 'inferred' + + q_street = (parsed.get('street') or '').strip().upper() + c_street = (candidate.get('_street') or '').strip().upper() + if q_street and c_street: + q_exp = ' '.join(_expand_street_type(t) for t in q_street.split()) + c_exp = ' '.join(_expand_street_type(t) for t in c_street.split()) + ratio = 
fuzz.token_sort_ratio(q_exp, c_exp) / 100.0 + mc['street'] = 'matched' if ratio > 0.8 else 'unmatched' + elif q_street: + mc['street'] = 'inferred' + + q_city = (parsed.get('city') or '').strip().upper() + c_city = (candidate.get('_city') or '').strip().upper() + if q_city and c_city: + ratio = fuzz.ratio(q_city, c_city) / 100.0 + mc['city'] = 'matched' if ratio > 0.8 else 'unmatched' + elif q_city: + mc['city'] = 'inferred' + + return mc + + +def _rerank(candidates, parsed, intent, query, limit): + """Score, sort, and trim candidates. Trace-log top 3.""" + scored = [] + for c in candidates: + total, signals = _score_candidate(c, parsed, intent) + c['_score'] = total + c['_signals'] = signals + scored.append(c) + + scored.sort(key=lambda c: c['_score'], reverse=True) + + # Trace log for audit + _trace_logger.debug("─── Query: %r intent=%s ───", query, intent) + for i, c in enumerate(scored): + osm_key = (c.get('raw') or {}).get('osm_key', '—') + osm_val = (c.get('raw') or {}).get('osm_value', '—') + _trace_logger.debug( + " #%d score=%.2f src=%s key=%s/%s name=%s", + i, c['_score'], c.get('source', '?'), osm_key, osm_val, + c.get('name', '?')[:60] + ) + _trace_logger.debug(" signals=%s", c.get('_signals', {})) + + # Clean internal fields and add match_code + result = [] + for c in scored[:limit]: + mc = _build_match_code(c, parsed, intent) + + # Assign confidence from score + score = c.get('_score', 0) + if score >= 10: + confidence = 'exact' + elif score >= 5: + confidence = 'high' + elif score >= 2: + confidence = 'medium' + else: + confidence = 'low' + + entry = { + 'name': c['name'], + 'lat': c['lat'], + 'lon': c['lon'], + 'source': c['source'], + 'confidence': confidence, + 'type': c.get('type', 'poi'), + 'raw': c.get('raw'), + } + if mc: + entry['match_code'] = mc + result.append(entry) + + return result + + +# ═══════════════════════════════════════════════════════════════════ +# STEP 4: ANNOTATION +# 
def geocode(query, limit=10, lat=None, lon=None, zoom=None):
    """
    Structured geocoding with multi-source retrieval and reranking.

    Steps, in order: coordinate detection, address-book nickname
    short-circuit (single-word queries only), intent classification,
    candidate retrieval, same-source de-duplication within 50 m, reranking,
    and address-book annotation of the final results.

    Args:
        query: Free-text query; None and blank strings yield an empty result.
        limit: Requested number of results, clamped to 1..20. Values that
            cannot be converted to an int fall back to 10.
        lat, lon, zoom: Optional location bias forwarded to the retrievers.

    Returns {query, results: [...], count} — always 200-safe.
    """
    # A bad ``limit`` must not raise: the contract is "always 200-safe".
    try:
        limit = int(limit)
    except (TypeError, ValueError, OverflowError):
        limit = 10
    limit = max(1, min(limit, 20))
    q = (query or '').strip()
    empty = {'query': q, 'results': [], 'count': 0}

    if not q:
        return empty

    # ── Coordinate detection ──
    coords = _parse_coords(q)
    if coords:
        return {
            'query': q,
            'results': [{
                'name': q,
                'lat': coords[0],
                'lon': coords[1],
                'source': 'coordinates',
                'confidence': 'exact',
                'type': 'coordinates',
                'raw': None,
            }],
            'count': 1,
        }

    # ── Address book nickname short-circuit ──
    normalized_q = ' '.join(q.lower().replace(',', ' ').split())
    is_single_word = ' ' not in normalized_q
    try:
        from . import address_book
        ab_match = address_book.lookup(q)
        # Compare coordinates against None rather than by truthiness so an
        # entry at latitude or longitude 0.0 is still usable; this matches
        # the checks in _annotate_with_address_book.
        if (ab_match
                and ab_match['confidence'] == 'exact'
                and ab_match.get('lat') is not None
                and ab_match.get('lon') is not None
                and is_single_word):
            logger.info("geocode: nickname short-circuit %r → %s", q, ab_match['name'])
            return {
                'query': q,
                'results': [{
                    'name': ab_match.get('address') or ab_match['name'],
                    'lat': ab_match['lat'],
                    'lon': ab_match['lon'],
                    'source': 'address_book',
                    'confidence': 'exact',
                    'type': 'nickname',
                    'raw': ab_match,
                }],
                'count': 1,
            }
    except Exception as e:
        # Best effort: the address book is optional, fall through to search.
        logger.debug("geocode: address_book lookup failed: %s", e)

    # ── Classify intent + parse ──
    intent, parsed = _classify_and_parse(q)
    logger.debug("geocode: intent=%s parsed=%s", intent, parsed)

    # ── Retrieve candidates ──
    candidates = []

    if intent == 'ADDRESS':
        # Netsyms (structured) + Photon (freetext with expanded query).
        # The retrievers are called one after another, not concurrently.
        netsyms_results = _retrieve_netsyms(parsed, limit=limit, lat=lat, lon=lon)
        photon_results = _retrieve_photon_freetext(
            parsed.get('expanded_query', q), limit=limit, lat=lat, lon=lon, zoom=zoom
        )
        # Also try Photon /structured for addresses
        photon_struct = _retrieve_photon_structured(parsed, limit=5)
        candidates = netsyms_results + photon_results + photon_struct

    elif intent == 'POSTCODE':
        netsyms_results = _retrieve_netsyms(parsed, limit=limit, lat=lat, lon=lon)
        photon_results = _retrieve_photon_freetext(q, limit=limit, lat=lat, lon=lon, zoom=zoom)
        candidates = netsyms_results + photon_results

    elif intent in ('LOCALITY', 'POI', 'UNKNOWN'):
        candidates = _retrieve_photon_freetext(q, limit=limit, lat=lat, lon=lon, zoom=zoom)

    # ── Deduplicate by (lat, lon) proximity ──
    # Only candidates from the same source collapse; hits from different
    # sources at the same spot are both kept for the reranker.
    deduped = []
    for c in candidates:
        is_dup = any(
            c.get('source') == kept.get('source')
            and _haversine_m(c['lat'], c['lon'], kept['lat'], kept['lon']) < 50
            for kept in deduped
        )
        if not is_dup:
            deduped.append(c)
    candidates = deduped

    # ── Rerank ──
    results = _rerank(candidates, parsed, intent, q, limit)

    # ── Address book annotation ──
    _annotate_with_address_book(results)

    logger.info("geocode: %r → intent=%s, %d results", q, intent, len(results))
    return {'query': q, 'results': results, 'count': len(results)}
r["results"][0]["source"] == "photon", + }, + { + "name": "starbucks filer → POI result", + "query": "starbucks filer", + "check": lambda r: r["count"] >= 1 and r["results"][0]["source"] == "photon", + }, + { + "name": "filer idaho → locality", + "query": "filer idaho", + "check": lambda r: ( + r["count"] >= 1 + and r["results"][0]["source"] == "photon" + and r["results"][0]["type"] == "locality" + ), + }, + { + "name": "filer → partial query, at least 1 result", + "query": "filer", + "check": lambda r: r["count"] >= 1 and r["results"][0]["source"] == "photon", + }, + { + "name": "42.5736, -114.6066 → coordinates (with space)", + "query": "42.5736, -114.6066", + "check": lambda r: ( + r["count"] == 1 + and r["results"][0]["source"] == "coordinates" + and r["results"][0]["confidence"] == "exact" + and r["results"][0]["type"] == "coordinates" + ), + }, + { + "name": "42.5736,-114.6066 → coordinates (no space)", + "query": "42.5736,-114.6066", + "check": lambda r: ( + r["count"] == 1 + and r["results"][0]["source"] == "coordinates" + and r["results"][0]["confidence"] == "exact" + ), + }, + { + "name": "boise → at least 1 result", + "query": "boise", + "check": lambda r: r["count"] >= 1 and r["results"][0]["source"] == "photon", + }, + { + "name": "toronto → CA canary", + "query": "toronto", + "check": lambda r: r["count"] >= 1 and r["results"][0]["source"] == "photon", + }, + { + "name": "asdfghjklqwerty → empty results, 200 OK", + "query": "asdfghjklqwerty", + "check": lambda r: r["count"] == 0 and r["results"] == [], + }, + { + "name": "empty query → empty results", + "query": "", + "check": lambda r: r["count"] == 0 and r["results"] == [], + }, +] + +passed = 0 +failed = 0 + +for t in TESTS: + q = urllib.parse.urlencode({"q": t["query"]}) if t["query"] else "q=" + url = f"{BASE}/api/geocode?{q}" + try: + req = urllib.request.Request(url) + with urllib.request.urlopen(req, timeout=10) as resp: + status = resp.status + body = json.loads(resp.read()) + except 
urllib.error.HTTPError as e: + status = e.code + try: + body = json.loads(e.read()) + except Exception: + body = {} + except Exception as e: + status = 0 + body = {} + print(f" [FAIL] {t['name']}") + print(f" EXCEPTION: {e}") + failed += 1 + continue + + ok = status == 200 and t["check"](body) + tag = "PASS" if ok else "FAIL" + if ok: + passed += 1 + else: + failed += 1 + + top = body.get("results", [{}])[0] if body.get("results") else {} + top_summary = f"source={top.get('source','—')} type={top.get('type','—')} conf={top.get('confidence','—')} name={top.get('name','—')[:50]}" + print(f" [{tag}] {t['name']}") + if not ok: + print(f" HTTP {status}, count={body.get('count','?')}, top: {top_summary}") + else: + labeled = f" labeled_as={top.get('labeled_as')}" if top.get('labeled_as') else "" + print(f" → {top_summary}{labeled}") + +print(f"\n{passed} passed, {failed} failed") +sys.exit(0 if failed == 0 else 1) diff --git a/lib/google_places.py b/lib/google_places.py new file mode 100644 index 0000000..8272b81 --- /dev/null +++ b/lib/google_places.py @@ -0,0 +1,397 @@ +""" +Google Places (New) API client for tertiary enrichment. + +Searches for business POIs and fetches details (opening hours, phone, website) +when OSM + Overture data is incomplete. Uses field masks to minimize cost. 
def _get_db():
    """Return a module-level SQLite connection (lazy init).

    The database file is ``data/place_cache.db`` under the directory two
    levels above this module. On first use the connection is opened with
    ``check_same_thread=False``, switched to WAL journaling with
    ``synchronous=NORMAL``, and the ``google_api_calls`` counter table is
    created if missing.

    The module-level handle is published only after initialisation has fully
    succeeded. A failure therefore leaves ``_db_conn`` unset so the next call
    retries, rather than caching a connection whose table may not exist.

    Returns:
        The shared ``sqlite3.Connection``.

    Raises:
        sqlite3.Error: If the database cannot be opened or initialised.
        OSError: If the data directory cannot be created.
    """
    global _db_conn
    if _db_conn is not None:
        return _db_conn

    db_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'data')
    # sqlite3.connect() creates the file but not missing parent directories.
    os.makedirs(db_dir, exist_ok=True)
    db_path = os.path.join(db_dir, 'place_cache.db')

    conn = sqlite3.connect(db_path, check_same_thread=False)
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute("PRAGMA synchronous=NORMAL")
        # Ensure google_api_calls table exists
        conn.execute("""
            CREATE TABLE IF NOT EXISTS google_api_calls (
                call_date TEXT PRIMARY KEY,
                call_count INTEGER NOT NULL DEFAULT 0
            )
        """)
        conn.commit()
    except Exception:
        # Do not leak or cache a half-initialised connection.
        conn.close()
        raise

    _db_conn = conn
    return _db_conn
def get_daily_count():
    """Return the number of API calls recorded for today.

    Reads the ``google_api_calls`` row keyed by today's ISO date from the
    shared connection. Returns 0 when no row exists for today.
    """
    conn = _get_db()
    day_key = date.today().isoformat()
    cursor = conn.execute(
        "SELECT call_count FROM google_api_calls WHERE call_date = ?", (day_key,)
    )
    record = cursor.fetchone()
    if not record:
        return 0
    return record[0]
def search_place(name, lat, lon, radius_m=200):
    """
    Look up a business in Google Places (New) by name near a location.

    Sends one ``places:searchText`` request biased to a circle of
    ``radius_m`` metres around (lat, lon), asking for a single result. The
    daily call counter is incremented once a response has been received. A
    429 response additionally pins today's counter to the cap.

    Returns the Google Place ID of the top match, or None when the API key
    is missing, the daily cap is reached, the request fails or times out,
    the response status is not 200, or no place is returned.
    """
    api_key = _get_api_key()
    # Short-circuit keeps the cap check from running when there is no key.
    if not api_key or not check_daily_cap():
        return None

    try:
        request_headers = {
            'Content-Type': 'application/json',
            'X-Goog-Api-Key': api_key,
            'X-Goog-FieldMask': 'places.id,places.displayName,places.location',
        }
        bias_circle = {
            'center': {'latitude': lat, 'longitude': lon},
            'radius': float(radius_m),
        }
        payload = {
            'textQuery': name,
            'locationBias': {'circle': bias_circle},
            'maxResultCount': 1,
        }
        response = requests.post(
            f'{API_BASE}/places:searchText',
            headers=request_headers,
            json=payload,
            timeout=REQUEST_TIMEOUT,
        )

        increment_call_counter()

        status = response.status_code
        if status == 429:
            logger.warning("google_places: action=search place=%s result=rate_limited", name)
            _set_daily_count_to_cap()
            return None
        if status == 403:
            logger.error("google_places: action=search place=%s result=forbidden (invalid key?)", name)
            return None
        if status != 200:
            logger.warning("google_places: action=search place=%s result=error status=%d", name, status)
            return None

        matches = response.json().get('places', [])
        if not matches:
            logger.info("google_places: action=search place=%s result=miss", name)
            return None

        best = matches[0]
        found_id = best.get('id')
        google_name = best.get('displayName', {}).get('text', '?')
        logger.info("google_places: action=search place=%s result=hit google_name=%s id=%s", name, google_name, found_id)
        return found_id

    except requests.exceptions.Timeout:
        logger.warning("google_places: action=search place=%s result=timeout", name)
        return None
    except Exception as e:
        logger.error("google_places: action=search place=%s result=error err=%s", name, e)
        return None
+ """ + key = _get_api_key() + if not key: + return None + + if not check_daily_cap(): + return None + + try: + resp = requests.get( + f'{API_BASE}/places/{place_id}', + headers={ + 'X-Goog-Api-Key': key, + 'X-Goog-FieldMask': 'regularOpeningHours,internationalPhoneNumber,websiteUri', + }, + timeout=REQUEST_TIMEOUT, + ) + + increment_call_counter() + + if resp.status_code == 429: + logger.warning("google_places: action=details id=%s result=rate_limited", place_id) + _set_daily_count_to_cap() + return None + + if resp.status_code != 200: + logger.warning("google_places: action=details id=%s result=error status=%d", place_id, resp.status_code) + return None + + data = resp.json() + result = { + 'opening_hours': None, + 'opening_hours_raw': None, + 'phone_number': None, + 'website': None, + } + + # Phone + phone = data.get('internationalPhoneNumber') + if phone: + result['phone_number'] = phone.replace(' ', '').replace('-', '') + + # Website + result['website'] = data.get('websiteUri') + + # Opening hours + hours = data.get('regularOpeningHours') + if hours: + # Try OSM-compatible format from periods + periods = hours.get('periods', []) + if periods: + osm_str = _periods_to_osm(periods) + if osm_str: + result['opening_hours'] = osm_str + + # Fallback: weekday descriptions (human-readable) + if not result['opening_hours']: + descriptions = hours.get('weekdayDescriptions') + if descriptions: + result['opening_hours_raw'] = descriptions + + logger.info("google_places: action=details id=%s result=hit hours=%s phone=%s website=%s", + place_id, + 'yes' if result['opening_hours'] or result['opening_hours_raw'] else 'no', + 'yes' if result['phone_number'] else 'no', + 'yes' if result['website'] else 'no') + return result + + except requests.exceptions.Timeout: + logger.warning("google_places: action=details id=%s result=timeout", place_id) + return None + except Exception as e: + logger.error("google_places: action=details id=%s result=error err=%s", place_id, e) + return 
None + + +# ── Opening hours conversion ──────────────────────────────────────────── + +def _periods_to_osm(periods): + """ + Convert Google Places periods array to OSM opening_hours string. + + Google periods: [{"open": {"day": 0-6, "hour": H, "minute": M}, + "close": {"day": 0-6, "hour": H, "minute": M}}, ...] + Where day 0 = Sunday. + + OSM format: "Mo-Fr 06:00-23:00; Sa-Su 07:00-23:00" + """ + if not periods: + return None + + # Check for 24/7: single period with no close, or open 00:00 close 00:00 next day + if len(periods) == 1: + p = periods[0] + o = p.get('open', {}) + c = p.get('close') + if c is None and o.get('hour', 0) == 0 and o.get('minute', 0) == 0: + return '24/7' + + # Build a map: day_index → "HH:MM-HH:MM" + day_hours = {} # day_index → time_range string + for p in periods: + o = p.get('open', {}) + c = p.get('close', {}) + day = o.get('day', 0) + open_time = f"{o.get('hour', 0):02d}:{o.get('minute', 0):02d}" + + if c: + close_time = f"{c.get('hour', 0):02d}:{c.get('minute', 0):02d}" + # Handle midnight closing (00:00 means end of day) + if close_time == '00:00': + close_time = '24:00' + else: + close_time = '24:00' + + time_range = f"{open_time}-{close_time}" + + # A day can have multiple periods (e.g., lunch break) + if day in day_hours: + day_hours[day] = day_hours[day] + ',' + time_range + else: + day_hours[day] = time_range + + if not day_hours: + return None + + # Check if all 7 days have same hours + unique_ranges = set(day_hours.values()) + if len(day_hours) == 7 and len(unique_ranges) == 1: + hours = unique_ranges.pop() + if hours == '00:00-24:00': + return '24/7' + return hours # implicit "every day" + + # Group consecutive days with same hours + # Reorder to OSM convention: Mo(1) Tu(2) We(3) Th(4) Fr(5) Sa(6) Su(0) + osm_day_order = [1, 2, 3, 4, 5, 6, 0] + groups = [] + current_days = [] + current_hours = None + + for day_idx in osm_day_order: + hours = day_hours.get(day_idx) + if hours == current_hours: + current_days.append(day_idx) + 
else: + if current_days and current_hours: + groups.append((current_days, current_hours)) + current_days = [day_idx] + current_hours = hours + + if current_days and current_hours: + groups.append((current_days, current_hours)) + + if not groups: + return None + + # Format each group + parts = [] + for days, hours in groups: + if len(days) == 1: + day_str = _DAY_ABBR[days[0]] + elif len(days) == 2: + day_str = f"{_DAY_ABBR[days[0]]},{_DAY_ABBR[days[1]]}" + else: + day_str = f"{_DAY_ABBR[days[0]]}-{_DAY_ABBR[days[-1]]}" + parts.append(f"{day_str} {hours}") + + return '; '.join(parts) diff --git a/lib/landclass.py b/lib/landclass.py new file mode 100644 index 0000000..f581994 --- /dev/null +++ b/lib/landclass.py @@ -0,0 +1,252 @@ +""" +PAD-US land classification lookup. + +Provides point-in-polygon queries against the USGS Protected Areas Database +(PAD-US) stored in a local PostGIS database. Returns land ownership, +management, and public access information for any lat/lon coordinate. + +Connection pool is lazy-initialized on first call. If PostgreSQL is unreachable, +functions return empty results gracefully (feature degrades, doesn't crash). +""" +import os + +import psycopg2 +import psycopg2.pool + +from .utils import setup_logging + +logger = setup_logging('recon.landclass') + +_pool = None +_pool_failed = False + +# ── Label mappings from PAD-US domain tables ──────────────────────────── +# Extracted from PADUS4_0_Geodatabase.gdb domain lookup layers. +# ogr2ogr lowercases all column names. + +AGENCY_NAME_MAP = { + 'TVA': 'Tennessee Valley Authority', + 'BLM': 'Bureau of Land Management', + 'BOEM': 'Bureau of Ocean Energy Management', + 'USBR': 'Bureau of Reclamation', + 'FWS': 'U.S. 
Fish and Wildlife Service', + 'USFS': 'Forest Service', + 'DOD': 'Department of Defense', + 'USACE': 'Army Corps of Engineers', + 'DOE': 'Department of Energy', + 'NPS': 'National Park Service', + 'NRCS': 'Natural Resources Conservation Service', + 'ARS': 'Agricultural Research Service', + 'BIA': 'Bureau of Indian Affairs', + 'NOAA': 'National Oceanic and Atmospheric Administration', + 'BPA': 'Bonneville Power Administration', + 'OTHF': 'Other or Unknown Federal Land', + 'TRIB': 'American Indian Lands', + 'SPR': 'State Park and Recreation', + 'SDC': 'State Department of Conservation', + 'SLB': 'State Land Board', +} + +AGENCY_TYPE_MAP = { + 'FED': 'Federal', + 'TRIB': 'American Indian Lands', + 'STAT': 'State', + 'DIST': 'Regional Agency Special District', + 'LOC': 'Local Government', + 'NGO': 'Non-Governmental Organization', + 'PVT': 'Private', + 'JNT': 'Joint', + 'UNK': 'Unknown', + 'TERR': 'Territorial', + 'DESG': 'Designation', +} + +DESIGNATION_TYPE_MAP = { + 'NP': 'National Park', + 'NM': 'National Monument', + 'NCA': 'Conservation Area', + 'NF': 'National Forest', + 'NG': 'National Grassland', + 'PUB': 'National Public Lands', + 'NT': 'National Scenic or Historic Trail', + 'NWR': 'National Wildlife Refuge', + 'WA': 'Wilderness Area', + 'WSR': 'Wild and Scenic River', + 'WSA': 'Wilderness Study Area', + 'MPA': 'Marine Protected Area', + 'NRA': 'National Recreation Area', + 'NSBV': 'National Scenic, Botanical or Volcanic Area', + 'NLS': 'National Lakeshore or Seashore', + 'IRA': 'Inventoried Roadless Area', + 'ACEC': 'Area of Critical Environmental Concern', + 'RNA': 'Research Natural Area', + 'REC': 'Recreation Management Area', + 'RMA': 'Resource Management Area', + 'WPA': 'Watershed Protection Area', + 'REA': 'Research or Educational Area', + 'HCA': 'Historic or Cultural Area', + 'MIT': 'Mitigation Land or Bank', + 'MIL': 'Military Land', + 'ACC': 'Access Area', + 'SDA': 'Special Designation Area', + 'PROC': 'Approved or Proclamation Boundary', + 'FOTH': 
'Federal Other or Unknown', + 'ND': 'Not Designated', +} + +PUBLIC_ACCESS_MAP = { + 'OA': 'Open Access', + 'RA': 'Restricted Access', + 'XA': 'Closed', + 'UK': 'Unknown', +} + +GAP_STATUS_MAP = { + '1': 'Managed for biodiversity (disturbance events proceed)', + '2': 'Managed for biodiversity (disturbance suppressed)', + '3': 'Multiple uses (extractive/OHV)', + '4': 'No known mandate for biodiversity protection', +} + +CATEGORY_MAP = { + 'Fee': 'Fee', + 'Easement': 'Easement', + 'Other': 'Other', + 'Unknown': 'Unknown', + 'Designation': 'Designation', + 'Marine': 'Marine Area', + 'Proclamation': 'Approved, Proclamation or Extent Boundary', +} + +STATE_MAP = { + 'AL': 'Alabama', 'AK': 'Alaska', 'AZ': 'Arizona', 'AR': 'Arkansas', + 'CA': 'California', 'CO': 'Colorado', 'CT': 'Connecticut', 'DE': 'Delaware', + 'DC': 'District of Columbia', 'FL': 'Florida', 'GA': 'Georgia', 'HI': 'Hawaii', + 'ID': 'Idaho', 'IL': 'Illinois', 'IN': 'Indiana', 'IA': 'Iowa', + 'KS': 'Kansas', 'KY': 'Kentucky', 'LA': 'Louisiana', 'ME': 'Maine', + 'MD': 'Maryland', 'MA': 'Massachusetts', 'MI': 'Michigan', 'MN': 'Minnesota', + 'MS': 'Mississippi', 'MO': 'Missouri', 'MT': 'Montana', 'NE': 'Nebraska', + 'NV': 'Nevada', 'NH': 'New Hampshire', 'NJ': 'New Jersey', 'NM': 'New Mexico', + 'NY': 'New York', 'NC': 'North Carolina', 'ND': 'North Dakota', 'OH': 'Ohio', + 'OK': 'Oklahoma', 'OR': 'Oregon', 'PA': 'Pennsylvania', 'RI': 'Rhode Island', + 'SC': 'South Carolina', 'SD': 'South Dakota', 'TN': 'Tennessee', 'TX': 'Texas', + 'UT': 'Utah', 'VT': 'Vermont', 'VA': 'Virginia', 'WA': 'Washington', + 'WV': 'West Virginia', 'WI': 'Wisconsin', 'WY': 'Wyoming', +} + + +def _decode(code, label_map): + """Decode a PAD-US code using a label map. Returns decoded label or the raw code.""" + if not code: + return '' + code = str(code).strip() + return label_map.get(code, code) + + +def _get_pool(): + """Lazy-init the connection pool. 
def lookup_landclass(lat, lon):
    """
    Look up PAD-US land classifications for a point.

    Queries ``pad_units`` for up to 10 polygons intersecting the point and
    decodes the coded columns through the module's label maps.

    Args:
        lat: Latitude in degrees (SRID 4326).
        lon: Longitude in degrees (SRID 4326).

    Returns a list of classification dicts, ordered by area ascending
    (smallest/most specific first). Empty list on error or no results.
    ``gap_status`` and ``public_access_code`` hold the raw stripped codes;
    NULL database values become '' in every string field.
    """
    def _text(value):
        # SQL NULL arrives as None; str(None) would leak the literal 'None'.
        return '' if value is None else str(value).strip()

    rows = _query_all(
        """SELECT unit_nm, mang_name, mang_type, own_name, own_type,
                  des_tp, gap_sts, pub_access, category, gis_acres, state_nm
           FROM pad_units
           WHERE ST_Intersects(geom, ST_SetSRID(ST_MakePoint(%s, %s), 4326))
           ORDER BY gis_acres ASC
           LIMIT 10""",
        # ST_MakePoint takes (x, y), i.e. longitude first.
        (lon, lat)
    )

    results = []
    for row in rows:
        pa_code = _text(row.get('pub_access'))

        results.append({
            'unit_name': (row.get('unit_nm') or '').strip(),
            'manager_name': _decode(row.get('mang_name'), AGENCY_NAME_MAP),
            'manager_type': _decode(row.get('mang_type'), AGENCY_TYPE_MAP),
            'owner_type': _decode(row.get('own_type'), AGENCY_TYPE_MAP),
            'designation_type': _decode(row.get('des_tp'), DESIGNATION_TYPE_MAP),
            'gap_status': _text(row.get('gap_sts')),
            'public_access': _decode(pa_code, PUBLIC_ACCESS_MAP),
            'public_access_code': pa_code,
            'category': _decode(row.get('category'), CATEGORY_MAP),
            'acres': row.get('gis_acres'),
            'state': _decode(row.get('state_nm'), STATE_MAP),
        })

    return results
+GEOCODE_BIAS_LAT = 42.5736 +GEOCODE_BIAS_LON = -114.6066 +GEOCODE_BIAS_ZOOM = 10 + +# Distance threshold (meters) for annotating Photon results with address +# book labels. 75m covers GPS jitter + geocoder imprecision. +ADDRESS_BOOK_ANNOTATION_RADIUS_M = 75 + +# Coordinate regex — handles comma-separated and space-separated forms. +_COORD_RE = re.compile( + r'^\s*(-?\d+\.\d+)\s*[,\s]\s*(-?\d+\.\d+)\s*$' +) + +VALID_MODES = {"auto", "pedestrian", "bicycle", "truck"} + + +def _parse_coords(text: str): + """Return (lat, lon) if text looks like coordinates with valid bounds, else None.""" + m = _COORD_RE.match(text.strip()) + if not m: + return None + lat, lon = float(m.group(1)), float(m.group(2)) + if -90 <= lat <= 90 and -180 <= lon <= 180: + return lat, lon + return None + + +def _haversine_m(lat1, lon1, lat2, lon2): + """Haversine distance in meters between two (lat, lon) points.""" + R = 6_371_000 # Earth radius in meters + rlat1, rlat2 = math.radians(lat1), math.radians(lat2) + dlat = math.radians(lat2 - lat1) + dlon = math.radians(lon2 - lon1) + a = math.sin(dlat / 2) ** 2 + math.cos(rlat1) * math.cos(rlat2) * math.sin(dlon / 2) ** 2 + return R * 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a)) + + +def geocode(query: str, limit: int = 10, lat=None, lon=None, zoom=None): + """Delegate to the structured geocode module. See lib/geocode.py.""" + from . import geocode as geocode_mod + return geocode_mod.geocode(query, limit=limit, lat=lat, lon=lon, zoom=zoom) + + +def _geocode(query: str): + """Internal: returns (lat, lon, display_name) tuple for route().""" + result = geocode(query, limit=1) + results = result.get('results', []) + if not results: + raise ValueError(f"Could not find location: {query}") + top = results[0] + return top['lat'], top['lon'], top['name'] + + +def reverse_geocode(lat: float, lon: float) -> str: + """Reverse geocode coordinates via Photon. 
def route(origin: str, destination: str, mode: str = "auto") -> dict:
    """
    Get a route between two locations.

    Both endpoints are geocoded with ``_geocode`` and the resulting
    coordinates are posted to Valhalla's ``/route`` endpoint with distances
    in miles. Only the first leg of the returned trip is used.

    Args:
        origin: Starting location — address, place name, or "lat,lon"
        destination: Destination — address, place name, or "lat,lon"
        mode: Travel mode — auto, pedestrian, bicycle, truck. Any value not
            in VALID_MODES silently falls back to "auto".

    Returns:
        dict with summary, maneuvers, origin/destination info, and raw shape

    Raises:
        ValueError: Propagated from ``_geocode`` when an endpoint cannot be
            located.
        RuntimeError: If the Valhalla request fails (the underlying
            ``requests`` error is attached as the cause) or Valhalla answers
            with a non-200 status.
    """
    if mode not in VALID_MODES:
        mode = "auto"

    # Geocode both endpoints
    orig_lat, orig_lon, orig_name = _geocode(origin)
    dest_lat, dest_lon, dest_name = _geocode(destination)

    # Query Valhalla
    valhalla_req = {
        "locations": [
            {"lat": orig_lat, "lon": orig_lon},
            {"lat": dest_lat, "lon": dest_lon},
        ],
        "costing": mode,
        "directions_options": {"units": "miles"},
    }

    try:
        resp = requests.post(
            f"{VALHALLA_URL}/route",
            json=valhalla_req,
            timeout=30,
        )
    except requests.RequestException as exc:
        # Chain explicitly so the transport error stays visible as the cause.
        raise RuntimeError("Navigation service unavailable") from exc

    if resp.status_code != 200:
        try:
            err = resp.json()
            msg = err.get("error", "Unknown routing error")
        except Exception:
            # Error body was not a JSON object; report the status instead.
            msg = f"Routing error (HTTP {resp.status_code})"
        raise RuntimeError(f"No route found between locations: {msg}")

    data = resp.json()
    trip = data["trip"]
    summary = trip["summary"]
    leg = trip["legs"][0]

    # Build maneuver list
    maneuvers = []
    for m in leg["maneuvers"]:
        streets = m.get("street_names", [])
        maneuvers.append({
            "instruction": m["instruction"],
            "distance_miles": round(m.get("length", 0), 2),
            "street_name": streets[0] if streets else "",
            "type": m.get("type", 0),
            "verbal_succinct": m.get("verbal_succinct_transition_instruction", ""),
        })

    return {
        "origin": {"name": orig_name, "lat": orig_lat, "lon": orig_lon},
        "destination": {"name": dest_name, "lat": dest_lat, "lon": dest_lon},
        "summary": {
            "distance_miles": round(summary["length"], 1),
            "time_minutes": round(summary["time"] / 60, 1),
            "mode": mode,
        },
        "maneuvers": maneuvers,
        "shape": leg.get("shape", ""),
    }
def test_route_bad_origin():
    """route() with a nonexistent place must raise a clean error.

    Raises AssertionError when route() unexpectedly succeeds. The runner in
    this file counts a test as failed only when it raises, so returning
    False here would be tallied as a pass.
    """
    print("\nTEST 5: route('nonexistent place xyz123abc', 'Boise Idaho')")
    try:
        r = route("nonexistent place xyz123abc", "Boise Idaho")
    except ValueError as e:
        print(f" OK — clean error: {e}")
    except RuntimeError as e:
        print(f" OK — runtime error: {e}")
    else:
        # The runner prints the " FAIL — ..." line from this message.
        raise AssertionError(f"expected error, got result: {r['summary']}")
a/lib/netsyms_api.py +++ b/lib/netsyms_api.py @@ -1,18 +1,22 @@ """ -RECON Netsyms API — Flask Blueprint. +RECON Netsyms API + Geocode — Flask Blueprints. GET /api/netsyms/lookup?q=&country= GET /api/netsyms/health +GET /api/geocode?q=&limit= (Photon-first search with ranked results) """ from flask import Blueprint, request, jsonify from . import netsyms +from . import address_book +from . import nav_tools from .utils import setup_logging logger = setup_logging('recon.netsyms_api') netsyms_bp = Blueprint('netsyms', __name__) +geocode_bp = Blueprint('geocode', __name__) @netsyms_bp.route('/api/netsyms/lookup') @@ -29,3 +33,94 @@ def api_netsyms_lookup(): @netsyms_bp.route('/api/netsyms/health') def api_netsyms_health(): return jsonify(netsyms.health()) + + + +def _safe_float(val, lo, hi): + """Parse val as float; return None if missing, non-numeric, or out of [lo, hi].""" + if val is None: + return None + try: + f = float(val) + if lo <= f <= hi: + return f + except (ValueError, TypeError): + pass + return None + +@geocode_bp.route('/api/geocode') +def api_geocode(): + """ + Photon-first geocoding with ranked candidates. + + GET /api/geocode?q=&limit= + + Always returns 200 OK with: + {query, results: [{name, lat, lon, source, confidence, type, raw, ...}], count} + + - source: "address_book" | "coordinates" | "photon" + - confidence: "exact" | "high" | "medium" | "low" + - type: "nickname" | "coordinates" | "street_address" | "poi" | "locality" + - labeled_as: present when result is within 75m of an address book entry + - Empty results array is valid (no match). No 404s. 
@geocode_bp.route('/api/reverse')
def api_reverse():
    """
    Reverse geocode coordinates via Photon.

    GET /api/reverse?lat=X&lon=Y

    Returns same shape as /api/geocode:
      {query: "lat,lon", results: [{name, lat, lon, source, type, raw, ...}], count}

    Returns 200 OK with empty results on no match or when Photon cannot be
    queried (the failure is logged). 400 on missing, non-numeric, or
    out-of-range coordinates.
    """
    try:
        lat = float(request.args.get('lat', ''))
        lon = float(request.args.get('lon', ''))
    except (ValueError, TypeError):
        return jsonify({'error': 'Missing or invalid lat/lon parameters'}), 400

    # NaN fails both chained comparisons, so it is rejected here as well.
    if not (-90 <= lat <= 90) or not (-180 <= lon <= 180):
        return jsonify({'error': 'Coordinates out of range'}), 400

    query_str = f"{lat},{lon}"

    try:
        import requests as http_requests
        # NOTE(review): nav_tools.reverse_geocode builds this same endpoint
        # from its PHOTON_URL setting; consider sharing that constant.
        resp = http_requests.get(
            "http://localhost:2322/reverse",
            params={"lat": lat, "lon": lon, "limit": 1},
            timeout=10,
        )
        resp.raise_for_status()
        data = resp.json()
        features = data.get("features", [])
    except Exception as exc:
        # Best-effort endpoint: degrade to an empty result, but record why
        # the lookup failed so outages can be diagnosed from the logs.
        logger.warning("Photon reverse geocode failed for %s: %s", query_str, exc)
        return jsonify({'query': query_str, 'results': [], 'count': 0})

    if not features:
        return jsonify({'query': query_str, 'results': [], 'count': 0})

    from .geocode import _parse_photon_features
    results = _parse_photon_features(features, source='photon_reverse')

    return jsonify({'query': query_str, 'results': results, 'count': len(results)})
+++ b/lib/osm_categories.py @@ -0,0 +1,143 @@ +""" +Human-readable category names for OSM class/type pairs. + +Used by the place detail proxy to turn ("amenity", "cafe") into "Coffee shop". +Covers ~50 common categories; unmapped pairs fall back to title-cased class:type. +""" + +# Exact (class, type) → label +CATEGORY_MAP = { + # Amenity + ("amenity", "cafe"): "Coffee shop", + ("amenity", "restaurant"): "Restaurant", + ("amenity", "fast_food"): "Fast food restaurant", + ("amenity", "bar"): "Bar", + ("amenity", "pub"): "Pub", + ("amenity", "biergarten"): "Beer garden", + ("amenity", "ice_cream"): "Ice cream shop", + ("amenity", "fuel"): "Gas station", + ("amenity", "charging_station"): "EV charging station", + ("amenity", "parking"): "Parking", + ("amenity", "bank"): "Bank", + ("amenity", "atm"): "ATM", + ("amenity", "pharmacy"): "Pharmacy", + ("amenity", "hospital"): "Hospital", + ("amenity", "clinic"): "Clinic", + ("amenity", "dentist"): "Dentist", + ("amenity", "doctors"): "Doctor's office", + ("amenity", "veterinary"): "Veterinarian", + ("amenity", "school"): "School", + ("amenity", "university"): "University", + ("amenity", "college"): "College", + ("amenity", "library"): "Library", + ("amenity", "post_office"): "Post office", + ("amenity", "fire_station"): "Fire station", + ("amenity", "police"): "Police station", + ("amenity", "townhall"): "Town hall", + ("amenity", "place_of_worship"): "Place of worship", + ("amenity", "theatre"): "Theatre", + ("amenity", "cinema"): "Cinema", + ("amenity", "community_centre"): "Community center", + ("amenity", "toilets"): "Restrooms", + ("amenity", "drinking_water"): "Drinking water", + ("amenity", "shelter"): "Shelter", + ("amenity", "camping"): "Campground", + # Shop + ("shop", "supermarket"): "Supermarket", + ("shop", "convenience"): "Convenience store", + ("shop", "hardware"): "Hardware store", + ("shop", "clothes"): "Clothing store", + ("shop", "car_repair"): "Auto repair", + ("shop", "car"): "Car dealership", + 
("shop", "bakery"): "Bakery", + ("shop", "butcher"): "Butcher", + # Leisure + ("leisure", "park"): "Park", + ("leisure", "playground"): "Playground", + ("leisure", "sports_centre"): "Sports center", + ("leisure", "swimming_pool"): "Swimming pool", + ("leisure", "golf_course"): "Golf course", + ("leisure", "nature_reserve"): "Nature reserve", + ("leisure", "campsite"): "Campsite", + # Tourism + ("tourism", "hotel"): "Hotel", + ("tourism", "motel"): "Motel", + ("tourism", "guest_house"): "Guest house", + ("tourism", "hostel"): "Hostel", + ("tourism", "camp_site"): "Campsite", + ("tourism", "viewpoint"): "Viewpoint", + ("tourism", "museum"): "Museum", + ("tourism", "information"): "Information", + ("tourism", "attraction"): "Tourist attraction", + ("tourism", "picnic_site"): "Picnic site", + # Natural + ("natural", "peak"): "Peak", + ("natural", "spring"): "Spring", + ("natural", "hot_spring"): "Hot spring", + ("natural", "lake"): "Lake", + ("natural", "water"): "Water body", + ("natural", "cliff"): "Cliff", + ("natural", "cave_entrance"): "Cave", + # Highway + ("highway", "bus_stop"): "Bus stop", + ("highway", "rest_area"): "Rest area", + # Boundary + ("boundary", "administrative"): "Administrative boundary", + ("boundary", "protected_area"): "Protected area", + ("boundary", "national_park"): "National park", + # Place + ("place", "city"): "City", + ("place", "town"): "Town", + ("place", "village"): "Village", + ("place", "hamlet"): "Hamlet", + ("place", "suburb"): "Suburb", + ("place", "neighbourhood"): "Neighborhood", + # Building + ("building", "yes"): "Building", + # Waterway + ("waterway", "river"): "River", + ("waterway", "stream"): "Stream", + ("waterway", "waterfall"): "Waterfall", + # Landuse + ("landuse", "cemetery"): "Cemetery", + ("landuse", "forest"): "Forest", + # Historic + ("historic", "monument"): "Monument", + ("historic", "memorial"): "Memorial", + ("historic", "ruins"): "Ruins", +} + +# Class-level wildcard fallbacks (when exact type isn't mapped) 
+CLASS_FALLBACKS = { + "shop": "Shop", + "amenity": "Amenity", + "leisure": "Leisure", + "tourism": "Tourism", + "natural": "Natural feature", + "historic": "Historic site", +} + + +def humanize_category(osm_class, osm_type): + """Return a human-readable category string for an OSM class/type pair.""" + if not osm_class or not osm_type: + return "Place" + + osm_class = osm_class.lower() + osm_type = osm_type.lower() + + # Exact match + label = CATEGORY_MAP.get((osm_class, osm_type)) + if label: + return label + + # Class-level wildcard with formatted type + prefix = CLASS_FALLBACKS.get(osm_class) + if prefix: + nice_type = osm_type.replace("_", " ").title() + return f"{prefix}: {nice_type}" if prefix != nice_type else prefix + + # Generic fallback + nice_class = osm_class.replace("_", " ").title() + nice_type = osm_type.replace("_", " ").title() + return f"{nice_class}: {nice_type}" diff --git a/lib/overture.py b/lib/overture.py new file mode 100644 index 0000000..fcbdd18 --- /dev/null +++ b/lib/overture.py @@ -0,0 +1,170 @@ +""" +Overture Maps enrichment layer. + +Provides lookup functions against the local PostgreSQL Overture Places database. +Two strategies: + 1. find_by_osm_id — exact match via OSM cross-reference index + 2. find_by_coords_and_name — spatial + fuzzy name fallback + +Connection pool is lazy-initialized on first call. If PostgreSQL is unreachable, +functions return None gracefully (feature degrades, doesn't crash). +""" +import json +import os + +import psycopg2 +import psycopg2.pool + +from .utils import setup_logging + +logger = setup_logging('recon.overture') + +_pool = None +_pool_failed = False + +# Map full OSM type names to single-letter codes used in Overture sources +OSM_TYPE_MAP = { + 'N': 'n', 'W': 'w', 'R': 'r', + 'node': 'n', 'way': 'w', 'relation': 'r', + 'n': 'n', 'w': 'w', 'r': 'r', +} + + +def _get_pool(): + """Lazy-init the connection pool. 
def _query(sql, params):
    """Run one statement and return its first row keyed by column name.

    Returns None when the pool is unavailable, the statement yields no
    row, or any error occurs (the error is logged and the connection
    rolled back). The connection is always handed back to the pool.
    """
    pool = _get_pool()
    if pool is None:
        return None

    conn = None
    try:
        conn = pool.getconn()
        with conn.cursor() as cursor:
            cursor.execute(sql, params)
            first_row = cursor.fetchone()
            if first_row is None:
                return None
            column_names = [column[0] for column in cursor.description]
            return dict(zip(column_names, first_row))
    except Exception as e:
        logger.warning(f"Overture query error: {e}")
        if conn:
            # Best effort: leave the connection clean before it is reused.
            try:
                conn.rollback()
            except Exception:
                pass
        return None
    finally:
        if conn:
            try:
                pool.putconn(conn)
            except Exception:
                pass
def find_by_coords_and_name(lat, lon, name, radius_m=100):
    """
    Look up an Overture place by spatial proximity + fuzzy name match.

    Args:
        lat: Latitude
        lon: Longitude
        name: Place name to fuzzy-match
        radius_m: Search radius in meters (default 100)

    Returns:
        Enrichment dict or None (also None when name is empty or either
        coordinate is missing)
    """
    # Compare against None rather than using truthiness: 0.0 is a valid
    # latitude (equator) and longitude (prime meridian).
    if not name or lat is None or lon is None:
        return None

    # ST_MakePoint takes (x, y), i.e. (lon, lat) — hence the parameter order.
    row = _query(
        """SELECT id, name, basic_category, confidence,
                  phone, website, socials, brand_name, brand_wikidata,
                  similarity(name, %s) AS sim
           FROM places
           WHERE ST_DWithin(geometry::geography, ST_MakePoint(%s, %s)::geography, %s)
             AND similarity(name, %s) > 0.4
           ORDER BY sim DESC, ST_Distance(geometry::geography, ST_MakePoint(%s, %s)::geography) ASC
           LIMIT 1""",
        (name, lon, lat, radius_m, name, lon, lat)
    )
    return _format_result(row, 'coord_name_fuzzy')
def cache_get(osm_type, osm_id):
    """Look up a place in the SQLite cache.

    Returns the stored dict with its 'source' set to 'cache', or None when
    there is no row or the stored payload cannot be decoded.
    """
    cached_row = _get_db().execute(
        "SELECT data FROM place_cache WHERE osm_type=? AND osm_id=?",
        (osm_type, osm_id)
    ).fetchone()
    if not cached_row:
        return None

    try:
        place = json.loads(cached_row[0])
        place['source'] = 'cache'
    except (json.JSONDecodeError, TypeError):
        # Corrupt or non-dict payload: treat as a miss.
        return None
    return place
+ ON CONFLICT(osm_type, osm_id) DO UPDATE SET + data = excluded.data, + source = excluded.source, + cached_at = excluded.cached_at + """, (osm_type, osm_id, json.dumps(data), source, now)) + db.commit() + + +# ── Overture enrichment ───────────────────────────────────────────────── + +def _enrich_with_overture(result, osm_type, osm_id): + """ + Attempt to enrich a place result with Overture Maps data. + Fills sparse extratags (phone, website, brand) without overwriting existing values. + Returns the (possibly enriched) result dict. + """ + try: + from .deployment_config import get_deployment_config + deploy_config = get_deployment_config() + features = deploy_config.get('features', {}) + if not features.get('has_overture_enrichment', False): + return result + except Exception: + return result + + try: + from .overture import find_by_osm_id, find_by_coords_and_name + except ImportError: + logger.debug("Overture module not available") + return result + + enrichment = None + match_method = None + + # Strategy 1: OSM cross-reference (exact) + enrichment = find_by_osm_id(osm_type, osm_id) + if enrichment: + match_method = 'osm_xref' + + # Strategy 2: Coordinate + name fuzzy (fallback) + if not enrichment and result.get('centroid') and result.get('name'): + centroid = result['centroid'] + if centroid.get('lat') and centroid.get('lon'): + enrichment = find_by_coords_and_name( + centroid['lat'], centroid['lon'], result['name'] + ) + if enrichment: + match_method = 'coord_name_fuzzy' + + if not enrichment: + return result + + # Fill sparse extratags (never overwrite existing non-null values) + extratags = result.get('extratags', {}) + fill_map = [ + ('phone', 'phone'), + ('website', 'website'), + ('brand', 'brand_name'), + ('brand:wikidata', 'brand_wikidata'), + ] + for osm_key, overture_key in fill_map: + if not extratags.get(osm_key) and enrichment.get(overture_key): + extratags[osm_key] = enrichment[overture_key] + result['extratags'] = extratags + + # Add source 
metadata + result['sources'] = { + 'primary': result.get('source', 'unknown'), + 'enrichment': 'overture', + 'overture_match_method': match_method, + 'overture_gers_id': enrichment.get('gers_id'), + 'overture_confidence': enrichment.get('confidence'), + 'overture_basic_category': enrichment.get('basic_category'), + } + + logger.debug(f"Overture enrichment for {osm_type}/{osm_id}: {match_method}") + return result + + + +# ── Google Places enrichment (tertiary, gap-fill only) ────────────── + +# Business POI classes eligible for Google enrichment +_BUSINESS_CLASSES = {'amenity', 'shop', 'tourism', 'leisure', 'office', 'craft'} + +# Fields Google can fill +_GOOGLE_GAP_FIELDS = ('opening_hours', 'phone', 'website') + + +def _enrich_with_google(result, osm_type, osm_id): + """ + Tertiary enrichment via Google Places (New) API. + Only fires for business-type POIs when opening_hours, phone, or website + are still missing after OSM + Overture enrichment. + Fills only empty fields — never overwrites existing values. + """ + # Check feature flag + try: + from .deployment_config import get_deployment_config + deploy_config = get_deployment_config() + features = deploy_config.get('features', {}) + if not features.get('has_google_places_enrichment', False): + return result + except Exception: + return result + + # Only enrich business-type POIs + poi_class = result.get('class', '') + if poi_class not in _BUSINESS_CLASSES: + return result + + # Check if any gap fields are missing + extratags = result.get('extratags', {}) + gaps = [f for f in _GOOGLE_GAP_FIELDS if not extratags.get(f)] + if not gaps: + logger.debug(f"google_places: skip {osm_type}/{osm_id} — no gaps") + return result + + try: + from . 
import google_places + except ImportError: + logger.debug("google_places module not available") + return result + + # Check Google cache first + cached_pid, cached_data = google_places.cache_get_google(osm_type, osm_id) + if cached_pid and cached_data: + _apply_google_data(result, cached_data, gaps) + result.setdefault('sources', {})['google_places'] = { + 'place_id': cached_pid, + 'source': 'cache', + } + logger.debug(f"google_places: cache hit for {osm_type}/{osm_id}") + return result + + # Skip if already looked up and found nothing (cached_pid is None) + if cached_pid is not None: + return result + + # Skip new Google API calls for guest users (cached data already returned above) + from .auth import get_user_id + if not get_user_id(): + logger.debug(f"google_places: skip API call for {osm_type}/{osm_id} — guest user") + return result + + # Daily cap check + if not google_places.check_daily_cap(): + return result + + # Search for the place + name = result.get('name', '') + centroid = result.get('centroid', {}) + lat = centroid.get('lat') + lon = centroid.get('lon') + if not name or not lat or not lon: + return result + + place_id = google_places.search_place(name, lat, lon) + if not place_id: + # Cache the miss to avoid repeated lookups + google_places.cache_put_google(osm_type, osm_id, '__miss__', None) + return result + + # Get details + details = google_places.get_place_details(place_id) + if not details: + google_places.cache_put_google(osm_type, osm_id, place_id, None) + return result + + # Cache the result + google_places.cache_put_google(osm_type, osm_id, place_id, details) + + # Apply to result + _apply_google_data(result, details, gaps) + result.setdefault('sources', {})['google_places'] = { + 'place_id': place_id, + 'source': 'api', + 'daily_count': google_places.get_daily_count(), + } + + return result + + +def _apply_google_data(result, google_data, gaps): + """Apply Google Places data to fill gap fields only.""" + extratags = result.get('extratags', 
{}) + if 'opening_hours' in gaps: + osm_hours = google_data.get('opening_hours') + if osm_hours: + extratags['opening_hours'] = osm_hours + elif google_data.get('opening_hours_raw'): + extratags['opening_hours_raw'] = google_data['opening_hours_raw'] + if 'phone' in gaps and google_data.get('phone_number'): + extratags['phone'] = google_data['phone_number'] + if 'website' in gaps and google_data.get('website'): + extratags['website'] = google_data['website'] + result['extratags'] = extratags + + + + +# ── Wiki link rewriting ───────────────────────────────────────────────── + +# Extratag keys that may contain wiki references +_WIKI_TAGS = ('wikipedia', 'wikidata', 'wikivoyage', 'appropedia') + + +def _enrich_wiki_links(result): + """ + Rewrite wiki-related extratags to local Kiwix URLs where available. + Falls back to public URLs. Only runs when has_wiki_rewriting is enabled. + Returns the (possibly enriched) result dict. + """ + try: + from .deployment_config import get_deployment_config + deploy_config = get_deployment_config() + features = deploy_config.get('features', {}) + if not features.get('has_wiki_rewriting', False): + return result + except Exception: + return result + + try: + from .wiki_rewrite import rewrite_wiki_link + except ImportError: + logger.debug("wiki_rewrite module not available") + return result + + extratags = result.get('extratags', {}) + if not extratags: + return result + + rewrites = {} + for tag in _WIKI_TAGS: + value = extratags.get(tag) + if not value: + continue + url, status = rewrite_wiki_link(tag, value) + if status != 'original': + extratags[tag] = url + rewrites[tag] = status + + if rewrites: + result['extratags'] = extratags + result.setdefault('sources', {})['wiki_rewrites'] = rewrites + logger.debug(f"Wiki rewrites for {result.get('osm_type')}/{result.get('osm_id')}: {rewrites}") + + return result + +# ── Nominatim parsing ─────────────────────────────────────────────────── + +# Nominatim address array uses rank_address to 
indicate what each entry is. +# We map rank ranges to our flat address fields. +RANK_TO_FIELD = { + 4: 'country', + 5: 'postcode', + 6: 'state', # rank 6 = county in US, but we try name matching + 8: 'state', + 12: 'county', + 16: 'city', + 20: 'neighbourhood', + 22: 'neighbourhood', + 26: 'road', + 28: 'house_number', +} + + +def _parse_nominatim_address(address_array, country_code=None): + """Parse Nominatim's ranked address array into a flat address dict.""" + addr = { + 'house_number': None, + 'road': None, + 'neighbourhood': None, + 'city': None, + 'county': None, + 'state': None, + 'postcode': None, + 'country': None, + 'country_code': country_code, + } + + if not address_array: + return addr + + for entry in address_array: + if not entry.get('isaddress', False): + continue + + name = entry.get('localname', '') + rank = entry.get('rank_address', 0) + etype = entry.get('type', '') + eclass = entry.get('class', '') + + # Explicit type-based assignments (more reliable than rank alone) + if etype == 'country' and eclass == 'place': + addr['country'] = name + elif etype == 'state' or (eclass == 'boundary' and etype == 'administrative' and rank == 8): + if not addr['state']: + addr['state'] = name + elif etype == 'county' or (eclass == 'boundary' and etype == 'administrative' and rank in (10, 12)): + if not addr['county']: + addr['county'] = name + elif etype in ('city', 'town', 'village', 'hamlet') and eclass == 'place': + if not addr['city']: + addr['city'] = name + elif eclass == 'boundary' and etype == 'administrative' and rank == 16: + # City-level admin boundary (common in US) + if not addr['city']: + addr['city'] = name + elif etype == 'postcode': + addr['postcode'] = name + elif eclass == 'highway' or rank == 26: + if not addr['road']: + addr['road'] = name + elif etype == 'house_number' or rank == 28: + addr['house_number'] = name + elif rank in (20, 22) and not addr['neighbourhood']: + addr['neighbourhood'] = name + + # Remove county from output (not in 
spec) + addr.pop('county', None) + + return addr + + +def _parse_nominatim(data): + """Parse a Nominatim /details response into our canonical shape.""" + osm_type = data.get('osm_type', '') + osm_id = data.get('osm_id', 0) + osm_class = data.get('category', '') + osm_type_tag = data.get('type', '') + + # Centroid + centroid_geom = data.get('centroid', {}) + coords = centroid_geom.get('coordinates', [0, 0]) + centroid = {'lat': coords[1], 'lon': coords[0]} if len(coords) >= 2 else {'lat': 0, 'lon': 0} + + # Names + names = data.get('names', {}) + display_name = data.get('localname', '') or names.get('name', '') + + # Address + address = _parse_nominatim_address( + data.get('address', []), + country_code=data.get('country_code') + ) + + # Use calculated_postcode if address parse didn't find one + if not address.get('postcode') and data.get('calculated_postcode'): + address['postcode'] = data['calculated_postcode'] + + # Extratags + raw_extra = data.get('extratags', {}) + extratags = { + 'opening_hours': raw_extra.get('opening_hours'), + 'phone': raw_extra.get('phone') or raw_extra.get('contact:phone'), + 'website': raw_extra.get('website') or raw_extra.get('contact:website') or raw_extra.get('url'), + 'email': raw_extra.get('email') or raw_extra.get('contact:email'), + 'wikipedia': raw_extra.get('wikipedia'), + 'wikidata': raw_extra.get('wikidata'), + 'cuisine': raw_extra.get('cuisine'), + 'operator': raw_extra.get('operator'), + 'wheelchair': raw_extra.get('wheelchair'), + 'fee': raw_extra.get('fee'), + 'takeaway': raw_extra.get('takeaway'), + } + + # Category: use extratags.place for boundaries (e.g. 
"city"), else class/type + effective_class = osm_class + effective_type = osm_type_tag + if osm_class == 'boundary' and osm_type_tag == 'administrative': + place_tag = raw_extra.get('place') or raw_extra.get('linked_place') + if place_tag: + effective_class = 'place' + effective_type = place_tag + + category = humanize_category(effective_class, effective_type) + + # Filter names: only include extra name tags, not the bare "name" + extra_names = {k: v for k, v in names.items() if k != 'name'} if names else {} + + # Boundary geometry (polygon/multipolygon from Nominatim) + boundary = None + geom = data.get('geometry') + if geom and geom.get('type') in ('Polygon', 'MultiPolygon'): + boundary = geom + + return { + 'osm_type': osm_type, + 'osm_id': osm_id, + 'name': display_name, + 'category': category, + 'class': osm_class, + 'type': osm_type_tag, + 'address': address, + 'centroid': centroid, + 'extratags': extratags, + 'names': extra_names if extra_names else None, + 'source': 'nominatim_local', + 'boundary': boundary, + } + + +# ── Overpass parsing ──────────────────────────────────────────────────── + +OVERPASS_TYPE_MAP = {'N': 'node', 'W': 'way', 'R': 'relation'} + + +def _build_overpass_query(osm_type, osm_id): + """Build an Overpass QL query for a single element.""" + elem = OVERPASS_TYPE_MAP.get(osm_type) + if not elem: + return None + return f"[out:json][timeout:10];{elem}({osm_id});out tags center;" + + +def _parse_overpass(data, osm_type, osm_id): + """Parse an Overpass API response into our canonical shape.""" + elements = data.get('elements', []) + if not elements: + return None + + elem = elements[0] + tags = elem.get('tags', {}) + + # Centroid: Overpass returns lat/lon for nodes, center for ways/relations + lat = elem.get('lat') or (elem.get('center', {}).get('lat')) + lon = elem.get('lon') or (elem.get('center', {}).get('lon')) + centroid = {'lat': lat, 'lon': lon} if lat and lon else {'lat': 0, 'lon': 0} + + # Determine class/type from tags — Overpass 
doesn't have a canonical class field + # Use the first recognized class tag + osm_class = '' + osm_type_tag = '' + for cls in ('amenity', 'shop', 'leisure', 'tourism', 'natural', 'highway', + 'boundary', 'place', 'building', 'waterway', 'landuse', 'historic'): + if cls in tags: + osm_class = cls + osm_type_tag = tags[cls] + break + + category = humanize_category(osm_class, osm_type_tag) + + # Address from addr:* tags + address = { + 'house_number': tags.get('addr:housenumber'), + 'road': tags.get('addr:street'), + 'neighbourhood': tags.get('addr:suburb') or tags.get('addr:neighbourhood'), + 'city': tags.get('addr:city'), + 'state': tags.get('addr:state'), + 'postcode': tags.get('addr:postcode'), + 'country': tags.get('addr:country'), + 'country_code': tags.get('addr:country_code', + tags.get('addr:country', '')).lower()[:2] or None, + } + + # Extratags + extratags = { + 'opening_hours': tags.get('opening_hours'), + 'phone': tags.get('phone') or tags.get('contact:phone'), + 'website': tags.get('website') or tags.get('contact:website') or tags.get('url'), + 'email': tags.get('email') or tags.get('contact:email'), + 'wikipedia': tags.get('wikipedia'), + 'wikidata': tags.get('wikidata'), + 'cuisine': tags.get('cuisine'), + 'operator': tags.get('operator'), + 'wheelchair': tags.get('wheelchair'), + 'fee': tags.get('fee'), + 'takeaway': tags.get('takeaway'), + } + + # Names + name = tags.get('name', '') + extra_names = {} + for k, v in tags.items(): + if k.startswith('name:') or k in ('alt_name', 'old_name', 'short_name', 'official_name'): + extra_names[k] = v + + return { + 'osm_type': osm_type, + 'osm_id': osm_id, + 'name': name, + 'category': category, + 'class': osm_class, + 'type': osm_type_tag, + 'address': address, + 'centroid': centroid, + 'extratags': extratags, + 'names': extra_names if extra_names else None, + 'source': 'overpass', + } + + +# ── Public API ────────────────────────────────────────────────────────── + +def get_place_detail(osm_type, osm_id): + 
""" + Fetch place details for an OSM element. + + Returns (dict, status_code): + - (data, 200) on success + - (error_dict, 404) if not found in any source + - (error_dict, 502) if both sources error + """ + osm_type = osm_type.upper() + if osm_type not in VALID_OSM_TYPES: + return {'error': f'Invalid osm_type: {osm_type}. Must be N, W, or R.'}, 400 + + if osm_id <= 0: + return {'error': 'osm_id must be a positive integer'}, 400 + + # 1. Check cache + cached = cache_get(osm_type, osm_id) + if cached: + logger.debug(f"Cache hit: {osm_type}/{osm_id}") + return cached, 200 + + # 2. Try local Nominatim first + nominatim_result = None + nominatim_error = None + try: + resp = http_requests.get(NOMINATIM_URL, params={ + 'osmtype': osm_type, + 'osmid': osm_id, + 'format': 'json', + 'addressdetails': 1, + 'hierarchy': 0, + 'keywords': 0, + 'polygon_geojson': 1, + }, timeout=5) + + if resp.status_code == 200: + data = resp.json() + # Nominatim returns a result even for IDs not in its DB, + # but they'll have empty/minimal data. Check for osm_id match. + if data.get('osm_id') == osm_id: + nominatim_result = _parse_nominatim(data) + logger.debug(f"Nominatim hit: {osm_type}/{osm_id}") + except Exception as e: + nominatim_error = str(e) + logger.warning(f"Nominatim error for {osm_type}/{osm_id}: {e}") + + if nominatim_result: + nominatim_result = _enrich_with_overture(nominatim_result, osm_type, osm_id) + nominatim_result = _enrich_with_google(nominatim_result, osm_type, osm_id) + nominatim_result = _enrich_wiki_links(nominatim_result) + cache_put(osm_type, osm_id, nominatim_result, 'nominatim_local') + return nominatim_result, 200 + + # 3. 
Fallback to Overpass + overpass_result = None + overpass_error = None + try: + query = _build_overpass_query(osm_type, osm_id) + if query: + resp = http_requests.post( + OVERPASS_URL, + data={'data': query}, + headers={'User-Agent': OVERPASS_UA}, + timeout=10, + ) + if resp.status_code == 200: + data = resp.json() + overpass_result = _parse_overpass(data, osm_type, osm_id) + if overpass_result: + logger.debug(f"Overpass hit: {osm_type}/{osm_id}") + elif resp.status_code == 429: + overpass_error = "Overpass rate limited" + logger.warning(f"Overpass 429 for {osm_type}/{osm_id}") + else: + overpass_error = f"Overpass HTTP {resp.status_code}" + except Exception as e: + overpass_error = str(e) + logger.warning(f"Overpass error for {osm_type}/{osm_id}: {e}") + + if overpass_result: + overpass_result = _enrich_with_overture(overpass_result, osm_type, osm_id) + overpass_result = _enrich_with_google(overpass_result, osm_type, osm_id) + overpass_result = _enrich_wiki_links(overpass_result) + cache_put(osm_type, osm_id, overpass_result, 'overpass') + return overpass_result, 200 + + # 4. Both failed + if nominatim_error and overpass_error: + logger.error(f"Both sources failed for {osm_type}/{osm_id}: " + f"Nominatim={nominatim_error}, Overpass={overpass_error}") + return {'error': 'Both data sources unavailable'}, 502 + + # Not found in either source (no errors, just empty results) + return {'error': f'{osm_type}/{osm_id} not found'}, 404 + + +# ── Wikidata lookup ───────────────────────────────────────────────────── + +WIKIDATA_API_URL = "https://www.wikidata.org/w/api.php" + +def get_place_by_wikidata(wikidata_id): + """ + Fetch place details from Wikidata entity. 
+ + Returns (dict, status_code): + - (data, 200) on success + - (error_dict, 404) if entity not found + - (error_dict, 400) if invalid ID format + - (error_dict, 502) on API error + """ + # Validate wikidata ID format (Q followed by digits) + wikidata_id = wikidata_id.upper().strip() + if not wikidata_id.startswith("Q") or not wikidata_id[1:].isdigit(): + return {"error": f"Invalid wikidata ID: {wikidata_id}. Must be Q followed by digits."}, 400 + + try: + resp = http_requests.get(WIKIDATA_API_URL, params={ + "action": "wbgetentities", + "ids": wikidata_id, + "format": "json", + "languages": "en", + "props": "labels|descriptions|claims|sitelinks", + }, timeout=10, headers={"User-Agent": "Navi/1.0 (forge.echo6.co/matt/recon)"}) + + if resp.status_code != 200: + logger.warning(f"Wikidata API error for {wikidata_id}: HTTP {resp.status_code}") + return {"error": "Wikidata API error"}, 502 + + data = resp.json() + entities = data.get("entities", {}) + entity = entities.get(wikidata_id) + + if not entity or entity.get("missing"): + return {"error": f"Wikidata entity {wikidata_id} not found"}, 404 + + # Extract basic info + labels = entity.get("labels", {}) + descriptions = entity.get("descriptions", {}) + claims = entity.get("claims", {}) + + name = labels.get("en", {}).get("value", wikidata_id) + description = descriptions.get("en", {}).get("value", "") + + # Extract coordinates from P625 (coordinate location) + lat, lon = None, None + if "P625" in claims: + coord_claim = claims["P625"] + if coord_claim and coord_claim[0].get("mainsnak", {}).get("datavalue"): + coord_val = coord_claim[0]["mainsnak"]["datavalue"]["value"] + lat = coord_val.get("latitude") + lon = coord_val.get("longitude") + + # Extract population from P1082 + population = None + if "P1082" in claims: + pop_claims = claims["P1082"] + if pop_claims: + # Get the most recent population value + for claim in pop_claims: + if claim.get("mainsnak", {}).get("datavalue"): + try: + population = 
int(claim["mainsnak"]["datavalue"]["value"]["amount"].lstrip("+")) + break + except (KeyError, ValueError): + pass + + # Extract country from P17 + country = None + if "P17" in claims: + country_claims = claims["P17"] + if country_claims and country_claims[0].get("mainsnak", {}).get("datavalue"): + country_id = country_claims[0]["mainsnak"]["datavalue"]["value"]["id"] + # Could resolve this to a name, but for now just store the ID + + # Extract instance of (P31) for type classification + instance_of = [] + if "P31" in claims: + for claim in claims["P31"]: + if claim.get("mainsnak", {}).get("datavalue"): + instance_of.append(claim["mainsnak"]["datavalue"]["value"]["id"]) + + # Extract OSM relation ID if available (P402) + osm_relation_id = None + if "P402" in claims: + osm_claims = claims["P402"] + if osm_claims and osm_claims[0].get("mainsnak", {}).get("datavalue"): + osm_relation_id = osm_claims[0]["mainsnak"]["datavalue"]["value"] + + # Extract Wikipedia sitelink + sitelinks = entity.get("sitelinks", {}) + wikipedia = None + if "enwiki" in sitelinks: + wiki_title = sitelinks["enwiki"].get("title", "") + if wiki_title: + wikipedia = f"en:{wiki_title}" + + result = { + "wikidata_id": wikidata_id, + "name": name, + "description": description, + "centroid": {"lat": lat, "lon": lon} if lat and lon else None, + "population": population, + "instance_of": instance_of, + "osm_relation_id": osm_relation_id, + "source": "wikidata", + "extratags": { + "wikidata": wikidata_id, + }, + } + + if wikipedia: + result["extratags"]["wikipedia"] = wikipedia + + # Fetch boundary polygon from Nominatim if we have an OSM relation ID + boundary = None + if osm_relation_id: + try: + nom_resp = http_requests.get(NOMINATIM_URL, params={ + 'osmtype': 'R', + 'osmid': osm_relation_id, + 'format': 'json', + 'polygon_geojson': 1, + }, timeout=5) + if nom_resp.status_code == 200: + nom_data = nom_resp.json() + geom = nom_data.get('geometry') + if geom and geom.get('type') in ('Polygon', 
'MultiPolygon'): + boundary = geom + logger.debug(f"Wikidata boundary hit for {wikidata_id}") + except Exception as e: + logger.debug(f"Wikidata boundary fetch failed: {e}") + + result["boundary"] = boundary + + logger.debug(f"Wikidata hit: {wikidata_id} -> {name}") + return result, 200 + + except Exception as e: + logger.warning(f"Wikidata error for {wikidata_id}: {e}") + return {"error": "Wikidata lookup failed"}, 502 diff --git a/lib/processors/zim_processor.py b/lib/processors/zim_processor.py index 6f5c887..b258408 100644 --- a/lib/processors/zim_processor.py +++ b/lib/processors/zim_processor.py @@ -77,73 +77,10 @@ def _text_hash(text): return hashlib.md5(text.encode('utf-8')).hexdigest() -def _flatten_table(table_el): - """Convert a element to pipe-delimited text. - - Each becomes a row with cells joined by ' | '. - Returns the formatted table as a string with blank lines around it. - """ - rows = [] - for tr in table_el.iter('tr'): - cells = [] - for cell in tr: - if cell.tag in ('td', 'th'): - cell_text = (cell.text_content() or '').strip() - # Collapse internal whitespace in each cell - cell_text = re.sub(r'\s+', ' ', cell_text) - if cell_text: - cells.append(cell_text) - if cells: - rows.append(' | '.join(cells)) - if not rows: - return '' - return '\n'.join(rows) - - -def _preprocess_tree(doc): - """Pre-process HTML tree to add delimiters before text_content() flattens it. - - Handles:
,
,
  • ,
    ,
    -- elements that lxml's - text_content() would concatenate without any separators. - """ - from lxml import etree - - # 1. Replace
  • elements with their pipe-delimited text - for table in list(doc.iter('table')): - formatted = _flatten_table(table) - if formatted: - replacement = etree.Element('div') - replacement.text = '\n\n' + formatted + '\n\n' - parent = table.getparent() - if parent is not None: - parent.replace(table, replacement) - else: - table.drop_tree() - - # 2.
    -> inject newline - for br in list(doc.iter('br')): - br.tail = '\n' + (br.tail or '') - - # 3.
  • -> inject newline + "- " prefix - for li in list(doc.iter('li')): - li.text = '- ' + (li.text or '') - li.tail = '\n' + (li.tail or '') - - # 4.
    -> inject newline before - for dt in list(doc.iter('dt')): - dt.tail = '\n' + (dt.tail or '') - - # 5.
    -> inject newline + indent - for dd in list(doc.iter('dd')): - dd.text = ' ' + (dd.text or '') - dd.tail = '\n' + (dd.tail or '') - - def _html_to_text(html_bytes): """Convert HTML bytes to clean text via lxml. Strips nav, footer, script, style elements. Decodes entities. - Pre-processes tables, lists, and line breaks for proper delimiters. Normalizes whitespace. """ try: @@ -156,9 +93,6 @@ def _html_to_text(html_bytes): for el in doc.iter(tag): el.drop_tree() - # Pre-process tree: tables -> pipe-delimited, br -> newlines, li -> dashes - _preprocess_tree(doc) - # Extract text text = doc.text_content() diff --git a/lib/wiki_rewrite.py b/lib/wiki_rewrite.py new file mode 100644 index 0000000..d884635 --- /dev/null +++ b/lib/wiki_rewrite.py @@ -0,0 +1,324 @@ +""" +Wiki link rewriter — rewrites OSM wikipedia/wikidata/wikivoyage/appropedia +links to local Kiwix URLs where the article exists in a loaded ZIM. + +Falls back silently to public URLs when article is unavailable locally. +Caches positive results only in place_cache.db. + +Kiwix catalog is parsed from the OPDS Atom feed at startup and refreshed +hourly to pick up newly loaded ZIMs without a restart. + +Operations note: + - After loading a new ZIM, either restart RECON (forces fresh catalog + fetch) or wait up to 1 hour for automatic refresh. + - To invalidate the wiki cache (e.g. 
after ZIM update): + sqlite3 /opt/recon/data/place_cache.db "DELETE FROM wiki_cache;" +""" +import os +import re +import sqlite3 +import time +import xml.etree.ElementTree as ET +from urllib.parse import unquote, quote + +import requests as http_requests + +from .utils import setup_logging + +logger = setup_logging('recon.wiki_rewrite') + +# ── Configuration ─────────────────────────────────────────────────────── + +KIWIX_BASE = "http://localhost:8430" +KIWIX_PUBLIC_BASE = "https://wiki.echo6.co" +KIWIX_CATALOG_URL = f"{KIWIX_BASE}/catalog/v2/entries" +HEAD_TIMEOUT = 1.5 # seconds +CATALOG_REFRESH_INTERVAL = 3600 # 1 hour + +# OPDS Atom namespace +_ATOM_NS = "http://www.w3.org/2005/Atom" + +# ── ZIM catalog map ───────────────────────────────────────────────────── + +_zim_map = {} # source_type → content_path e.g. 'wikipedia' → 'wikipedia_en_all_maxi_2026-02' +_zim_map_ts = 0.0 # last refresh timestamp + +# Prefix-to-source-type mapping (order matters: longest prefix first) +_ZIM_PREFIX_MAP = [ + ('wikipedia_en_all', 'wikipedia'), + ('appropedia_en_all', 'appropedia'), + ('wikivoyage_en', 'wikivoyage'), + ('wikidata_en', 'wikidata'), +] + + +def _discover_zims(): + """Parse Kiwix OPDS Atom catalog to map source types to content paths.""" + global _zim_map, _zim_map_ts + + try: + resp = http_requests.get(KIWIX_CATALOG_URL, timeout=5) + if resp.status_code != 200: + logger.warning(f"Kiwix catalog returned HTTP {resp.status_code}") + return + + root = ET.fromstring(resp.content) + new_map = {} + + for entry in root.findall(f"{{{_ATOM_NS}}}entry"): + name_el = entry.find(f"{{{_ATOM_NS}}}name") + if name_el is None: + continue + book_name = name_el.text or "" + + # + content_path = None + for link in entry.findall(f"{{{_ATOM_NS}}}link"): + if link.get("type") == "text/html": + href = link.get("href", "") + if href.startswith("/content/"): + content_path = href[len("/content/"):] + break + + if not content_path: + continue + + # Match book name against known prefixes + 
for prefix, source_type in _ZIM_PREFIX_MAP: + if book_name.startswith(prefix): + new_map[source_type] = content_path + break + + _zim_map = new_map + _zim_map_ts = time.time() + logger.info(f"ZIM catalog refreshed: {new_map}") + + except Exception as e: + logger.warning(f"Failed to discover ZIMs from Kiwix catalog: {e}") + + +def _ensure_zim_map(): + """Lazy-load and refresh ZIM map if stale.""" + if not _zim_map or (time.time() - _zim_map_ts) > CATALOG_REFRESH_INTERVAL: + _discover_zims() + + +# ── Database (wiki_cache in place_cache.db) ───────────────────────────── + +_db_conn = None + + +def _get_db(): + """Return a module-level SQLite connection to place_cache.db (lazy init).""" + global _db_conn + if _db_conn is not None: + return _db_conn + + db_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'data') + os.makedirs(db_dir, exist_ok=True) + db_path = os.path.join(db_dir, 'place_cache.db') + + _db_conn = sqlite3.connect(db_path, check_same_thread=False) + _db_conn.execute("PRAGMA journal_mode=WAL") + _db_conn.execute("PRAGMA synchronous=NORMAL") + _db_conn.execute(""" + CREATE TABLE IF NOT EXISTS wiki_cache ( + source_type TEXT NOT NULL, + article_id TEXT NOT NULL, + kiwix_url TEXT NOT NULL, + cached_at INTEGER NOT NULL, + PRIMARY KEY (source_type, article_id) + ) + """) + _db_conn.commit() + logger.info(f"Wiki cache table ready in {db_path}") + return _db_conn + + +# ── URL classification ────────────────────────────────────────────────── + +# Patterns for OSM wikipedia/wikidata tag values +_WIKI_TAG_RE = re.compile(r'^(?:en:)?(.+)$') # "en:Title" or just "Title" +_WIKI_URL_RE = re.compile(r'https?://en\.wikipedia\.org/wiki/(.+)') +_WIKIDATA_TAG_RE = re.compile(r'^(Q\d+)$') +_WIKIDATA_URL_RE = re.compile(r'https?://(?:www\.)?wikidata\.org/wiki/(Q\d+)') +_WIKIVOYAGE_URL_RE = re.compile(r'https?://en\.wikivoyage\.org/wiki/(.+)') +_APPROPEDIA_URL_RE = re.compile(r'https?://(?:www\.)?appropedia\.org/(?:wiki/)?(.+)') + + +def 
_normalize_article_id(article_id): + """Normalize article ID to MediaWiki/Kiwix convention: spaces → underscores.""" + return article_id.replace(' ', '_') + + +def classify_wiki_link(tag_name, value): + """ + Classify an OSM extratag value into (source_type, article_id) or None. + + tag_name: the extratags key ('wikipedia', 'wikidata', etc.) + value: the raw tag value from OSM + + Article IDs are normalized to MediaWiki convention (spaces → underscores). + """ + if not value or not isinstance(value, str): + return None + + value = value.strip() + + if tag_name == 'wikidata': + m = _WIKIDATA_TAG_RE.match(value) + if m: + return ('wikidata', m.group(1)) + m = _WIKIDATA_URL_RE.match(value) + if m: + return ('wikidata', m.group(1)) + return None + + if tag_name == 'wikipedia': + # URL form: https://en.wikipedia.org/wiki/Title + m = _WIKI_URL_RE.match(value) + if m: + return ('wikipedia', _normalize_article_id(unquote(m.group(1)))) + # Tag form: "en:Title" or "Title" + m = _WIKI_TAG_RE.match(value) + if m: + return ('wikipedia', _normalize_article_id(m.group(1))) + return None + + if tag_name == 'wikivoyage': + m = _WIKIVOYAGE_URL_RE.match(value) + if m: + return ('wikivoyage', _normalize_article_id(unquote(m.group(1)))) + # Plain tag: "en:Title" or "Title" + m = _WIKI_TAG_RE.match(value) + if m: + return ('wikivoyage', _normalize_article_id(m.group(1))) + return None + + if tag_name == 'appropedia': + m = _APPROPEDIA_URL_RE.match(value) + if m: + return ('appropedia', _normalize_article_id(unquote(m.group(1)))) + return ('appropedia', _normalize_article_id(value)) + + return None + + +# ── URL builders ──────────────────────────────────────────────────────── + +def build_kiwix_url(source_type, article_id): + """Build a public Kiwix URL. 
Returns None if source_type not in ZIM map.""" + _ensure_zim_map() + content_path = _zim_map.get(source_type) + if not content_path: + return None + return f"{KIWIX_PUBLIC_BASE}/content/{content_path}/{quote(article_id, safe='/:@!$&\'()*+,;=')}" + + +_PUBLIC_URL_TEMPLATES = { + 'wikipedia': "https://en.wikipedia.org/wiki/{id}", + 'wikidata': "https://www.wikidata.org/wiki/{id}", + 'wikivoyage': "https://en.wikivoyage.org/wiki/{id}", + 'appropedia': "https://www.appropedia.org/wiki/{id}", +} + + +def build_public_url(source_type, article_id): + """Build the canonical public URL for a wiki article.""" + tmpl = _PUBLIC_URL_TEMPLATES.get(source_type) + if not tmpl: + return None + return tmpl.format(id=quote(article_id, safe='/:@!$&\'()*+,;=')) + + +# ── Kiwix availability check ─────────────────────────────────────────── + +def check_kiwix_has_article(source_type, article_id): + """ + Check if an article exists in local Kiwix. + + Returns (bool, url): + - (True, kiwix_public_url) if article exists locally + - (False, None) if not found or Kiwix unavailable + + Only positive results are cached. + """ + # Check cache first + db = _get_db() + row = db.execute( + "SELECT kiwix_url FROM wiki_cache WHERE source_type=? AND article_id=?", + (source_type, article_id) + ).fetchone() + if row: + return (True, row[0]) + + # Build local HEAD URL + _ensure_zim_map() + content_path = _zim_map.get(source_type) + if not content_path: + return (False, None) + + head_url = f"{KIWIX_BASE}/content/{content_path}/{quote(article_id, safe='/:@!$&\'()*+,;=')}" + + try: + resp = http_requests.head(head_url, timeout=HEAD_TIMEOUT, allow_redirects=True) + if resp.status_code == 200: + kiwix_url = build_kiwix_url(source_type, article_id) + # Cache positive result + now = int(time.time()) + db.execute(""" + INSERT OR REPLACE INTO wiki_cache (source_type, article_id, kiwix_url, cached_at) + VALUES (?, ?, ?, ?) 
+ """, (source_type, article_id, kiwix_url, now)) + db.commit() + return (True, kiwix_url) + else: + return (False, None) + except Exception as e: + logger.debug(f"Kiwix HEAD failed for {source_type}/{article_id}: {e}") + return (False, None) + + +# ── Primary entry point ──────────────────────────────────────────────── + +def rewrite_wiki_link(tag_name, value): + """ + Rewrite an OSM wiki tag value to a local Kiwix URL if available. + + Returns (url, 'local'|'public') or (None, None) if unrecognized. + """ + classified = classify_wiki_link(tag_name, value) + if not classified: + return (value, 'original') + + source_type, article_id = classified + + # Try local Kiwix + found, kiwix_url = check_kiwix_has_article(source_type, article_id) + if found and kiwix_url: + return (kiwix_url, 'local') + + # Fall back to public URL + public_url = build_public_url(source_type, article_id) + if public_url: + return (public_url, 'public') + + return (value, 'original') + + +# ── Discovery stubs (disabled, for future activation) ─────────────────── + +def discover_wikivoyage_article(name, category, lat, lon): + """ + Discover a related Wikivoyage article for a place. + Enabled by has_wiki_discovery. Currently returns None. + """ + return None + + +def discover_appropedia_article(name, category): + """ + Discover a related Appropedia article for a place. + Enabled by has_wiki_discovery. Currently returns None. + """ + return None diff --git a/requirements.txt b/requirements.txt index 1da21bc..f643cd8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,6 @@ anyio==4.12.1 babel==2.18.0 beautifulsoup4==4.14.3 blinker==1.9.0 -cachetools==7.1.3 certifi==2026.1.4 cffi==2.0.0 charset-normalizer==3.4.4 diff --git a/scripts/overture_import.py b/scripts/overture_import.py new file mode 100644 index 0000000..0b6ba67 --- /dev/null +++ b/scripts/overture_import.py @@ -0,0 +1,350 @@ +#!/usr/bin/env python3 +"""Overture Maps Places → PostgreSQL import script (v2). 
+ +Downloads Overture Places Parquet from S3 via DuckDB (public bucket, no credentials), +filters to North America bounding box, and inserts into local PostgreSQL with PostGIS. + +Usage: + cd /opt/recon && venv/bin/python scripts/overture_import.py + +Re-runnable (idempotent via UPSERT). +""" + +import json +import logging +import os +import re +import sys +import time + +import duckdb +import psycopg2 +import psycopg2.extras + +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s %(levelname)s %(message)s', + datefmt='%H:%M:%S' +) +log = logging.getLogger('overture_import') + +# --- Config --- +OVERTURE_RELEASE = '2026-04-15.0' +S3_PATH = f's3://overturemaps-us-west-2/release/{OVERTURE_RELEASE}/theme=places/type=place/*' + +# North America bounding box (generous — includes Hawaii, Puerto Rico, Canada) +BBOX = { + 'xmin': -170.0, + 'xmax': -50.0, + 'ymin': 15.0, + 'ymax': 85.0, +} + +BATCH_SIZE = 50_000 +OSM_RECORD_RE = re.compile(r'^([nwr])(\d+)@\d+$') + +DB_CONFIG = { + 'host': os.environ.get('OVERTURE_DB_HOST', 'localhost'), + 'port': int(os.environ.get('OVERTURE_DB_PORT', '5432')), + 'dbname': os.environ.get('OVERTURE_DB_NAME', 'overture'), + 'user': os.environ.get('OVERTURE_DB_USER', 'overture'), + 'password': os.environ.get('OVERTURE_DB_PASSWORD', ''), +} + + +def create_table(conn): + """Create places table and indexes if they don't exist.""" + with conn.cursor() as cur: + cur.execute(""" + CREATE TABLE IF NOT EXISTS places ( + id TEXT PRIMARY KEY, + geometry GEOMETRY(Point, 4326), + name TEXT, + basic_category TEXT, + confidence REAL, + phone TEXT, + website TEXT, + socials JSONB, + brand_name TEXT, + brand_wikidata TEXT, + osm_type CHAR(1), + osm_id BIGINT, + source_record_id TEXT, + raw_sources JSONB + ); + """) + cur.execute(""" + CREATE INDEX IF NOT EXISTS idx_places_osm + ON places(osm_type, osm_id) WHERE osm_type IS NOT NULL; + """) + cur.execute(""" + CREATE INDEX IF NOT EXISTS idx_places_geom + ON places USING GIST(geometry); + """) + 
cur.execute(""" + CREATE INDEX IF NOT EXISTS idx_places_name_trgm + ON places USING GIN(name gin_trgm_ops); + """) + conn.commit() + log.info('Table and indexes ready') + + +def parse_osm_ref(sources): + """Extract OSM type letter and ID from Overture sources array.""" + if not sources: + return None, None, None + for src in sources: + record_id = None + if isinstance(src, dict): + record_id = src.get('record_id', '') + elif hasattr(src, '__getitem__'): + # DuckDB struct — try attribute access + try: + record_id = src['record_id'] + except (KeyError, TypeError, IndexError): + pass + if not record_id: + continue + m = OSM_RECORD_RE.match(str(record_id)) + if m: + return m.group(1), int(m.group(2)), str(record_id) + return None, None, None + + +def run_import(): + """Main import: DuckDB reads S3 Parquet → PostgreSQL via chunked OFFSET/LIMIT.""" + log.info(f'Overture release: {OVERTURE_RELEASE}') + log.info(f'S3 path: {S3_PATH}') + log.info(f'Bounding box: {BBOX}') + + # Connect to PostgreSQL + conn = psycopg2.connect(**DB_CONFIG) + conn.autocommit = False + create_table(conn) + + # Set up DuckDB with httpfs and spatial for S3 access + duck = duckdb.connect() + duck.execute("INSTALL httpfs; LOAD httpfs;") + duck.execute("INSTALL spatial; LOAD spatial;") + duck.execute("SET s3_region='us-west-2';") + + # Use a materialized approach: DuckDB query → Arrow → iterate in Python + query = f""" + SELECT + id, + ST_X(geometry) AS lon, + ST_Y(geometry) AS lat, + names.primary AS name, + basic_category, + confidence, + phones, + websites, + socials, + brand, + sources + FROM read_parquet('{S3_PATH}', hive_partitioning=true) + WHERE bbox.xmin >= {BBOX['xmin']} + AND bbox.xmax <= {BBOX['xmax']} + AND bbox.ymin >= {BBOX['ymin']} + AND bbox.ymax <= {BBOX['ymax']} + """ + + log.info('Starting DuckDB query against S3 (this will take several minutes)...') + t_start = time.time() + + # Execute and fetch all as Arrow for efficient iteration + result_rel = duck.sql(query) + + upsert_sql = 
""" + INSERT INTO places (id, geometry, name, basic_category, confidence, + phone, website, socials, brand_name, brand_wikidata, + osm_type, osm_id, source_record_id, raw_sources) + VALUES %s + ON CONFLICT (id) DO UPDATE SET + geometry = EXCLUDED.geometry, + name = EXCLUDED.name, + basic_category = EXCLUDED.basic_category, + confidence = EXCLUDED.confidence, + phone = EXCLUDED.phone, + website = EXCLUDED.website, + socials = EXCLUDED.socials, + brand_name = EXCLUDED.brand_name, + brand_wikidata = EXCLUDED.brand_wikidata, + osm_type = EXCLUDED.osm_type, + osm_id = EXCLUDED.osm_id, + source_record_id = EXCLUDED.source_record_id, + raw_sources = EXCLUDED.raw_sources + """ + + template = """( + %(id)s, + ST_SetSRID(ST_MakePoint(%(lon)s, %(lat)s), 4326), + %(name)s, + %(basic_category)s, + %(confidence)s, + %(phone)s, + %(website)s, + %(socials)s::jsonb, + %(brand_name)s, + %(brand_wikidata)s, + %(osm_type)s, + %(osm_id)s, + %(source_record_id)s, + %(raw_sources)s::jsonb + )""" + + total = 0 + osm_refs = 0 + batch = [] + + log.info('DuckDB query executing, fetching results in chunks...') + + # Fetch in chunks using fetchmany on the relation + chunk_size = BATCH_SIZE + while True: + chunk = result_rel.fetchmany(chunk_size) + if not chunk: + break + + for row in chunk: + row_id = row[0] + lon = row[1] + lat = row[2] + name = row[3] + basic_cat = row[4] + conf = row[5] + phones = row[6] + websites = row[7] + socials_raw = row[8] + brand_raw = row[9] + sources_raw = row[10] + + if lon is None or lat is None: + continue + + # Phone: first element of VARCHAR[] + phone = None + if phones and len(phones) > 0: + phone = str(phones[0]) if phones[0] else None + + # Website: first element of VARCHAR[] + website = None + if websites and len(websites) > 0: + website = str(websites[0]) if websites[0] else None + + # Socials: VARCHAR[] → JSON array of strings + socials_json = None + if socials_raw and len(socials_raw) > 0: + socials_json = json.dumps([str(s) for s in socials_raw if s]) 
+ + # Brand: struct with wikidata and names.primary + brand_name = None + brand_wikidata = None + if brand_raw: + try: + if isinstance(brand_raw, dict): + brand_wikidata = brand_raw.get('wikidata') + names_struct = brand_raw.get('names') + if names_struct and isinstance(names_struct, dict): + brand_name = names_struct.get('primary') + else: + # DuckDB struct — access by key + brand_wikidata = brand_raw['wikidata'] if 'wikidata' in dir(brand_raw) else None + try: + brand_wikidata = brand_raw[0] # wikidata is first field + names_struct = brand_raw[1] # names is second field + if names_struct: + brand_name = names_struct[0] # primary is first field + except (IndexError, TypeError): + pass + except Exception: + pass + + # Sources: parse OSM cross-reference + sources_list = None + if sources_raw: + if isinstance(sources_raw, (list, tuple)): + sources_list = [] + for s in sources_raw: + if isinstance(s, dict): + sources_list.append(s) + else: + # DuckDB struct tuple — convert + try: + sources_list.append({ + 'dataset': s[1] if len(s) > 1 else None, + 'record_id': s[3] if len(s) > 3 else None, + }) + except (TypeError, IndexError): + pass + + osm_type_letter, osm_id_val, source_record_id = parse_osm_ref(sources_list) + if osm_type_letter: + osm_refs += 1 + + raw_sources_json = json.dumps(sources_list) if sources_list else None + + batch.append({ + 'id': row_id, + 'lon': float(lon), + 'lat': float(lat), + 'name': name, + 'basic_category': basic_cat, + 'confidence': float(conf) if conf is not None else None, + 'phone': phone, + 'website': website, + 'socials': socials_json, + 'brand_name': brand_name, + 'brand_wikidata': brand_wikidata, + 'osm_type': osm_type_letter, + 'osm_id': osm_id_val, + 'source_record_id': source_record_id, + 'raw_sources': raw_sources_json, + }) + + if len(batch) >= BATCH_SIZE: + with conn.cursor() as cur: + psycopg2.extras.execute_values( + cur, upsert_sql, batch, + template=template, + page_size=BATCH_SIZE + ) + conn.commit() + total += len(batch) 
+ elapsed = time.time() - t_start + rate = total / elapsed if elapsed > 0 else 0 + log.info(f'Inserted {total:,} rows ({osm_refs:,} OSM xrefs) ' + f'[{rate:.0f} rows/sec, {elapsed:.0f}s elapsed]') + batch = [] + + # Flush remaining + if batch: + with conn.cursor() as cur: + psycopg2.extras.execute_values( + cur, upsert_sql, batch, + template=template, + page_size=BATCH_SIZE + ) + conn.commit() + total += len(batch) + + duck.close() + + # Final stats + elapsed = time.time() - t_start + log.info(f'Import complete: {total:,} rows, {osm_refs:,} OSM cross-refs, ' + f'{elapsed:.0f}s total ({total/elapsed:.0f} rows/sec)') + + # Verify + with conn.cursor() as cur: + cur.execute("SELECT count(*) FROM places") + count = cur.fetchone()[0] + cur.execute("SELECT count(*) FROM places WHERE osm_type IS NOT NULL") + osm_count = cur.fetchone()[0] + log.info(f'Final table: {count:,} total rows, {osm_count:,} with OSM cross-references') + + conn.close() + + +if __name__ == '__main__': + run_import() diff --git a/templates/base.html b/templates/base.html index 49b1a21..4c06892 100644 --- a/templates/base.html +++ b/templates/base.html @@ -21,6 +21,7 @@ PeerTube Kiwix Search + Nav-I Settings {% if subnav %} diff --git a/templates/knowledge/deleted_contacts.html b/templates/knowledge/deleted_contacts.html new file mode 100644 index 0000000..58a9ff5 --- /dev/null +++ b/templates/knowledge/deleted_contacts.html @@ -0,0 +1,56 @@ +{% extends "base.html" %} +{% block content %} +

    Deleted Contacts

    +{% if not contacts %} +

    No deleted contacts.

    +{% else %} +
  • + + {% for c in contacts %} + + + + + + + + + {% endfor %} +
    LabelNameCategoryPhoneDeleted AtActions
    {{ c.label }}{{ c.name or '' }}{{ c.category or '' }}{{ c.phone or '' }}{{ c.deleted_at or '' }} + + +
    +{% endif %} +{% endblock %} +{% block scripts %} + +{% endblock %} diff --git a/templates/navi/api_keys.html b/templates/navi/api_keys.html new file mode 100644 index 0000000..abf2d16 --- /dev/null +++ b/templates/navi/api_keys.html @@ -0,0 +1,269 @@ +{% extends "base.html" %} +{% block content %} +

    API Keys

    + +
    +

    Updating keys does not restart RECON. After updates, click Restart RECON below or restart manually from terminal.

    +
    + +
    Loading keys...
    + + + + + + + + + + + +
    + + +
    + + + +{% endblock %} + +{% block scripts %} + +{% endblock %} diff --git a/templates/navi/deleted_contacts.html b/templates/navi/deleted_contacts.html new file mode 100644 index 0000000..0847fab --- /dev/null +++ b/templates/navi/deleted_contacts.html @@ -0,0 +1,116 @@ +{% extends "base.html" %} +{% block content %} +

    Deleted Contacts

    +{% if not contacts %} +

    No deleted contacts.

    +{% else %} + + + {% for c in contacts %} + + + + + + + + + {% endfor %} +
    LabelNameCategoryPhoneDeleted AtActions
    {{ c.label }}{{ c.name or '' }}{{ c.category or '' }}{{ c.phone or '' }}{{ c.deleted_at or '' }} + + +
    +{% endif %} + + + +{% endblock %} +{% block scripts %} + +{% endblock %} diff --git a/templates/navi/landing.html b/templates/navi/landing.html new file mode 100644 index 0000000..131f3af --- /dev/null +++ b/templates/navi/landing.html @@ -0,0 +1,22 @@ +{% extends "base.html" %} +{% block content %} +

    Nav-I

    +

    Navi frontend management — contacts, API keys, and configuration.

    + + +{% endblock %}