def _has_coords(record):
    """Return True when *record* carries usable 'lat' and 'lon' values.

    Missing is defined as ``None`` or an empty string rather than "falsy",
    so a legitimate coordinate of exactly 0.0 (equator / prime meridian)
    is not mistaken for an absent one.
    """
    # NOTE(review): if the address book uses 0 as an "unset" marker,
    # restore the truthiness check here — confirm against address_book.
    return all(record.get(key) not in (None, '') for key in ('lat', 'lon'))


def _address_book_result(match):
    """Shape an address-book match into the common geocode result dict."""
    return {
        'name': match.get('address') or match['name'],
        'lat': match['lat'],
        'lon': match['lon'],
        'source': 'address_book',
        'raw': match,
    }


def _geocode_netsyms(query):
    """Tier 2: look *query* up via ``netsyms.lookup_free_text``.

    Returns a geocode result dict, or None when no row is returned.
    Exceptions propagate to the caller, which treats them as "no match".
    """
    from . import netsyms

    results = netsyms.lookup_free_text(query)
    if not results:
        return None

    # Prefer the first row that carries a plus4 (more precise); otherwise
    # keep the first row returned.
    best = next((r for r in results if r.get('plus4')), results[0])
    addr_parts = [best['number'], best['street'], best.get('street2'),
                  best['city'], best['state'], best['zipcode']]
    # str() guards against numeric columns coming back as ints.
    display = ' '.join(str(p) for p in addr_parts if p)
    return {
        'name': display,
        'lat': best['lat'],
        'lon': best['lon'],
        'source': 'netsyms',
        'raw': best,
    }


def _geocode_photon(query):
    """Tier 3: ask the Photon service at ``PHOTON_URL`` for *query*.

    Returns a geocode result dict built from the first feature, or None
    when the response has no features. HTTP and parsing errors propagate
    to the caller, which treats them as "no match".
    """
    resp = requests.get(
        f"{PHOTON_URL}/api",
        params={"q": query, "limit": 1},
        timeout=2,
    )
    resp.raise_for_status()
    features = resp.json().get("features", [])
    if not features:
        return None

    props = features[0]["properties"]
    lon, lat = features[0]["geometry"]["coordinates"][:2]  # [lon, lat]

    # Build "name, city, county, state, country", skipping empty values and
    # any value that merely repeats the previous component.
    parts = [props.get("name", "")]
    for key in ("city", "county", "state", "country"):
        value = props.get(key)
        if value and value != parts[-1]:
            parts.append(value)
    display = ", ".join(p for p in parts if p)
    return {
        'name': display,
        'lat': lat,
        'lon': lon,
        'source': 'photon',
        'raw': props,
    }


def geocode(query: str):
    """
    Three-tier geocode chain returning a consistent shape.

    Chain: literal coordinates → address_book (exact) → netsyms → photon,
    with an address-book *partial* match used only as a last resort.
    Every tier is best-effort: an exception in one tier is logged at debug
    level and the chain continues with the next tier.

    Returns dict with {name, lat, lon, source, raw} or None when no tier
    produced a match. ``source`` is one of 'coordinates', 'address_book',
    'netsyms' or 'photon'.
    """
    coords = _parse_coords(query)
    if coords:
        return {
            'name': query,
            'lat': coords[0],
            'lon': coords[1],
            'source': 'coordinates',
            'raw': None,
        }

    # ── Tier 1: Address book (exact match only) ──
    ab_partial = None
    try:
        from . import address_book
        match = address_book.lookup(query)
        if match:
            confidence = match.get('confidence')
            if confidence == 'exact' and _has_coords(match):
                logger.info("geocode: address_book exact match: %r → %s",
                            query, match['name'])
                return _address_book_result(match)
            if confidence == 'partial':
                logger.info("geocode: address_book partial match: %r → %s (continuing chain)",
                            query, match['name'])
                ab_partial = match
    except Exception as e:
        logger.debug("geocode: address_book lookup failed: %s", e)

    # ── Tier 2: Netsyms (159M US+CA addresses) ──
    # NOTE(review): lookup_free_text can fall back to zipcode-only rows, in
    # which case the row used here is some address inside that ZIP rather
    # than the queried one — confirm this is acceptable ahead of Photon.
    try:
        result = _geocode_netsyms(query)
        if result:
            logger.info("geocode: netsyms match: %r → %s", query, result['name'])
            return result
    except Exception as e:
        logger.debug("geocode: netsyms lookup failed: %s", e)

    # ── Tier 3: Photon (global geocoding) ──
    try:
        result = _geocode_photon(query)
        if result:
            logger.info("geocode: photon match: %r → %s", query, result['name'])
            return result
    except Exception as e:
        logger.debug("geocode: photon lookup failed: %s", e)

    # ── Fallback: address book partial match ──
    if ab_partial and _has_coords(ab_partial):
        logger.info("geocode: falling back to address_book partial: %r → %s",
                    query, ab_partial['name'])
        return _address_book_result(ab_partial)

    logger.info("geocode: no match for %r across all tiers", query)
    return None
# Trailing 5-digit ZIP, optionally written as ZIP+4 ("83328-1234").
# Only the 5-digit part is captured.
_ZIP_TAIL_RE = re.compile(r'\b(\d{5})(?:-\d{4})?\s*$')

# Longest city name, in words, tried when splitting "<street> <city>".
_MAX_CITY_TOKENS = 3


def lookup_free_text(query, country_hint=None):
    """Parse a free-text address and look it up.

    Parsing steps, applied right-to-left on the query:

    1. A trailing ZIP (``12345`` or ``12345-6789``) is stripped and used as
       a ``zipcode`` filter.
    2. A trailing token found in ``_STATE_CODES`` becomes ``state``.
    3. A leading token starting with a digit becomes the house ``number``.
    4. When a state was found, the last 1..``_MAX_CITY_TOKENS`` remaining
       words are tried as the city (shortest first, so single-word cities
       behave as before and "Twin Falls" style names can also match); the
       words before them form the street.

    Args:
        query: Free-text address, commas optional.
        country_hint: Optional country code passed to the street lookup.

    Returns:
        A list of address dicts from ``lookup_by_street``; if the street
        lookup finds nothing (or no house number was given) and a ZIP was
        present, up to 20 rows from ``lookup_by_zipcode``; otherwise ``[]``.
    """
    q = query.strip()
    if not q:
        return []

    # Strip trailing zipcode if present
    zipcode = None
    zip_match = _ZIP_TAIL_RE.search(q)
    if zip_match:
        zipcode = zip_match.group(1)
        q = q[:zip_match.start()].strip().rstrip(',').strip()

    tokens = [t for t in re.split(r'[,\s]+', q) if t]
    if not tokens:
        # The query was nothing but a ZIP (or only separators).
        return lookup_by_zipcode(zipcode, limit=20) if zipcode else []

    # Strip trailing state; never consume the only token as a state.
    state = None
    if len(tokens) >= 2 and tokens[-1].upper() in _STATE_CODES:
        state = tokens.pop().upper()

    # Leading digits → number
    number = None
    if re.match(r'^\d', tokens[0]):
        number = tokens.pop(0)

    if number and tokens:
        if state and len(tokens) >= 2:
            # Always leave at least one word for the street.
            max_city = min(_MAX_CITY_TOKENS, len(tokens) - 1)
            candidates = [
                (' '.join(tokens[:-k]), ' '.join(tokens[-k:]))
                for k in range(1, max_city + 1)
            ]
        else:
            # Without a state there is no reliable street/city boundary.
            candidates = [(' '.join(tokens), None)]

        for street, city in candidates:
            results = lookup_by_street(number, street, city=city, state=state,
                                       zipcode=zipcode, country=country_hint)
            if results:
                logger.debug("lookup_free_text(%r) → %d results via street match",
                             query, len(results))
                return results

    # Fallback: try zipcode only if available
    if zipcode:
        return lookup_by_zipcode(zipcode, limit=20)

    logger.debug("lookup_free_text(%r) → 0 results", query)
    return []
# Cached result of the DISTINCT-country scan; None until it first succeeds.
_cached_countries = None


def health():
    """Health check with cached row count and country list.

    The database is opened read-only, so both the row count and the list
    of distinct countries are computed once per process and then reused.
    This keeps repeated health checks from re-scanning the table while
    holding the lock that the lookup functions also take. A failed query
    is not cached, so the next call retries it.

    Returns:
        dict with keys:
          ok: False when the file is missing, the connection cannot be
              opened, or either statistics query has not yet succeeded.
          row_count: number of rows in ``addresses`` (0 when unknown).
          file_size_bytes: size of the database file (0 when missing).
          indexed_countries: sorted distinct non-NULL country values.
    """
    global _cached_row_count, _cached_countries

    try:
        file_size = os.path.getsize(_DB_PATH)
    except OSError:
        return {'ok': False, 'row_count': 0, 'file_size_bytes': 0,
                'indexed_countries': []}

    try:
        conn = _get_conn()
    except Exception:
        return {'ok': False, 'row_count': 0, 'file_size_bytes': file_size,
                'indexed_countries': []}

    if _cached_row_count is None or _cached_countries is None:
        with _lock:
            # Re-check under the lock: another thread may have filled the
            # cache while this one was waiting.
            if _cached_row_count is None:
                try:
                    row = conn.execute(
                        "SELECT COUNT(*) AS cnt FROM addresses"
                    ).fetchone()
                    _cached_row_count = row['cnt']
                except sqlite3.Error as e:
                    logger.warning("Netsyms health row count error: %s", e)
            if _cached_countries is None:
                try:
                    rows = conn.execute(
                        "SELECT DISTINCT country FROM addresses"
                    ).fetchall()
                    # Drop NULLs: sorting None against str raises TypeError.
                    _cached_countries = sorted(
                        r['country'] for r in rows if r['country'] is not None
                    )
                except sqlite3.Error as e:
                    logger.warning("Netsyms health country scan error: %s", e)

    row_count = _cached_row_count
    countries = _cached_countries
    return {
        'ok': row_count is not None and countries is not None,
        'row_count': row_count or 0,
        'file_size_bytes': file_size,
        # Copy so callers cannot mutate the cached list.
        'indexed_countries': list(countries or []),
    }
@netsyms_bp.route('/api/netsyms/lookup')
def api_netsyms_lookup():
    """GET /api/netsyms/lookup?q=&country= — free-text Netsyms lookup.

    Query parameters:
        q: Free-text address (required; blank values are rejected).
        country: Optional country code passed through as ``country_hint``.

    Responses:
        200: ``{'results': [...], 'count': <int>, 'query': q}``
        400: ``q`` missing or blank.
        503: the lookup raised (for example the database file cannot be
             opened); the error is logged with its traceback.
    """
    q = request.args.get('q', '').strip()
    if not q:
        return jsonify({'error': 'Missing q parameter'}), 400

    country = request.args.get('country', '').strip() or None
    try:
        results = netsyms.lookup_free_text(q, country_hint=country)
    except Exception:
        # Opening the database happens outside the lookup functions' own
        # sqlite3.Error handling, so an unavailable database surfaces here.
        # Answer with JSON instead of a generic 500 page.
        logger.exception("Netsyms lookup failed for %r", q)
        return jsonify({'error': 'Netsyms database unavailable',
                        'query': q}), 503

    return jsonify({'results': results, 'count': len(results), 'query': q})
if __name__ == '__main__':
    # Script entry point: run each check in a fixed order. The first
    # failing assertion aborts the run with its traceback, so the closing
    # message is printed only when every check succeeded.
    print("Running Netsyms tests...")
    for run_check in (
        test_lookup_by_street_lowercase,
        test_lookup_by_street_uppercase,
        test_lookup_nonexistent,
        test_free_text_with_commas,
        test_free_text_no_commas,
        test_lookup_by_zipcode,
        test_health,
    ):
        run_check()
    print("All tests passed.")