feat(navi): add netsyms tier-2 geocoding + geocode API

Add Netsyms AddressDatabase2025 (159M US+CA addresses) as tier-2
in the geocode chain: address_book → netsyms → photon.

- lib/netsyms.py: SQLite lookup module (lazy, read-only, thread-safe)
- lib/netsyms_api.py: Flask blueprints for /api/netsyms/* and /api/geocode
- lib/netsyms_test.py: 7 test cases (street, free-text, zipcode, health)
- lib/nav_tools.py: new geocode() with consistent {name,lat,lon,source,raw}
- lib/api.py: register netsyms_bp and geocode_bp

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Matt 2026-04-20 07:24:09 +00:00
commit dfab388769
5 changed files with 475 additions and 0 deletions

View file

@ -48,6 +48,11 @@ app.config['MAX_CONTENT_LENGTH'] = None # ZIM files can be multi-GB
from .address_book_api import address_book_bp from .address_book_api import address_book_bp
app.register_blueprint(address_book_bp) app.register_blueprint(address_book_bp)
# ── Netsyms + Geocode Blueprints ──
# Both blueprints are defined in netsyms_api: netsyms_bp carries the
# /api/netsyms/* routes and geocode_bp carries /api/geocode.
from .netsyms_api import netsyms_bp, geocode_bp
app.register_blueprint(netsyms_bp)
app.register_blueprint(geocode_bp)
# ── Navigation Constants ── # ── Navigation Constants ──

View file

@ -70,6 +70,120 @@ def _geocode(query: str):
return coords[1], coords[0], display # lat, lon return coords[1], coords[0], display # lat, lon
def geocode(query: str):
    """
    Resolve *query* to coordinates through a tiered geocode chain.

    Chain: coordinate literal → address_book (exact) → netsyms → photon,
    with an address-book *partial* match kept back and used only when every
    other tier comes up empty.

    Args:
        query: Free-text address, place name, or coordinate string.

    Returns:
        dict with keys ``name``, ``lat``, ``lon``, ``source`` and ``raw``,
        or ``None`` when no tier produces a match.  ``source`` is one of
        ``'coordinates'``, ``'address_book'``, ``'netsyms'`` or ``'photon'``;
        ``raw`` is the tier's own record (``None`` for coordinates).

    Each tier is best-effort: any exception it raises is logged at debug
    level and the chain simply moves on to the next tier.
    """
    # A query that already parses as coordinates short-circuits the chain.
    parsed = _parse_coords(query)
    if parsed:
        return {
            'name': query,
            'lat': parsed[0],
            'lon': parsed[1],
            'source': 'coordinates',
            'raw': None,
        }

    # ── Tier 1: Address book (exact match only) ──
    ab_partial = None
    try:
        from . import address_book
        match = address_book.lookup(query)
        # NOTE(review): the truthiness test on lat/lon also rejects a literal
        # 0.0 coordinate — confirm whether 0 is used as a "missing" marker
        # before tightening this to an ``is not None`` check.
        if match and match['confidence'] == 'exact' and match.get('lat') and match.get('lon'):
            logger.info("geocode: address_book exact match: %r → %s",
                        query, match['name'])
            return {
                'name': match.get('address') or match['name'],
                'lat': match['lat'],
                'lon': match['lon'],
                'source': 'address_book',
                'raw': match,
            }
        elif match and match['confidence'] == 'partial':
            # Remember the partial hit, but give the other tiers a chance first.
            logger.info("geocode: address_book partial match: %r → %s (continuing chain)",
                        query, match['name'])
            ab_partial = match
    except Exception as e:
        logger.debug("geocode: address_book lookup failed: %s", e)

    # ── Tier 2: Netsyms (159M US+CA addresses) ──
    try:
        from . import netsyms
        results = netsyms.lookup_free_text(query)
        if results:
            # Prefer the first result carrying a plus4 (more precise);
            # fall back to the first result when none has one.
            best = next((r for r in results if r.get('plus4')), results[0])
            addr_parts = [best['number'], best['street']]
            if best.get('street2'):
                addr_parts.append(best['street2'])
            addr_parts.extend([best['city'], best['state'], best['zipcode']])
            # str() keeps the join safe should a column come back non-text.
            display = ' '.join(str(p) for p in addr_parts if p)
            logger.info("geocode: netsyms match: %r → %s", query, display)
            return {
                'name': display,
                'lat': best['lat'],
                'lon': best['lon'],
                'source': 'netsyms',
                'raw': best,
            }
    except Exception as e:
        logger.debug("geocode: netsyms lookup failed: %s", e)

    # ── Tier 3: Photon (global geocoding) ──
    try:
        resp = requests.get(
            f"{PHOTON_URL}/api",
            params={"q": query, "limit": 1},
            timeout=2,
        )
        resp.raise_for_status()
        data = resp.json()
        features = data.get("features", [])
        if features:
            props = features[0]["properties"]
            point = features[0]["geometry"]["coordinates"]  # [lon, lat]
            # Build "name, city, county, state, country", skipping a value
            # that merely repeats the part appended just before it.
            parts = [props.get("name", "")]
            for key in ("city", "county", "state", "country"):
                v = props.get(key)
                if v and v != parts[-1]:
                    parts.append(v)
            display = ", ".join(p for p in parts if p)
            logger.info("geocode: photon match: %r → %s", query, display)
            return {
                'name': display,
                'lat': point[1],
                'lon': point[0],
                'source': 'photon',
                'raw': props,
            }
    except Exception as e:
        logger.debug("geocode: photon lookup failed: %s", e)

    # ── Fallback: address book partial match ──
    if ab_partial and ab_partial.get('lat') and ab_partial.get('lon'):
        logger.info("geocode: falling back to address_book partial: %r → %s",
                    query, ab_partial['name'])
        return {
            'name': ab_partial.get('address') or ab_partial['name'],
            'lat': ab_partial['lat'],
            'lon': ab_partial['lon'],
            'source': 'address_book',
            'raw': ab_partial,
        }

    logger.info("geocode: no match for %r across all tiers", query)
    return None
def reverse_geocode(lat: float, lon: float) -> str: def reverse_geocode(lat: float, lon: float) -> str:
"""Reverse geocode coordinates via Photon. Returns formatted address string.""" """Reverse geocode coordinates via Photon. Returns formatted address string."""
try: try:

228
lib/netsyms.py Normal file
View file

@ -0,0 +1,228 @@
"""
RECON Netsyms AddressDatabase2025 — SQLite-backed US+CA address lookup.
Provides 159.78M geocoded addresses as tier-2 between address book
(exact named locations) and Photon (full-text global geocoding).
Database: /mnt/nav/addresses/AddressDatabase2025.sqlite (read-only)
"""
import os
import re
import sqlite3
import threading
from .utils import setup_logging
logger = setup_logging('recon.netsyms')
# Absolute path of the Netsyms SQLite database file (opened read-only).
_DB_PATH = '/mnt/nav/addresses/AddressDatabase2025.sqlite'
# Process-wide connection, created on first use by _get_conn().
_conn = None
# Guards creation of _conn and every query executed on it.
_lock = threading.Lock()
# Result of COUNT(*) on the addresses table, filled in by health().
_cached_row_count = None
# Two-letter codes recognised as a trailing state/province token by
# lookup_free_text(): US states + DC + territories, then Canadian provinces.
_STATE_CODES = {
    'AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA',
    'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD',
    'MA', 'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ',
    'NM', 'NY', 'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC',
    'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY',
    'DC', 'PR', 'VI', 'GU', 'AS', 'MP',
    # Canadian provinces and territories
    'AB', 'BC', 'MB', 'NB', 'NL', 'NS', 'NT', 'NU', 'ON', 'PE',
    'QC', 'SK', 'YT',
}
# Splits a string into a leading number token (digits, optionally followed by
# word characters or hyphens) and the remainder.
# NOTE(review): not referenced by any function visible in this module — confirm
# whether it is still needed.
_NUMBER_RE = re.compile(r'^(\d+[\w-]*)(.*)$')
def _get_conn():
    """Return the shared read-only SQLite connection, opening it on first use.

    The connection is opened through a ``file:...?mode=ro`` URI with
    ``check_same_thread=False`` so it can be handed to any thread; callers
    serialise their queries with ``_lock``.

    The new connection is fully configured (``row_factory`` set) *before* it
    is published in the ``_conn`` global.  The fast path below reads ``_conn``
    without holding the lock, so publishing first and configuring afterwards
    would let another thread pick up a connection that still returns plain
    tuples instead of ``sqlite3.Row`` objects.

    Returns:
        sqlite3.Connection: the process-wide connection.

    Raises:
        sqlite3.Error: propagated from ``sqlite3.connect`` when the database
            cannot be opened; ``_conn`` stays ``None`` so a later call retries.
    """
    global _conn
    # Fast path: already opened, no locking needed.
    if _conn is not None:
        return _conn
    with _lock:
        # Re-check under the lock: another thread may have won the race.
        if _conn is None:
            uri = f'file:{_DB_PATH}?mode=ro'
            conn = sqlite3.connect(uri, uri=True, check_same_thread=False)
            conn.row_factory = sqlite3.Row
            _conn = conn
            logger.info("Netsyms DB opened: %s", _DB_PATH)
        return _conn
def _row_to_dict(row):
    """Build a plain dict from an ``addresses`` row.

    Most columns are copied under their own name; ``latitude`` and
    ``longitude`` are converted with ``float()`` and exposed as ``lat`` and
    ``lon``.  *row* only needs to support ``row[column_name]`` access.
    """
    copied_columns = ('zipcode', 'number', 'street', 'street2',
                      'city', 'state', 'plus4', 'country')
    record = {column: row[column] for column in copied_columns}
    record['lat'] = float(row['latitude'])
    record['lon'] = float(row['longitude'])
    record['source'] = row['source']
    return record
def lookup_by_street(number, street, city=None, state=None,
                     zipcode=None, country=None, limit=20):
    """Find addresses by exact house number and street name.

    ``number`` and ``street`` are always part of the match.  ``city``,
    ``state``, ``zipcode`` and ``country`` narrow it further when given a
    truthy value.  Every value is stripped, and all but ``zipcode`` are
    upper-cased before comparison.

    Returns a list of row dicts (at most ``limit``); an empty list when
    nothing matches or when SQLite reports an error.
    """
    conn = _get_conn()

    # (column, bound value) pairs in the order they appear in the WHERE clause.
    filters = [
        ('number', str(number).strip().upper()),
        ('street', street.strip().upper()),
    ]
    if city:
        filters.append(('city', city.strip().upper()))
    if state:
        filters.append(('state', state.strip().upper()))
    if zipcode:
        filters.append(('zipcode', zipcode.strip()))
    if country:
        filters.append(('country', country.strip().upper()))

    where = ' AND '.join(f'{column} = ?' for column, _ in filters)
    sql = f"SELECT * FROM addresses WHERE {where} LIMIT ?"
    bindings = [value for _, value in filters]
    bindings.append(limit)

    # The connection is shared across threads, so queries run under the lock.
    with _lock:
        try:
            fetched = conn.execute(sql, bindings).fetchall()
        except sqlite3.Error as exc:
            logger.warning("Netsyms lookup_by_street error: %s", exc)
            return []

    matches = list(map(_row_to_dict, fetched))
    logger.debug("lookup_by_street(%s, %s, city=%s, state=%s) → %d results",
                 number, street, city, state, len(matches))
    return matches
# Trailing 5-digit ZIP, optionally followed by a "-NNNN" plus4 suffix.
_ZIP_TAIL_RE = re.compile(r'\b(\d{5})(?:-\d{4})?\s*$')


def lookup_free_text(query, country_hint=None):
    """Parse a free-text address and look it up.

    The query is peeled apart from the right and then from the left:

    1. a trailing 5-digit ZIP (a ``-NNNN`` plus4 suffix is accepted and
       discarded) becomes ``zipcode``;
    2. a trailing two-letter code found in ``_STATE_CODES`` becomes ``state``
       (only when at least one other token remains);
    3. a leading token that starts with a digit becomes ``number``;
    4. if a state was found and two or more tokens are left, the last one is
       taken as ``city`` — a single token only, so multi-word city names are
       not split off correctly;
    5. whatever remains is the street.

    Args:
        query: Free-text address, tokens separated by commas and/or whitespace.
        country_hint: Optional country code forwarded to the street lookup.

    Returns:
        List of row dicts.  Street matches are returned when the query has a
        house number and the lookup finds rows; otherwise, if a ZIP was
        recognised, up to 20 rows for that ZIP; otherwise an empty list.
    """
    q = query.strip()
    if not q:
        return []

    # Strip trailing zipcode if present
    zipcode = None
    zip_match = _ZIP_TAIL_RE.search(q)
    if zip_match:
        zipcode = zip_match.group(1)
        q = q[:zip_match.start()].strip().rstrip(',').strip()

    # No early return on an empty token list here: a bare ZIP query leaves
    # nothing behind and must still reach the zipcode fallback below.
    tokens = [t for t in re.split(r'[,\s]+', q) if t]

    # Strip trailing state
    state = None
    if len(tokens) >= 2 and tokens[-1].upper() in _STATE_CODES:
        state = tokens[-1].upper()
        tokens = tokens[:-1]

    # Leading digits → number
    number = None
    if tokens and re.match(r'^\d', tokens[0]):
        number = tokens[0]
        tokens = tokens[1:]

    if not tokens:
        # Only a ZIP, only a number, or nothing usable — try the ZIP if any.
        if zipcode:
            return lookup_by_zipcode(zipcode, limit=20)
        return []

    # If state was found and we have 2+ tokens remaining, last token is city
    city = None
    if state and len(tokens) >= 2:
        city = tokens[-1]
        tokens = tokens[:-1]

    street = ' '.join(tokens)
    if number:
        results = lookup_by_street(number, street, city=city, state=state,
                                   zipcode=zipcode, country=country_hint)
        if results:
            logger.debug("lookup_free_text(%r) → %d results via street match",
                         query, len(results))
            return results

    # Fallback: try zipcode only if available
    if zipcode:
        return lookup_by_zipcode(zipcode, limit=20)

    logger.debug("lookup_free_text(%r) → 0 results", query)
    return []
def lookup_by_zipcode(zipcode, limit=100):
    """Return up to ``limit`` address rows whose zipcode equals *zipcode*.

    The zipcode is stripped of surrounding whitespace before matching.  An
    empty list is returned when nothing matches or SQLite reports an error.
    """
    conn = _get_conn()
    bindings = [zipcode.strip(), limit]
    statement = "SELECT * FROM addresses WHERE zipcode = ? LIMIT ?"

    # The connection is shared across threads, so queries run under the lock.
    with _lock:
        try:
            fetched = conn.execute(statement, bindings).fetchall()
        except sqlite3.Error as exc:
            logger.warning("Netsyms lookup_by_zipcode error: %s", exc)
            return []

    matches = list(map(_row_to_dict, fetched))
    logger.debug("lookup_by_zipcode(%s) → %d results", zipcode, len(matches))
    return matches
# Sorted list of distinct country codes, filled in by health() on success.
_cached_countries = None


def health():
    """Report database availability, size, row count and indexed countries.

    The row count and the distinct-country list both require a scan of the
    ``addresses`` table, so each is computed once and cached at module level
    (the database is opened read-only, so neither changes while this
    connection is open).  A query that fails is *not* cached — it is logged
    and retried on the next call.

    Returns:
        dict with keys:
            ``ok``: True only when the file exists, the connection opens and
                both statistics are available.
            ``row_count``: number of rows in ``addresses`` (0 if unknown).
            ``file_size_bytes``: size of the database file (0 if missing).
            ``indexed_countries``: sorted distinct non-NULL ``country`` values
                (empty if unknown).
    """
    global _cached_row_count, _cached_countries

    try:
        file_size = os.path.getsize(_DB_PATH)
    except OSError:
        return {'ok': False, 'row_count': 0, 'file_size_bytes': 0,
                'indexed_countries': []}

    try:
        conn = _get_conn()
    except Exception:
        return {'ok': False, 'row_count': 0, 'file_size_bytes': file_size,
                'indexed_countries': []}

    if _cached_row_count is None or _cached_countries is None:
        with _lock:
            # Re-check under the lock; another thread may have filled these.
            if _cached_row_count is None:
                try:
                    row = conn.execute(
                        "SELECT COUNT(*) AS cnt FROM addresses"
                    ).fetchone()
                    _cached_row_count = row['cnt']
                except sqlite3.Error as e:
                    logger.warning("Netsyms health row count failed: %s", e)
            if _cached_countries is None:
                try:
                    rows = conn.execute(
                        "SELECT DISTINCT country FROM addresses"
                    ).fetchall()
                    # Drop NULLs: sorting None alongside str raises TypeError.
                    _cached_countries = sorted(
                        r['country'] for r in rows if r['country'] is not None
                    )
                except sqlite3.Error as e:
                    logger.warning("Netsyms health country scan failed: %s", e)

    row_count = _cached_row_count
    countries = _cached_countries
    return {
        'ok': row_count is not None and countries is not None,
        'row_count': row_count if row_count is not None else 0,
        'file_size_bytes': file_size,
        # Copy so callers cannot mutate the cached list.
        'indexed_countries': list(countries) if countries is not None else [],
    }

48
lib/netsyms_api.py Normal file
View file

@ -0,0 +1,48 @@
"""
RECON Netsyms API + Geocode chain — Flask Blueprints.
GET /api/netsyms/lookup?q=<free text>&country=<optional>
GET /api/netsyms/health
GET /api/geocode?q=<query> (full 3-tier chain: address_book → netsyms → photon)
"""
from flask import Blueprint, request, jsonify
from . import netsyms
from . import address_book
from . import nav_tools
from .utils import setup_logging
logger = setup_logging('recon.netsyms_api')
# Blueprint for the /api/netsyms/lookup and /api/netsyms/health routes.
netsyms_bp = Blueprint('netsyms', __name__)
# Blueprint for the /api/geocode route.
geocode_bp = Blueprint('geocode', __name__)
@netsyms_bp.route('/api/netsyms/lookup')
def api_netsyms_lookup():
    """GET /api/netsyms/lookup — free-text lookup against Netsyms only.

    Query parameters: ``q`` (required) and ``country`` (optional hint).
    Responds 400 when ``q`` is missing or blank, otherwise a JSON object with
    ``results``, ``count`` and the echoed ``query``.
    """
    query_text = request.args.get('q', '').strip()
    if not query_text:
        return jsonify({'error': 'Missing q parameter'}), 400

    # A blank country parameter is treated the same as an absent one.
    country_hint = request.args.get('country', '').strip() or None
    matches = netsyms.lookup_free_text(query_text, country_hint=country_hint)
    payload = {'results': matches, 'count': len(matches), 'query': query_text}
    return jsonify(payload)
@netsyms_bp.route('/api/netsyms/health')
def api_netsyms_health():
    """GET /api/netsyms/health — return ``netsyms.health()`` as JSON."""
    status = netsyms.health()
    return jsonify(status)
@geocode_bp.route('/api/geocode')
def api_geocode():
    """GET /api/geocode — run the full geocode chain for ``q``.

    Responds 400 when ``q`` is missing or blank, 404 when
    ``nav_tools.geocode`` returns ``None``, otherwise the result as JSON.
    """
    query_text = request.args.get('q', '').strip()
    if not query_text:
        return jsonify({'error': 'Missing q parameter'}), 400

    hit = nav_tools.geocode(query_text)
    if hit is not None:
        return jsonify(hit)
    return jsonify({'error': 'No results', 'query': query_text}), 404

80
lib/netsyms_test.py Normal file
View file

@ -0,0 +1,80 @@
#!/usr/bin/env python3
"""Tests for Netsyms address database module."""
import sys
import os
# Ensure the lib directory is importable
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from lib import netsyms
def test_lookup_by_street_lowercase():
    """Mixed-case street and city input still finds 214 North St, Filer, ID."""
    matches = netsyms.lookup_by_street(
        "214", "North St", city="Filer", state="ID",
    )
    assert len(matches) >= 1, f"Expected at least 1 result, got {len(matches)}"
    first = matches[0]
    # Coordinates must land within 0.01 degrees of the expected point.
    assert abs(first['lat'] - 42.5736) < 0.01, f"Lat mismatch: {first['lat']}"
    assert abs(first['lon'] - (-114.6066)) < 0.01, f"Lon mismatch: {first['lon']}"
    print(" PASS: lookup_by_street (lowercase)")
def test_lookup_by_street_uppercase():
    """Upper-case street and city input finds 214 North St, Filer, ID.

    Checks both latitude and longitude, mirroring the lowercase variant of
    this test, which looks up the same address.
    """
    results = netsyms.lookup_by_street("214", "NORTH ST", city="FILER", state="ID")
    assert len(results) >= 1, f"Expected at least 1 result, got {len(results)}"
    r = results[0]
    # Coordinates must land within 0.01 degrees of the expected point.
    assert abs(r['lat'] - 42.5736) < 0.01, f"Lat mismatch: {r['lat']}"
    assert abs(r['lon'] - (-114.6066)) < 0.01, f"Lon mismatch: {r['lon']}"
    print(" PASS: lookup_by_street (uppercase)")
def test_lookup_nonexistent():
    """A made-up house number and street must yield an empty list."""
    matches = netsyms.lookup_by_street(
        "999999", "Nonexistent Rd", city="Filer", state="ID",
    )
    assert matches == [], f"Expected empty list, got {len(matches)} results"
    print(" PASS: lookup_by_street (nonexistent)")
def test_free_text_with_commas():
    """A comma-separated free-text address resolves to FILER, ID."""
    matches = netsyms.lookup_free_text("214 North St, Filer, ID")
    assert len(matches) >= 1, f"Expected at least 1 result, got {len(matches)}"
    first = matches[0]
    assert first['city'] == 'FILER', f"City mismatch: {first['city']}"
    assert first['state'] == 'ID', f"State mismatch: {first['state']}"
    print(" PASS: lookup_free_text (commas)")
def test_free_text_no_commas():
    """The same address without commas still resolves to a row in ID."""
    matches = netsyms.lookup_free_text("214 North St Filer ID")
    assert len(matches) >= 1, f"Expected at least 1 result, got {len(matches)}"
    first = matches[0]
    assert first['state'] == 'ID', f"State mismatch: {first['state']}"
    print(" PASS: lookup_free_text (no commas)")
def test_lookup_by_zipcode():
    """A zipcode lookup with limit=5 returns exactly 5 rows, all in that ZIP."""
    matches = netsyms.lookup_by_zipcode("83328", limit=5)
    assert len(matches) == 5, f"Expected 5 results, got {len(matches)}"
    for entry in matches:
        assert entry['zipcode'] == '83328', f"Zipcode mismatch: {entry['zipcode']}"
    print(" PASS: lookup_by_zipcode")
def test_health():
    """health() reports ok, at least 159M rows, and both US and CA indexed."""
    status = netsyms.health()
    assert status['ok'] is True, f"Health not OK: {status}"
    assert status['row_count'] >= 159_000_000, f"Row count too low: {status['row_count']}"
    # Each expected country is checked separately, US first.
    assert 'US' in status['indexed_countries'], f"US not in countries: {status['indexed_countries']}"
    assert 'CA' in status['indexed_countries'], f"CA not in countries: {status['indexed_countries']}"
    print(" PASS: health")
if __name__ == '__main__':
    # Run every test in declaration order; the first failing assertion aborts.
    print("Running Netsyms tests...")
    for test_case in (
        test_lookup_by_street_lowercase,
        test_lookup_by_street_uppercase,
        test_lookup_nonexistent,
        test_free_text_with_commas,
        test_free_text_no_commas,
        test_lookup_by_zipcode,
        test_health,
    ):
        test_case()
    print("All tests passed.")