mirror of
https://github.com/zvx-echo6/recon.git
synced 2026-05-20 06:34:40 +02:00
feat(navi): add netsyms tier-2 geocoding + geocode API
Add Netsyms AddressDatabase2025 (159M US+CA addresses) as tier-2
in the geocode chain: address_book → netsyms → photon.
- lib/netsyms.py: SQLite lookup module (lazy, read-only, thread-safe)
- lib/netsyms_api.py: Flask blueprints for /api/netsyms/* and /api/geocode
- lib/netsyms_test.py: 7 test cases (street, free-text, zipcode, health)
- lib/nav_tools.py: new geocode() with consistent {name,lat,lon,source,raw}
- lib/api.py: register netsyms_bp and geocode_bp
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
23483e8198
commit
dfab388769
5 changed files with 475 additions and 0 deletions
|
|
@ -48,6 +48,11 @@ app.config['MAX_CONTENT_LENGTH'] = None # ZIM files can be multi-GB
|
|||
from .address_book_api import address_book_bp
|
||||
app.register_blueprint(address_book_bp)
|
||||
|
||||
# ── Netsyms + Geocode Blueprints ──
# Both blueprints are defined in netsyms_api; the import is placed next to the
# registration calls, mirroring the address_book blueprint just above.
from .netsyms_api import netsyms_bp, geocode_bp
app.register_blueprint(netsyms_bp)
app.register_blueprint(geocode_bp)
|
||||
|
||||
|
||||
# ── Navigation Constants ──
|
||||
|
||||
|
|
|
|||
114
lib/nav_tools.py
114
lib/nav_tools.py
|
|
@ -70,6 +70,120 @@ def _geocode(query: str):
|
|||
return coords[1], coords[0], display # lat, lon
|
||||
|
||||
|
||||
|
||||
def _netsyms_row_matches_query(query, row):
    """Return True when a Netsyms row is a house-number + street hit for *query*.

    ``netsyms.lookup_free_text`` can return rows from a ZIP-only lookup when
    its number + street match finds nothing; such rows are arbitrary addresses
    that merely share the ZIP. A row found through the number + street match
    carries a ``number`` and ``street`` equal to the upper-cased leading tokens
    of the query, so that prefix relationship is what is tested here.
    """
    number = str(row.get('number') or '').strip()
    street = str(row.get('street') or '').strip()
    if not number or not street:
        return False
    # Normalise both sides the same way: commas act as whitespace, runs of
    # whitespace collapse to one space, comparison is case-insensitive.
    wanted = ' '.join(query.replace(',', ' ').split()).upper()
    found = ' '.join(f"{number} {street}".split()).upper()
    # Require a token boundary after the prefix so "NORTH ST" does not match
    # a query for "NORTH STREET".
    return wanted == found or wanted.startswith(found + ' ')


def geocode(query: str):
    """
    Three-tier geocode chain returning a consistent shape.

    Chain: address_book (exact) → netsyms → photon.

    A Netsyms result short-circuits the chain only when its house number and
    street actually lead the query. Rows that come from the ZIP-only fallback
    of the Netsyms lookup are kept as a last resort, so they cannot pre-empt
    Photon or an address-book partial match with an unrelated address.

    Fallback order when no tier gives a definitive answer:
    address_book partial → netsyms approximate.

    Each tier is best-effort: any exception is logged at debug level and the
    chain continues with the next tier.

    Returns dict with {name, lat, lon, source, raw} or None.
    """
    coords = _parse_coords(query)
    if coords:
        return {
            'name': query,
            'lat': coords[0],
            'lon': coords[1],
            'source': 'coordinates',
            'raw': None,
        }

    # ── Tier 1: Address book (exact match only) ──
    ab_partial = None
    try:
        from . import address_book
        match = address_book.lookup(query)
        # Truthiness check: entries whose lat/lon are missing (or zero) are
        # not usable as an answer.
        if match and match['confidence'] == 'exact' and match.get('lat') and match.get('lon'):
            logger.info("geocode: address_book exact match: %r → %s", query, match['name'])
            return {
                'name': match.get('address') or match['name'],
                'lat': match['lat'],
                'lon': match['lon'],
                'source': 'address_book',
                'raw': match,
            }
        elif match and match['confidence'] == 'partial':
            logger.info("geocode: address_book partial match: %r → %s (continuing chain)",
                        query, match['name'])
            ab_partial = match
    except Exception as e:
        logger.debug("geocode: address_book lookup failed: %s", e)

    # ── Tier 2: Netsyms (159M US+CA addresses) ──
    netsyms_approx = None
    try:
        from . import netsyms
        results = netsyms.lookup_free_text(query)
        if results:
            street_hits = [r for r in results if _netsyms_row_matches_query(query, r)]
            candidates = street_hits or results
            # Prefer results with plus4 (more precise); otherwise the first row.
            best = next((r for r in candidates if r.get('plus4')), candidates[0])
            addr_parts = [best['number'], best['street']]
            if best.get('street2'):
                addr_parts.append(best['street2'])
            addr_parts.extend([best['city'], best['state'], best['zipcode']])
            display = ' '.join(p for p in addr_parts if p)
            netsyms_result = {
                'name': display,
                'lat': best['lat'],
                'lon': best['lon'],
                'source': 'netsyms',
                'raw': best,
            }
            if street_hits:
                logger.info("geocode: netsyms match: %r → %s", query, display)
                return netsyms_result
            # Not a number+street hit: remember it, but let Photon try first.
            logger.info("geocode: netsyms approximate match: %r → %s (continuing chain)",
                        query, display)
            netsyms_approx = netsyms_result
    except Exception as e:
        logger.debug("geocode: netsyms lookup failed: %s", e)

    # ── Tier 3: Photon (global geocoding) ──
    try:
        resp = requests.get(
            f"{PHOTON_URL}/api",
            params={"q": query, "limit": 1},
            timeout=2,
        )
        resp.raise_for_status()
        data = resp.json()
        features = data.get("features", [])
        if features:
            props = features[0]["properties"]
            coords = features[0]["geometry"]["coordinates"]  # [lon, lat]
            parts = [props.get("name", "")]
            for key in ("city", "county", "state", "country"):
                v = props.get(key)
                # Skip a component that merely repeats the previous one.
                if v and v != parts[-1]:
                    parts.append(v)
            display = ", ".join(p for p in parts if p)
            logger.info("geocode: photon match: %r → %s", query, display)
            return {
                'name': display,
                'lat': coords[1],
                'lon': coords[0],
                'source': 'photon',
                'raw': props,
            }
    except Exception as e:
        logger.debug("geocode: photon lookup failed: %s", e)

    # ── Fallback: address book partial match ──
    if ab_partial and ab_partial.get('lat') and ab_partial.get('lon'):
        logger.info("geocode: falling back to address_book partial: %r → %s",
                    query, ab_partial['name'])
        return {
            'name': ab_partial.get('address') or ab_partial['name'],
            'lat': ab_partial['lat'],
            'lon': ab_partial['lon'],
            'source': 'address_book',
            'raw': ab_partial,
        }

    # ── Fallback: approximate Netsyms row (e.g. same ZIP only) ──
    if netsyms_approx is not None:
        logger.info("geocode: falling back to netsyms approximate: %r → %s",
                    query, netsyms_approx['name'])
        return netsyms_approx

    logger.info("geocode: no match for %r across all tiers", query)
    return None
|
||||
|
||||
|
||||
def reverse_geocode(lat: float, lon: float) -> str:
|
||||
"""Reverse geocode coordinates via Photon. Returns formatted address string."""
|
||||
try:
|
||||
|
|
|
|||
228
lib/netsyms.py
Normal file
228
lib/netsyms.py
Normal file
|
|
@ -0,0 +1,228 @@
|
|||
"""
|
||||
RECON Netsyms AddressDatabase2025 — SQLite-backed US+CA address lookup.
|
||||
|
||||
Provides 159.78M geocoded addresses as tier-2 between address book
|
||||
(exact named locations) and Photon (full-text global geocoding).
|
||||
|
||||
Database: /mnt/nav/addresses/AddressDatabase2025.sqlite (read-only)
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import sqlite3
|
||||
import threading
|
||||
|
||||
from .utils import setup_logging
|
||||
|
||||
# Module logger (namespace 'recon.netsyms').
logger = setup_logging('recon.netsyms')

# Location of the SQLite file; _get_conn() opens it with mode=ro.
_DB_PATH = '/mnt/nav/addresses/AddressDatabase2025.sqlite'

# Shared connection, None until _get_conn() opens it on first use.
_conn = None
# Guards creation of the shared connection and every query executed on it.
_lock = threading.Lock()
# COUNT(*) of the addresses table, memoised by health(); None until computed.
_cached_row_count = None

# US states + DC + territories, CA provinces, for free-text parsing
# (lookup_free_text uses it to recognise a trailing state token).
_STATE_CODES = {
    'AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA',
    'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD',
    'MA', 'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ',
    'NM', 'NY', 'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC',
    'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY',
    'DC', 'PR', 'VI', 'GU', 'AS', 'MP',
    # Canadian provinces
    'AB', 'BC', 'MB', 'NB', 'NL', 'NS', 'NT', 'NU', 'ON', 'PE',
    'QC', 'SK', 'YT',
}

# Leading digits (optionally followed by word characters / hyphens) in group 1,
# the remainder of the string in group 2.
# NOTE(review): not referenced by any function visible in this module.
_NUMBER_RE = re.compile(r'^(\d+[\w-]*)(.*)$')
|
||||
|
||||
|
||||
def _get_conn():
    """Lazy-open a read-only SQLite connection.

    The connection is created once and shared by all callers. It is opened
    with ``check_same_thread=False``; callers serialise their queries on
    ``_lock``.

    The connection is fully configured in a local variable and only then
    published to the module global. The lock-free fast path below can
    therefore never hand out a connection whose ``row_factory`` has not been
    set yet.

    Raises whatever ``sqlite3.connect`` raises when the database file cannot
    be opened.
    """
    global _conn
    if _conn is not None:
        return _conn
    with _lock:
        # Re-check under the lock: another thread may have opened it meanwhile.
        if _conn is None:
            uri = f'file:{_DB_PATH}?mode=ro'
            conn = sqlite3.connect(uri, uri=True, check_same_thread=False)
            conn.row_factory = sqlite3.Row
            _conn = conn
            logger.info("Netsyms DB opened: %s", _DB_PATH)
        return _conn
|
||||
|
||||
|
||||
def _row_to_dict(row):
|
||||
"""Convert a sqlite3.Row to a plain dict with lat/lon keys."""
|
||||
return {
|
||||
'zipcode': row['zipcode'],
|
||||
'number': row['number'],
|
||||
'street': row['street'],
|
||||
'street2': row['street2'],
|
||||
'city': row['city'],
|
||||
'state': row['state'],
|
||||
'plus4': row['plus4'],
|
||||
'country': row['country'],
|
||||
'lat': float(row['latitude']),
|
||||
'lon': float(row['longitude']),
|
||||
'source': row['source'],
|
||||
}
|
||||
|
||||
|
||||
def lookup_by_street(number, street, city=None, state=None,
                     zipcode=None, country=None, limit=20):
    """Find addresses by exact house number and street name.

    ``number`` and ``street`` are required and compared upper-cased. ``city``,
    ``state``, ``zipcode`` and ``country`` narrow the match only when given;
    all but ``zipcode`` are upper-cased as well. At most ``limit`` rows are
    returned as dicts. A SQLite error during the query is logged and yields an
    empty list.
    """
    db = _get_conn()

    where = ['number = ?', 'street = ?']
    args = [str(number).strip().upper(), street.strip().upper()]

    # (SQL clause, supplied value, whether the value is upper-cased)
    optional_filters = (
        ('city = ?', city, True),
        ('state = ?', state, True),
        ('zipcode = ?', zipcode, False),
        ('country = ?', country, True),
    )
    for clause, value, fold_case in optional_filters:
        if not value:
            continue
        where.append(clause)
        cleaned = value.strip()
        args.append(cleaned.upper() if fold_case else cleaned)

    # Only fixed clause text is interpolated; every value goes in as a parameter.
    sql = f"SELECT * FROM addresses WHERE {' AND '.join(where)} LIMIT ?"
    args.append(limit)

    with _lock:
        try:
            rows = db.execute(sql, args).fetchall()
        except sqlite3.Error as e:
            logger.warning("Netsyms lookup_by_street error: %s", e)
            return []

    results = list(map(_row_to_dict, rows))
    logger.debug("lookup_by_street(%s, %s, city=%s, state=%s) → %d results",
                 number, street, city, state, len(results))
    return results
|
||||
|
||||
|
||||
def lookup_free_text(query, country_hint=None):
    """Parse a free-text address and look it up.

    Parsing, right to left:
      1. a trailing 5-digit ZIP, optionally with a ``-NNNN`` ZIP+4 suffix
         (only the 5-digit part is used for matching);
      2. a trailing state / province code from ``_STATE_CODES``;
      3. a leading token that starts with a digit is the house number;
      4. when a state was found, the last one to three remaining words are
         tried as the city, shortest first, so multi-word cities such as
         "Twin Falls" can match; everything before the city is the street.

    The first number + street lookup that returns rows wins. If none does, or
    the query has no house number, the ZIP alone is looked up when one was
    given. A query consisting only of a ZIP therefore returns addresses in
    that ZIP.

    Args:
        query: free-text address, commas optional.
        country_hint: optional country code passed through to the street
            lookup as an extra filter.

    Returns:
        A list of address dicts, empty when nothing matched.
    """
    q = query.strip()
    if not q:
        return []

    # Strip trailing zipcode (ZIP or ZIP+4) if present
    zipcode = None
    zip_match = re.search(r'\b(\d{5})(?:-\d{4})?\s*$', q)
    if zip_match:
        zipcode = zip_match.group(1)
        q = q[:zip_match.start()].strip().rstrip(',').strip()

    tokens = [t for t in re.split(r'[,\s]+', q) if t]

    # Strip trailing state (needs at least one other token so a lone "IN" or
    # "OR" is not consumed as a state)
    state = None
    if len(tokens) >= 2 and tokens[-1].upper() in _STATE_CODES:
        state = tokens[-1].upper()
        tokens = tokens[:-1]

    # Leading digits → number
    number = None
    if tokens and re.match(r'^\d', tokens[0]):
        number = tokens[0]
        tokens = tokens[1:]

    if not tokens:
        # Only a ZIP, only a number, or nothing usable — try zipcode if we have one
        if zipcode:
            return lookup_by_zipcode(zipcode, limit=20)
        return []

    # Candidate (street, city) splits, tried in order. The first candidate is
    # the single-word-city split, so every query that matched with a one-word
    # city still resolves on the first lookup.
    if state and len(tokens) >= 2:
        max_city_words = min(3, len(tokens) - 1)
        splits = [(' '.join(tokens[:-n]), ' '.join(tokens[-n:]))
                  for n in range(1, max_city_words + 1)]
    else:
        splits = [(' '.join(tokens), None)]

    if number:
        for street, city in splits:
            results = lookup_by_street(number, street, city=city, state=state,
                                       zipcode=zipcode, country=country_hint)
            if results:
                logger.debug("lookup_free_text(%r) → %d results via street match",
                             query, len(results))
                return results

    # Fallback: try zipcode only if available
    if zipcode:
        return lookup_by_zipcode(zipcode, limit=20)

    logger.debug("lookup_free_text(%r) → 0 results", query)
    return []
|
||||
|
||||
|
||||
def lookup_by_zipcode(zipcode, limit=100):
    """Return up to ``limit`` addresses whose zipcode equals ``zipcode``.

    The zipcode is whitespace-stripped before matching. A SQLite error during
    the query is logged and yields an empty list.
    """
    db = _get_conn()
    query_args = [zipcode.strip(), limit]

    with _lock:
        try:
            cursor = db.execute(
                "SELECT * FROM addresses WHERE zipcode = ? LIMIT ?", query_args)
            rows = cursor.fetchall()
        except sqlite3.Error as e:
            logger.warning("Netsyms lookup_by_zipcode error: %s", e)
            return []

    results = list(map(_row_to_dict, rows))
    logger.debug("lookup_by_zipcode(%s) → %d results", zipcode, len(results))
    return results
|
||||
|
||||
|
||||
# Sorted list of distinct country values, memoised by health(); None until a
# query for it has succeeded. The database is opened read-only, so like the
# row count it only needs to be computed once.
_cached_countries = None


def health():
    """Health check with cached row count and country list.

    Returns a dict with:
      ok: False when the database file is missing, cannot be opened, or one
          of the summary queries failed; True otherwise.
      row_count: COUNT(*) of the addresses table (0 when unknown).
      file_size_bytes: size of the database file (0 when it is missing).
      indexed_countries: sorted distinct non-NULL country values.

    Both summary queries scan the table, so each runs at most once per
    process after it has succeeded. A failed query is not cached: it is
    retried on the next call and reported through ``ok``.
    """
    global _cached_row_count, _cached_countries

    try:
        file_size = os.path.getsize(_DB_PATH)
    except OSError:
        return {'ok': False, 'row_count': 0, 'file_size_bytes': 0,
                'indexed_countries': []}

    try:
        conn = _get_conn()
    except Exception:
        return {'ok': False, 'row_count': 0, 'file_size_bytes': file_size,
                'indexed_countries': []}

    ok = True
    if _cached_row_count is None or _cached_countries is None:
        with _lock:
            # Re-check under the lock: another thread may have filled the
            # caches while this one was waiting.
            if _cached_row_count is None:
                try:
                    row = conn.execute(
                        "SELECT COUNT(*) AS cnt FROM addresses"
                    ).fetchone()
                    _cached_row_count = row['cnt']
                except sqlite3.Error as e:
                    logger.warning("Netsyms health row count error: %s", e)
                    ok = False

            if _cached_countries is None:
                try:
                    rows = conn.execute(
                        "SELECT DISTINCT country FROM addresses"
                    ).fetchall()
                    # Drop NULLs: None cannot be ordered against strings.
                    _cached_countries = sorted(
                        r['country'] for r in rows if r['country'] is not None
                    )
                except sqlite3.Error as e:
                    logger.warning("Netsyms health country list error: %s", e)
                    ok = False

    return {
        'ok': ok,
        'row_count': _cached_row_count or 0,
        'file_size_bytes': file_size,
        # Copy so callers cannot mutate the cached list.
        'indexed_countries': list(_cached_countries or []),
    }
|
||||
48
lib/netsyms_api.py
Normal file
48
lib/netsyms_api.py
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
"""
|
||||
RECON Netsyms API + Geocode chain — Flask Blueprints.
|
||||
|
||||
GET /api/netsyms/lookup?q=<free text>&country=<optional>
|
||||
GET /api/netsyms/health
|
||||
GET /api/geocode?q=<query> (full 3-tier chain: address_book → netsyms → photon)
|
||||
"""
|
||||
|
||||
from flask import Blueprint, request, jsonify
|
||||
|
||||
from . import netsyms
|
||||
from . import address_book
|
||||
from . import nav_tools
|
||||
from .utils import setup_logging
|
||||
|
||||
# Module logger (namespace 'recon.netsyms_api').
logger = setup_logging('recon.netsyms_api')

# Two separate blueprints: one carries the /api/netsyms/* routes, the other
# the /api/geocode route. Both are defined in this module.
netsyms_bp = Blueprint('netsyms', __name__)
geocode_bp = Blueprint('geocode', __name__)
|
||||
|
||||
|
||||
@netsyms_bp.route('/api/netsyms/lookup')
def api_netsyms_lookup():
    """GET /api/netsyms/lookup?q=<free text>&country=<optional>.

    Responses:
      200 {'results': [...], 'count': n, 'query': q}
      400 when ``q`` is missing or blank
      503 when the lookup itself raises, e.g. the database file cannot be
          opened. Without this guard the exception escapes as an unhandled
          500, whereas the health endpoint already reports the same condition
          gracefully.
    """
    q = request.args.get('q', '').strip()
    if not q:
        return jsonify({'error': 'Missing q parameter'}), 400

    # Blank country is treated as "no hint".
    country = request.args.get('country', '').strip() or None
    try:
        results = netsyms.lookup_free_text(q, country_hint=country)
    except Exception as e:
        logger.warning("netsyms lookup failed for %r: %s", q, e)
        return jsonify({'error': 'Netsyms database unavailable', 'query': q}), 503

    return jsonify({'results': results, 'count': len(results), 'query': q})
|
||||
|
||||
|
||||
@netsyms_bp.route('/api/netsyms/health')
def api_netsyms_health():
    """GET /api/netsyms/health: the dict from netsyms.health() as JSON."""
    report = netsyms.health()
    return jsonify(report)
|
||||
|
||||
|
||||
@geocode_bp.route('/api/geocode')
def api_geocode():
    """GET /api/geocode?q=<query>: run the full geocode chain.

    400 when ``q`` is missing or blank, 404 when no tier produced a result,
    otherwise the result dict as JSON.
    """
    query_text = request.args.get('q', '').strip()
    if query_text == '':
        return jsonify({'error': 'Missing q parameter'}), 400

    hit = nav_tools.geocode(query_text)
    if hit is not None:
        return jsonify(hit)

    return jsonify({'error': 'No results', 'query': query_text}), 404
|
||||
80
lib/netsyms_test.py
Normal file
80
lib/netsyms_test.py
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Tests for Netsyms address database module."""
|
||||
|
||||
import sys
|
||||
import os
|
||||
|
||||
# Ensure the lib directory is importable
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from lib import netsyms
|
||||
|
||||
|
||||
def test_lookup_by_street_lowercase():
    """Street lookup with non-upper-cased input finds 214 North St, Filer ID."""
    hits = netsyms.lookup_by_street("214", "North St", city="Filer", state="ID")
    assert len(hits) >= 1, f"Expected at least 1 result, got {len(hits)}"
    first = hits[0]
    lat_error = abs(first['lat'] - 42.5736)
    lon_error = abs(first['lon'] - (-114.6066))
    assert lat_error < 0.01, f"Lat mismatch: {first['lat']}"
    assert lon_error < 0.01, f"Lon mismatch: {first['lon']}"
    print(" PASS: lookup_by_street (lowercase)")
|
||||
|
||||
|
||||
def test_lookup_by_street_uppercase():
    """Street lookup with already upper-cased input finds the same address."""
    hits = netsyms.lookup_by_street("214", "NORTH ST", city="FILER", state="ID")
    assert len(hits) >= 1, f"Expected at least 1 result, got {len(hits)}"
    first = hits[0]
    lat_error = abs(first['lat'] - 42.5736)
    assert lat_error < 0.01, f"Lat mismatch: {first['lat']}"
    print(" PASS: lookup_by_street (uppercase)")
|
||||
|
||||
|
||||
def test_lookup_nonexistent():
    """A house number / street that does not exist yields an empty list."""
    hits = netsyms.lookup_by_street(
        "999999", "Nonexistent Rd", city="Filer", state="ID",
    )
    assert hits == [], f"Expected empty list, got {len(hits)} results"
    print(" PASS: lookup_by_street (nonexistent)")
|
||||
|
||||
|
||||
def test_free_text_with_commas():
    """Comma-separated free text resolves to the expected city and state."""
    hits = netsyms.lookup_free_text("214 North St, Filer, ID")
    assert len(hits) >= 1, f"Expected at least 1 result, got {len(hits)}"
    first = hits[0]
    assert first['city'] == 'FILER', f"City mismatch: {first['city']}"
    assert first['state'] == 'ID', f"State mismatch: {first['state']}"
    print(" PASS: lookup_free_text (commas)")
|
||||
|
||||
|
||||
def test_free_text_no_commas():
    """The same address without commas still resolves to the right state."""
    hits = netsyms.lookup_free_text("214 North St Filer ID")
    assert len(hits) >= 1, f"Expected at least 1 result, got {len(hits)}"
    first = hits[0]
    assert first['state'] == 'ID', f"State mismatch: {first['state']}"
    print(" PASS: lookup_free_text (no commas)")
|
||||
|
||||
|
||||
def test_lookup_by_zipcode():
    """A zipcode lookup honours the limit and returns only that zipcode."""
    hits = netsyms.lookup_by_zipcode("83328", limit=5)
    assert len(hits) == 5, f"Expected 5 results, got {len(hits)}"
    for entry in hits:
        assert entry['zipcode'] == '83328', f"Zipcode mismatch: {entry['zipcode']}"
    print(" PASS: lookup_by_zipcode")
|
||||
|
||||
|
||||
def test_health():
    """health() reports OK, a plausible row count, and both US and CA."""
    h = netsyms.health()
    assert h['ok'] is True, f"Health not OK: {h}"
    assert h['row_count'] >= 159_000_000, f"Row count too low: {h['row_count']}"
    countries = h['indexed_countries']
    assert 'US' in countries, f"US not in countries: {h['indexed_countries']}"
    assert 'CA' in countries, f"CA not in countries: {h['indexed_countries']}"
    print(" PASS: health")
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: run every test in order; the first failing assert
    # aborts the run.
    print("Running Netsyms tests...")
    for case in (
        test_lookup_by_street_lowercase,
        test_lookup_by_street_uppercase,
        test_lookup_nonexistent,
        test_free_text_with_commas,
        test_free_text_no_commas,
        test_lookup_by_zipcode,
        test_health,
    ):
        case()
    print("All tests passed.")
|
||||
Loading…
Add table
Add a link
Reference in a new issue