# Mirror of https://github.com/zvx-echo6/recon.git (synced 2026-05-20 14:44:54 +02:00)
# 228 lines, 6.7 KiB, Python
"""
RECON Netsyms AddressDatabase2025 — SQLite-backed US+CA address lookup.

Provides 159.78M geocoded addresses as the tier-2 source, sitting between
the address book (exact named locations) and Photon (full-text global
geocoding).

Database: /mnt/nav/addresses/AddressDatabase2025.sqlite (read-only)
"""
|
||
|
|
|
||
|
|
import os
|
||
|
|
import re
|
||
|
|
import sqlite3
|
||
|
|
import threading
|
||
|
|
|
||
|
|
from .utils import setup_logging
|
||
|
|
|
||
|
|
# Module-wide logger, created through the project's logging helper.
logger = setup_logging('recon.netsyms')

# Location of the SQLite address database; _get_conn() opens it with mode=ro.
_DB_PATH = '/mnt/nav/addresses/AddressDatabase2025.sqlite'

# Shared connection, opened on first use by _get_conn().
_conn = None
# Guards creation of _conn and serialises the queries issued through it.
_lock = threading.Lock()
# Total row count remembered by health() so COUNT(*) is not re-run each call.
_cached_row_count = None
|
||
|
|
|
||
|
|
# Two-letter region codes recognised when parsing free-text addresses:
# US states, DC and US territories, followed by the Canadian provinces and
# territories. Only used for membership tests, hence an immutable frozenset.
_STATE_CODES = frozenset({
    'AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA',
    'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD',
    'MA', 'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ',
    'NM', 'NY', 'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC',
    'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY',
    'DC', 'PR', 'VI', 'GU', 'AS', 'MP',
    # Canadian provinces
    'AB', 'BC', 'MB', 'NB', 'NL', 'NS', 'NT', 'NU', 'ON', 'PE',
    'QC', 'SK', 'YT',
})

# Splits text that starts with a house number into (number, remainder):
# group 1 is the leading digits plus any attached word characters or hyphens
# (e.g. "123A", "12-14"), group 2 is everything after it.
# NOTE(review): not referenced by the functions visible in this module —
# confirm whether another module still uses it.
_NUMBER_RE = re.compile(r'^(\d+[\w-]*)(.*)$')
|
||
|
|
|
||
|
|
|
||
|
|
def _get_conn():
    """Return the shared read-only SQLite connection, opening it on first use.

    The database at ``_DB_PATH`` is opened through a ``file:`` URI with
    ``mode=ro`` and with ``check_same_thread=False`` so the one connection
    can be handed to any thread. Rows come back as ``sqlite3.Row`` so they
    can be indexed by column name.

    Returns:
        The module-wide ``sqlite3.Connection``.

    Raises:
        sqlite3.Error: whatever ``sqlite3.connect`` raises when the database
            cannot be opened; nothing is cached in that case, so the next
            call tries again.
    """
    global _conn

    # Fast path: once published, the connection is returned without locking.
    if _conn is not None:
        return _conn

    with _lock:
        # Re-check under the lock: another thread may have opened it while
        # this one was waiting.
        if _conn is None:
            uri = f'file:{_DB_PATH}?mode=ro'
            conn = sqlite3.connect(uri, uri=True, check_same_thread=False)
            conn.row_factory = sqlite3.Row
            logger.info("Netsyms DB opened: %s", _DB_PATH)
            # Publish only after the connection is fully configured. The
            # fast path above reads _conn without the lock, so assigning the
            # global before row_factory was set could hand another thread a
            # connection that still returns plain tuples.
            _conn = conn
        return _conn
|
||
|
|
|
||
|
|
|
||
|
|
def _row_to_dict(row):
    """Convert one ``addresses`` row into a plain dict.

    Args:
        row: a mapping indexable by column name (normally a ``sqlite3.Row``)
            providing ``zipcode``, ``number``, ``street``, ``street2``,
            ``city``, ``state``, ``plus4``, ``country``, ``latitude``,
            ``longitude`` and ``source``.

    Returns:
        dict: the same fields, except that ``latitude``/``longitude`` are
        exposed as floats under the shorter keys ``lat``/``lon``.

    Raises:
        TypeError / ValueError: from ``float()`` if a coordinate is NULL or
            not numeric.
    """
    # Columns copied through unchanged, in output order.
    passthrough = ('zipcode', 'number', 'street', 'street2',
                   'city', 'state', 'plus4', 'country')
    record = {column: row[column] for column in passthrough}
    # Coordinates are renamed and forced to float.
    record['lat'] = float(row['latitude'])
    record['lon'] = float(row['longitude'])
    record['source'] = row['source']
    return record
|
||
|
|
|
||
|
|
|
||
|
|
def lookup_by_street(number, street, city=None, state=None,
                     zipcode=None, country=None, limit=20):
    """Find addresses by exact house number and street name.

    ``number`` and ``street`` are always matched; ``city``, ``state``,
    ``zipcode`` and ``country`` each add a further equality filter when
    given (falsy values are ignored). All values are stripped, and all but
    the zipcode are upper-cased before comparison.
    NOTE(review): this presumes the table stores these columns upper-cased —
    confirm against the database.

    Args:
        number: house number; converted with ``str()``, so ints are accepted.
        street: street name as a string.
        city, state, zipcode, country: optional qualifiers; non-string
            values (e.g. an int zipcode) are converted with ``str()``.
        limit: maximum number of rows to return.

    Returns:
        list[dict]: matches in the shape produced by ``_row_to_dict``;
        an empty list when nothing matches or the query raises
        ``sqlite3.Error`` (which is logged as a warning).
    """
    conn = _get_conn()

    clauses = ['number = ?', 'street = ?']
    params = [str(number).strip().upper(), street.strip().upper()]

    # (column, value, upper-case?) for every optional qualifier. Column names
    # come from this fixed table, never from the caller, so interpolating
    # them into the SQL text is safe; values always go through placeholders.
    qualifiers = (
        ('city', city, True),
        ('state', state, True),
        ('zipcode', zipcode, False),
        ('country', country, True),
    )
    for column, value, fold_case in qualifiers:
        if not value:
            continue
        # str() keeps non-string input (e.g. zipcode=83301) from failing on
        # .strip(), matching how `number` is treated above.
        text = str(value).strip()
        clauses.append(f'{column} = ?')
        params.append(text.upper() if fold_case else text)

    sql = f"SELECT * FROM addresses WHERE {' AND '.join(clauses)} LIMIT ?"
    params.append(limit)

    # The single shared connection is used under the module lock.
    with _lock:
        try:
            rows = conn.execute(sql, params).fetchall()
        except sqlite3.Error as e:
            logger.warning("Netsyms lookup_by_street error: %s", e)
            return []

    results = [_row_to_dict(r) for r in rows]
    logger.debug("lookup_by_street(%s, %s, city=%s, state=%s) → %d results",
                 number, street, city, state, len(results))
    return results
|
||
|
|
|
||
|
|
|
||
|
|
def lookup_free_text(query, country_hint=None):
    """Parse a free-text address and look it up.

    The text is taken apart from the outside in:

    1. a trailing 5-digit ZIP (optionally written as ZIP+4) is removed;
    2. a trailing token found in ``_STATE_CODES`` becomes the state;
    3. a leading token starting with a digit becomes the house number;
    4. if a state was found and at least two tokens remain, the last one is
       taken as the city; whatever is left is the street.

    With a house number, ``lookup_by_street`` is tried first. If that finds
    nothing, or there was no number or street to search for, the function
    falls back to ``lookup_by_zipcode`` when a ZIP was present.

    Args:
        query: the address text, e.g. ``"123 Main St, Boise ID 83702"``.
        country_hint: optional country code passed to the street lookup.

    Returns:
        list[dict]: matching addresses, or an empty list.
    """
    q = query.strip()
    if not q:
        return []

    # Strip trailing zipcode if present. A "-NNNN" ZIP+4 suffix is accepted
    # and discarded; only the 5-digit part is used for matching.
    zipcode = None
    zip_match = re.search(r'\b(\d{5})(?:-\d{4})?\s*$', q)
    if zip_match:
        zipcode = zip_match.group(1)
        q = q[:zip_match.start()].strip().rstrip(',').strip()

    # Commas and whitespace both separate tokens. An empty token list is
    # deliberately NOT an early exit: a ZIP-only query ("83301") must still
    # reach the zipcode fallback below.
    tokens = [t for t in re.split(r'[,\s]+', q) if t]

    # Strip trailing state (only when something else precedes it).
    state = None
    if len(tokens) >= 2 and tokens[-1].upper() in _STATE_CODES:
        state = tokens[-1].upper()
        tokens = tokens[:-1]

    # Leading digits → number
    number = None
    if tokens and re.match(r'^\d', tokens[0]):
        number = tokens[0]
        tokens = tokens[1:]

    if not tokens:
        # Only a number, or nothing at all — try zipcode if we have one
        if zipcode:
            return lookup_by_zipcode(zipcode, limit=20)
        return []

    # If state was found and we have 2+ tokens remaining, last token is city.
    # NOTE: only that single token is used, so a multi-word city such as
    # "Twin Falls" is split between street and city and will not match.
    city = None
    if state and len(tokens) >= 2:
        city = tokens[-1]
        tokens = tokens[:-1]

    street = ' '.join(tokens)

    if number:
        results = lookup_by_street(number, street, city=city, state=state,
                                   zipcode=zipcode, country=country_hint)
        if results:
            logger.debug("lookup_free_text(%r) → %d results via street match",
                         query, len(results))
            return results

    # Fallback: try zipcode only if available
    if zipcode:
        return lookup_by_zipcode(zipcode, limit=20)

    logger.debug("lookup_free_text(%r) → 0 results", query)
    return []
|
||
|
|
|
||
|
|
|
||
|
|
def lookup_by_zipcode(zipcode, limit=100):
    """Return addresses whose ``zipcode`` column equals *zipcode*.

    Args:
        zipcode: the code to match. Whitespace is stripped; non-string
            values are converted with ``str()``. ``None`` yields an empty
            result without touching the database.
        limit: maximum number of rows to return.

    Returns:
        list[dict]: matches in the shape produced by ``_row_to_dict``;
        an empty list when nothing matches or the query raises
        ``sqlite3.Error`` (which is logged as a warning).
    """
    if zipcode is None:
        return []

    conn = _get_conn()
    sql = "SELECT * FROM addresses WHERE zipcode = ? LIMIT ?"
    # str() so a numeric zipcode does not fail on .strip().
    params = [str(zipcode).strip(), limit]

    # The single shared connection is used under the module lock.
    with _lock:
        try:
            rows = conn.execute(sql, params).fetchall()
        except sqlite3.Error as e:
            logger.warning("Netsyms lookup_by_zipcode error: %s", e)
            return []

    results = [_row_to_dict(r) for r in rows]
    logger.debug("lookup_by_zipcode(%s) → %d results", zipcode, len(results))
    return results
|
||
|
|
|
||
|
|
|
||
|
|
def health():
    """Report whether the address database is present and queryable.

    Returns:
        dict with four keys:

        * ``ok`` — False when the file cannot be stat'ed, the connection
          cannot be opened, or either status query fails; True otherwise.
        * ``row_count`` — number of rows in ``addresses``. Computed once and
          then served from ``_cached_row_count``; 0 when unknown.
        * ``file_size_bytes`` — size of the database file (0 if the file
          could not be stat'ed).
        * ``indexed_countries`` — sorted distinct non-NULL ``country``
          values, queried on every call.
    """
    global _cached_row_count

    try:
        file_size = os.path.getsize(_DB_PATH)
    except OSError:
        return {'ok': False, 'row_count': 0, 'file_size_bytes': 0,
                'indexed_countries': []}

    try:
        conn = _get_conn()
    except Exception:
        # Health endpoint boundary: any failure to open means "not ok".
        return {'ok': False, 'row_count': 0, 'file_size_bytes': file_size,
                'indexed_countries': []}

    healthy = True

    # Row count: check, lock, re-check so COUNT(*) runs at most once.
    if _cached_row_count is None:
        with _lock:
            if _cached_row_count is None:
                try:
                    row = conn.execute(
                        "SELECT COUNT(*) AS cnt FROM addresses"
                    ).fetchone()
                    _cached_row_count = row['cnt']
                except sqlite3.Error as e:
                    # Do not cache a failure: leaving the cache empty lets
                    # the next call retry instead of reporting 0 forever.
                    logger.warning("Netsyms health row count error: %s", e)
                    healthy = False

    row_count = _cached_row_count if _cached_row_count is not None else 0

    with _lock:
        try:
            rows = conn.execute(
                "SELECT DISTINCT country FROM addresses"
            ).fetchall()
        except sqlite3.Error as e:
            logger.warning("Netsyms health country list error: %s", e)
            rows = []
            healthy = False

    # NULL countries are skipped: sorting None together with str raises.
    countries = sorted(r['country'] for r in rows if r['country'] is not None)

    return {
        'ok': healthy,
        'row_count': row_count,
        'file_size_bytes': file_size,
        'indexed_countries': countries,
    }
|