Add Overture Maps POI enrichment layer for place details

Ingests 20.9M North America places from Overture Maps Foundation
(release 2026-04-15.0) into PostgreSQL. Enriches /api/place responses
with phone, website, and brand data via spatial + fuzzy name matching
when OSM extratags are sparse.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Matt 2026-04-21 16:51:25 +00:00
commit 65693d15aa
6 changed files with 597 additions and 0 deletions

View file

@ -39,6 +39,7 @@ features:
has_traffic_overlay: true has_traffic_overlay: true
has_landclass: false has_landclass: false
has_address_book_write: false has_address_book_write: false
has_overture_enrichment: true
defaults: defaults:
center: [42.5736, -114.6066] center: [42.5736, -114.6066]

View file

@ -34,6 +34,7 @@ features:
has_traffic_overlay: false has_traffic_overlay: false
has_landclass: false has_landclass: false
has_address_book_write: true has_address_book_write: true
has_overture_enrichment: false
defaults: defaults:
center: [44.0, -114.0] center: [44.0, -114.0]

View file

@ -39,6 +39,7 @@ features:
has_traffic_overlay: true has_traffic_overlay: true
has_landclass: true has_landclass: true
has_address_book_write: true has_address_book_write: true
has_overture_enrichment: false
defaults: defaults:
center: [44.0, -114.0] center: [44.0, -114.0]

170
lib/overture.py Normal file
View file

@ -0,0 +1,170 @@
"""
Overture Maps enrichment layer.
Provides lookup functions against the local PostgreSQL Overture Places database.
Two strategies:
1. find_by_osm_id exact match via OSM cross-reference index
2. find_by_coords_and_name spatial + fuzzy name fallback
Connection pool is lazy-initialized on first call. If PostgreSQL is unreachable,
functions return None gracefully (feature degrades, doesn't crash).
"""
import json
import os
import psycopg2
import psycopg2.pool
from .utils import setup_logging
logger = setup_logging('recon.overture')
_pool = None
_pool_failed = False
# Map full OSM type names to single-letter codes used in Overture sources
OSM_TYPE_MAP = {
'N': 'n', 'W': 'w', 'R': 'r',
'node': 'n', 'way': 'w', 'relation': 'r',
'n': 'n', 'w': 'w', 'r': 'r',
}
def _get_pool():
"""Lazy-init the connection pool. Returns None if Postgres is unreachable."""
global _pool, _pool_failed
if _pool is not None:
return _pool
if _pool_failed:
return None
try:
_pool = psycopg2.pool.SimpleConnectionPool(
minconn=1,
maxconn=3,
host=os.environ.get('OVERTURE_DB_HOST', 'localhost'),
port=int(os.environ.get('OVERTURE_DB_PORT', '5432')),
dbname=os.environ.get('OVERTURE_DB_NAME', 'overture'),
user=os.environ.get('OVERTURE_DB_USER', 'overture'),
password=os.environ.get('OVERTURE_DB_PASSWORD', ''),
connect_timeout=5,
)
logger.info("Overture PostgreSQL connection pool initialized")
return _pool
except Exception as e:
_pool_failed = True
logger.warning(f"Overture PostgreSQL unavailable, enrichment disabled: {e}")
return None
def _query(sql, params):
"""Execute a query and return the first row as a dict, or None."""
pool = _get_pool()
if pool is None:
return None
conn = None
try:
conn = pool.getconn()
with conn.cursor() as cur:
cur.execute(sql, params)
row = cur.fetchone()
if row is None:
return None
cols = [desc[0] for desc in cur.description]
return dict(zip(cols, row))
except Exception as e:
logger.warning(f"Overture query error: {e}")
if conn:
try:
conn.rollback()
except Exception:
pass
return None
finally:
if conn:
try:
pool.putconn(conn)
except Exception:
pass
def _format_result(row, match_method):
"""Convert a database row dict to the enrichment result shape."""
if not row:
return None
socials = row.get('socials')
if isinstance(socials, str):
try:
socials = json.loads(socials)
except (json.JSONDecodeError, TypeError):
socials = None
return {
'phone': row.get('phone'),
'website': row.get('website'),
'socials': socials,
'brand_name': row.get('brand_name'),
'brand_wikidata': row.get('brand_wikidata'),
'basic_category': row.get('basic_category'),
'confidence': row.get('confidence'),
'gers_id': row.get('id'),
'match_method': match_method,
}
def find_by_osm_id(osm_type, osm_id):
"""
Look up an Overture place by its OSM cross-reference.
Args:
osm_type: OSM type 'N', 'W', 'R', 'node', 'way', 'relation', or single letter
osm_id: OSM numeric ID
Returns:
Enrichment dict or None
"""
type_letter = OSM_TYPE_MAP.get(osm_type)
if not type_letter:
return None
row = _query(
"""SELECT id, name, basic_category, confidence,
phone, website, socials, brand_name, brand_wikidata
FROM places
WHERE osm_type = %s AND osm_id = %s
LIMIT 1""",
(type_letter, int(osm_id))
)
return _format_result(row, 'osm_xref')
def find_by_coords_and_name(lat, lon, name, radius_m=100):
"""
Look up an Overture place by spatial proximity + fuzzy name match.
Args:
lat: Latitude
lon: Longitude
name: Place name to fuzzy-match
radius_m: Search radius in meters (default 100)
Returns:
Enrichment dict or None
"""
if not name or not lat or not lon:
return None
row = _query(
"""SELECT id, name, basic_category, confidence,
phone, website, socials, brand_name, brand_wikidata,
similarity(name, %s) AS sim
FROM places
WHERE ST_DWithin(geometry::geography, ST_MakePoint(%s, %s)::geography, %s)
AND similarity(name, %s) > 0.4
ORDER BY sim DESC, ST_Distance(geometry::geography, ST_MakePoint(%s, %s)::geography) ASC
LIMIT 1""",
(name, lon, lat, radius_m, name, lon, lat)
)
return _format_result(row, 'coord_name_fuzzy')

View file

@ -1,5 +1,6 @@
""" """
Place detail proxy local Nominatim first, Overpass API fallback, SQLite cache. Place detail proxy local Nominatim first, Overpass API fallback, SQLite cache.
Overture Maps enrichment layer fills sparse extratags (phone, website, brand).
Provides get_place_detail(osm_type, osm_id) which returns a cleaned dict Provides get_place_detail(osm_type, osm_id) which returns a cleaned dict
matching the response shape for /api/place/<osm_type>/<osm_id>. matching the response shape for /api/place/<osm_type>/<osm_id>.
@ -82,6 +83,77 @@ def cache_put(osm_type, osm_id, data, source):
db.commit() db.commit()
# ── Overture enrichment ─────────────────────────────────────────────────
def _enrich_with_overture(result, osm_type, osm_id):
"""
Attempt to enrich a place result with Overture Maps data.
Fills sparse extratags (phone, website, brand) without overwriting existing values.
Returns the (possibly enriched) result dict.
"""
try:
from .deployment_config import get_deployment_config
deploy_config = get_deployment_config()
features = deploy_config.get('features', {})
if not features.get('has_overture_enrichment', False):
return result
except Exception:
return result
try:
from .overture import find_by_osm_id, find_by_coords_and_name
except ImportError:
logger.debug("Overture module not available")
return result
enrichment = None
match_method = None
# Strategy 1: OSM cross-reference (exact)
enrichment = find_by_osm_id(osm_type, osm_id)
if enrichment:
match_method = 'osm_xref'
# Strategy 2: Coordinate + name fuzzy (fallback)
if not enrichment and result.get('centroid') and result.get('name'):
centroid = result['centroid']
if centroid.get('lat') and centroid.get('lon'):
enrichment = find_by_coords_and_name(
centroid['lat'], centroid['lon'], result['name']
)
if enrichment:
match_method = 'coord_name_fuzzy'
if not enrichment:
return result
# Fill sparse extratags (never overwrite existing non-null values)
extratags = result.get('extratags', {})
fill_map = [
('phone', 'phone'),
('website', 'website'),
('brand', 'brand_name'),
('brand:wikidata', 'brand_wikidata'),
]
for osm_key, overture_key in fill_map:
if not extratags.get(osm_key) and enrichment.get(overture_key):
extratags[osm_key] = enrichment[overture_key]
result['extratags'] = extratags
# Add source metadata
result['sources'] = {
'primary': result.get('source', 'unknown'),
'enrichment': 'overture',
'overture_match_method': match_method,
'overture_gers_id': enrichment.get('gers_id'),
'overture_confidence': enrichment.get('confidence'),
'overture_basic_category': enrichment.get('basic_category'),
}
logger.debug(f"Overture enrichment for {osm_type}/{osm_id}: {match_method}")
return result
# ── Nominatim parsing ─────────────────────────────────────────────────── # ── Nominatim parsing ───────────────────────────────────────────────────
# Nominatim address array uses rank_address to indicate what each entry is. # Nominatim address array uses rank_address to indicate what each entry is.
@ -368,6 +440,7 @@ def get_place_detail(osm_type, osm_id):
logger.warning(f"Nominatim error for {osm_type}/{osm_id}: {e}") logger.warning(f"Nominatim error for {osm_type}/{osm_id}: {e}")
if nominatim_result: if nominatim_result:
nominatim_result = _enrich_with_overture(nominatim_result, osm_type, osm_id)
cache_put(osm_type, osm_id, nominatim_result, 'nominatim_local') cache_put(osm_type, osm_id, nominatim_result, 'nominatim_local')
return nominatim_result, 200 return nominatim_result, 200
@ -398,6 +471,7 @@ def get_place_detail(osm_type, osm_id):
logger.warning(f"Overpass error for {osm_type}/{osm_id}: {e}") logger.warning(f"Overpass error for {osm_type}/{osm_id}: {e}")
if overpass_result: if overpass_result:
overpass_result = _enrich_with_overture(overpass_result, osm_type, osm_id)
cache_put(osm_type, osm_id, overpass_result, 'overpass') cache_put(osm_type, osm_id, overpass_result, 'overpass')
return overpass_result, 200 return overpass_result, 200

350
scripts/overture_import.py Normal file
View file

@ -0,0 +1,350 @@
#!/usr/bin/env python3
"""Overture Maps Places → PostgreSQL import script (v2).
Downloads Overture Places Parquet from S3 via DuckDB (public bucket, no credentials),
filters to North America bounding box, and inserts into local PostgreSQL with PostGIS.
Usage:
cd /opt/recon && venv/bin/python scripts/overture_import.py
Re-runnable (idempotent via UPSERT).
"""
import json
import logging
import os
import re
import sys
import time
import duckdb
import psycopg2
import psycopg2.extras
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s %(levelname)s %(message)s',
datefmt='%H:%M:%S'
)
log = logging.getLogger('overture_import')
# --- Config ---
OVERTURE_RELEASE = '2026-04-15.0'
S3_PATH = f's3://overturemaps-us-west-2/release/{OVERTURE_RELEASE}/theme=places/type=place/*'
# North America bounding box (generous — includes Hawaii, Puerto Rico, Canada)
BBOX = {
'xmin': -170.0,
'xmax': -50.0,
'ymin': 15.0,
'ymax': 85.0,
}
BATCH_SIZE = 50_000
OSM_RECORD_RE = re.compile(r'^([nwr])(\d+)@\d+$')
DB_CONFIG = {
'host': os.environ.get('OVERTURE_DB_HOST', 'localhost'),
'port': int(os.environ.get('OVERTURE_DB_PORT', '5432')),
'dbname': os.environ.get('OVERTURE_DB_NAME', 'overture'),
'user': os.environ.get('OVERTURE_DB_USER', 'overture'),
'password': os.environ.get('OVERTURE_DB_PASSWORD', ''),
}
def create_table(conn):
"""Create places table and indexes if they don't exist."""
with conn.cursor() as cur:
cur.execute("""
CREATE TABLE IF NOT EXISTS places (
id TEXT PRIMARY KEY,
geometry GEOMETRY(Point, 4326),
name TEXT,
basic_category TEXT,
confidence REAL,
phone TEXT,
website TEXT,
socials JSONB,
brand_name TEXT,
brand_wikidata TEXT,
osm_type CHAR(1),
osm_id BIGINT,
source_record_id TEXT,
raw_sources JSONB
);
""")
cur.execute("""
CREATE INDEX IF NOT EXISTS idx_places_osm
ON places(osm_type, osm_id) WHERE osm_type IS NOT NULL;
""")
cur.execute("""
CREATE INDEX IF NOT EXISTS idx_places_geom
ON places USING GIST(geometry);
""")
cur.execute("""
CREATE INDEX IF NOT EXISTS idx_places_name_trgm
ON places USING GIN(name gin_trgm_ops);
""")
conn.commit()
log.info('Table and indexes ready')
def parse_osm_ref(sources):
"""Extract OSM type letter and ID from Overture sources array."""
if not sources:
return None, None, None
for src in sources:
record_id = None
if isinstance(src, dict):
record_id = src.get('record_id', '')
elif hasattr(src, '__getitem__'):
# DuckDB struct — try attribute access
try:
record_id = src['record_id']
except (KeyError, TypeError, IndexError):
pass
if not record_id:
continue
m = OSM_RECORD_RE.match(str(record_id))
if m:
return m.group(1), int(m.group(2)), str(record_id)
return None, None, None
def run_import():
"""Main import: DuckDB reads S3 Parquet → PostgreSQL via chunked OFFSET/LIMIT."""
log.info(f'Overture release: {OVERTURE_RELEASE}')
log.info(f'S3 path: {S3_PATH}')
log.info(f'Bounding box: {BBOX}')
# Connect to PostgreSQL
conn = psycopg2.connect(**DB_CONFIG)
conn.autocommit = False
create_table(conn)
# Set up DuckDB with httpfs and spatial for S3 access
duck = duckdb.connect()
duck.execute("INSTALL httpfs; LOAD httpfs;")
duck.execute("INSTALL spatial; LOAD spatial;")
duck.execute("SET s3_region='us-west-2';")
# Use a materialized approach: DuckDB query → Arrow → iterate in Python
query = f"""
SELECT
id,
ST_X(geometry) AS lon,
ST_Y(geometry) AS lat,
names.primary AS name,
basic_category,
confidence,
phones,
websites,
socials,
brand,
sources
FROM read_parquet('{S3_PATH}', hive_partitioning=true)
WHERE bbox.xmin >= {BBOX['xmin']}
AND bbox.xmax <= {BBOX['xmax']}
AND bbox.ymin >= {BBOX['ymin']}
AND bbox.ymax <= {BBOX['ymax']}
"""
log.info('Starting DuckDB query against S3 (this will take several minutes)...')
t_start = time.time()
# Execute and fetch all as Arrow for efficient iteration
result_rel = duck.sql(query)
upsert_sql = """
INSERT INTO places (id, geometry, name, basic_category, confidence,
phone, website, socials, brand_name, brand_wikidata,
osm_type, osm_id, source_record_id, raw_sources)
VALUES %s
ON CONFLICT (id) DO UPDATE SET
geometry = EXCLUDED.geometry,
name = EXCLUDED.name,
basic_category = EXCLUDED.basic_category,
confidence = EXCLUDED.confidence,
phone = EXCLUDED.phone,
website = EXCLUDED.website,
socials = EXCLUDED.socials,
brand_name = EXCLUDED.brand_name,
brand_wikidata = EXCLUDED.brand_wikidata,
osm_type = EXCLUDED.osm_type,
osm_id = EXCLUDED.osm_id,
source_record_id = EXCLUDED.source_record_id,
raw_sources = EXCLUDED.raw_sources
"""
template = """(
%(id)s,
ST_SetSRID(ST_MakePoint(%(lon)s, %(lat)s), 4326),
%(name)s,
%(basic_category)s,
%(confidence)s,
%(phone)s,
%(website)s,
%(socials)s::jsonb,
%(brand_name)s,
%(brand_wikidata)s,
%(osm_type)s,
%(osm_id)s,
%(source_record_id)s,
%(raw_sources)s::jsonb
)"""
total = 0
osm_refs = 0
batch = []
log.info('DuckDB query executing, fetching results in chunks...')
# Fetch in chunks using fetchmany on the relation
chunk_size = BATCH_SIZE
while True:
chunk = result_rel.fetchmany(chunk_size)
if not chunk:
break
for row in chunk:
row_id = row[0]
lon = row[1]
lat = row[2]
name = row[3]
basic_cat = row[4]
conf = row[5]
phones = row[6]
websites = row[7]
socials_raw = row[8]
brand_raw = row[9]
sources_raw = row[10]
if lon is None or lat is None:
continue
# Phone: first element of VARCHAR[]
phone = None
if phones and len(phones) > 0:
phone = str(phones[0]) if phones[0] else None
# Website: first element of VARCHAR[]
website = None
if websites and len(websites) > 0:
website = str(websites[0]) if websites[0] else None
# Socials: VARCHAR[] → JSON array of strings
socials_json = None
if socials_raw and len(socials_raw) > 0:
socials_json = json.dumps([str(s) for s in socials_raw if s])
# Brand: struct with wikidata and names.primary
brand_name = None
brand_wikidata = None
if brand_raw:
try:
if isinstance(brand_raw, dict):
brand_wikidata = brand_raw.get('wikidata')
names_struct = brand_raw.get('names')
if names_struct and isinstance(names_struct, dict):
brand_name = names_struct.get('primary')
else:
# DuckDB struct — access by key
brand_wikidata = brand_raw['wikidata'] if 'wikidata' in dir(brand_raw) else None
try:
brand_wikidata = brand_raw[0] # wikidata is first field
names_struct = brand_raw[1] # names is second field
if names_struct:
brand_name = names_struct[0] # primary is first field
except (IndexError, TypeError):
pass
except Exception:
pass
# Sources: parse OSM cross-reference
sources_list = None
if sources_raw:
if isinstance(sources_raw, (list, tuple)):
sources_list = []
for s in sources_raw:
if isinstance(s, dict):
sources_list.append(s)
else:
# DuckDB struct tuple — convert
try:
sources_list.append({
'dataset': s[1] if len(s) > 1 else None,
'record_id': s[3] if len(s) > 3 else None,
})
except (TypeError, IndexError):
pass
osm_type_letter, osm_id_val, source_record_id = parse_osm_ref(sources_list)
if osm_type_letter:
osm_refs += 1
raw_sources_json = json.dumps(sources_list) if sources_list else None
batch.append({
'id': row_id,
'lon': float(lon),
'lat': float(lat),
'name': name,
'basic_category': basic_cat,
'confidence': float(conf) if conf is not None else None,
'phone': phone,
'website': website,
'socials': socials_json,
'brand_name': brand_name,
'brand_wikidata': brand_wikidata,
'osm_type': osm_type_letter,
'osm_id': osm_id_val,
'source_record_id': source_record_id,
'raw_sources': raw_sources_json,
})
if len(batch) >= BATCH_SIZE:
with conn.cursor() as cur:
psycopg2.extras.execute_values(
cur, upsert_sql, batch,
template=template,
page_size=BATCH_SIZE
)
conn.commit()
total += len(batch)
elapsed = time.time() - t_start
rate = total / elapsed if elapsed > 0 else 0
log.info(f'Inserted {total:,} rows ({osm_refs:,} OSM xrefs) '
f'[{rate:.0f} rows/sec, {elapsed:.0f}s elapsed]')
batch = []
# Flush remaining
if batch:
with conn.cursor() as cur:
psycopg2.extras.execute_values(
cur, upsert_sql, batch,
template=template,
page_size=BATCH_SIZE
)
conn.commit()
total += len(batch)
duck.close()
# Final stats
elapsed = time.time() - t_start
log.info(f'Import complete: {total:,} rows, {osm_refs:,} OSM cross-refs, '
f'{elapsed:.0f}s total ({total/elapsed:.0f} rows/sec)')
# Verify
with conn.cursor() as cur:
cur.execute("SELECT count(*) FROM places")
count = cur.fetchone()[0]
cur.execute("SELECT count(*) FROM places WHERE osm_type IS NOT NULL")
osm_count = cur.fetchone()[0]
log.info(f'Final table: {count:,} total rows, {osm_count:,} with OSM cross-references')
conn.close()
if __name__ == '__main__':
run_import()