recon/lib/place_detail.py
Matt 248f4bded4 Fix wiki lookup to match on name+state+country instead of osm_key/osm_value
- Remove osm_key/osm_value from wiki_places lookup query
- Add fallback matching: try state first, then country only
- Parse state/country from wikipedia extratag when address is empty
- Add US states and Canadian provinces parsing for wikipedia titles
- Apply wiki enrichment to cached results (was missing)

Fixes wiki_summary and wiki_url not appearing for boundary/administrative
places like Joliet, IL where OSM returns boundary/administrative but
wiki_places has place/city.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-05-03 00:17:49 +00:00

943 lines
34 KiB
Python

"""
Place detail proxy — local Nominatim first, Overpass API fallback, SQLite cache.
Overture Maps enrichment layer fills sparse extratags (phone, website, brand).
Provides get_place_detail(osm_type, osm_id) which returns a cleaned dict
matching the response shape for /api/place/<osm_type>/<osm_id>.
"""
import json
import os
import sqlite3
import time
import requests as http_requests
from .osm_categories import humanize_category
from .utils import setup_logging
logger = setup_logging('recon.place_detail')
# Details endpoint of the local Nominatim instance; get_place_detail queries it first.
NOMINATIM_URL = "http://localhost:8010/details.php"
# Overpass API interpreter endpoint, used as the fallback data source.
OVERPASS_URL = "https://overpass-api.de/api/interpreter"
# User-Agent header value sent with Overpass requests.
OVERPASS_UA = "Navi/1.0 (forge.echo6.co/matt/recon)"
# Accepted single-letter OSM element types (node, way, relation).
VALID_OSM_TYPES = {"N", "W", "R"}
# US states and Canadian provinces for wikipedia title parsing
US_STATES = {
    'Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado',
    'Connecticut', 'Delaware', 'Florida', 'Georgia', 'Hawaii', 'Idaho',
    'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky', 'Louisiana',
    'Maine', 'Maryland', 'Massachusetts', 'Michigan', 'Minnesota',
    'Mississippi', 'Missouri', 'Montana', 'Nebraska', 'Nevada',
    'New Hampshire', 'New Jersey', 'New Mexico', 'New York',
    'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma', 'Oregon',
    'Pennsylvania', 'Rhode Island', 'South Carolina', 'South Dakota',
    'Tennessee', 'Texas', 'Utah', 'Vermont', 'Virginia', 'Washington',
    'West Virginia', 'Wisconsin', 'Wyoming', 'District of Columbia'
}
CANADIAN_PROVINCES = {
    'Alberta', 'British Columbia', 'Manitoba', 'New Brunswick',
    'Newfoundland and Labrador', 'Northwest Territories', 'Nova Scotia',
    'Nunavut', 'Ontario', 'Prince Edward Island', 'Quebec', 'Saskatchewan', 'Yukon'
}


def _parse_state_from_wikipedia(wikipedia_tag):
    """Parse state/province and country from wikipedia extratag like 'en:Joliet, Illinois'.

    Only English ('en:') tags are handled.

    Matching strategy:
      1. If the title contains a comma, the text after the LAST comma is
         compared exactly against the known region names
         ('Kansas City, Missouri' -> Missouri, not Kansas).
      2. Otherwise (or if step 1 finds nothing) fall back to a substring
         search, trying longer names first so that 'West Virginia' wins over
         'Virginia' and 'Arkansas' over 'Kansas'.

    In both steps US states are consulted before Canadian provinces.

    Returns:
        (region_name, country_code) where country_code is 'us' or 'ca',
        or (None, None) when nothing matches.
    """
    if not wikipedia_tag or not wikipedia_tag.startswith('en:'):
        return None, None
    title = wikipedia_tag[3:]
    region_tables = ((US_STATES, 'us'), (CANADIAN_PROVINCES, 'ca'))

    # Step 1: exact match on the trailing "<, Region>" component.
    if ',' in title:
        trailing = title.rsplit(',', 1)[1].strip()
        for names, country in region_tables:
            if trailing in names:
                return trailing, country

    # Step 2: substring fallback. Sets have no defined iteration order, so
    # sort explicitly (longest first, then alphabetically) to keep the result
    # deterministic and to prefer the most specific name.
    for names, country in region_tables:
        for candidate in sorted(names, key=lambda n: (-len(n), n)):
            if candidate in title:
                return candidate, country
    return None, None
_db_conn = None
# ── SQLite cache ────────────────────────────────────────────────────────


def _get_db():
    """Return the shared module-level SQLite connection, creating it on first use.

    On first call the ``data`` directory next to this file's parent package
    directory is created if needed, ``place_cache.db`` is opened inside it,
    WAL journaling is enabled and the ``place_cache`` table is created when
    absent. Later calls return the same connection object.
    """
    global _db_conn
    if _db_conn is None:
        module_dir = os.path.dirname(os.path.abspath(__file__))
        data_dir = os.path.join(os.path.dirname(module_dir), 'data')
        os.makedirs(data_dir, exist_ok=True)
        db_path = os.path.join(data_dir, 'place_cache.db')

        # Publish the connection before running setup statements, exactly as
        # the connection would be visible if a setup statement failed.
        _db_conn = sqlite3.connect(db_path, check_same_thread=False)
        for pragma in ("PRAGMA journal_mode=WAL", "PRAGMA synchronous=NORMAL"):
            _db_conn.execute(pragma)
        _db_conn.execute("""
            CREATE TABLE IF NOT EXISTS place_cache (
                osm_type TEXT NOT NULL,
                osm_id INTEGER NOT NULL,
                data TEXT NOT NULL,
                source TEXT NOT NULL,
                cached_at INTEGER NOT NULL,
                PRIMARY KEY (osm_type, osm_id)
            )
        """)
        _db_conn.commit()
        logger.info(f"Place cache DB ready at {db_path}")
    return _db_conn
def cache_get(osm_type, osm_id):
    """Look up a cached place.

    Returns the stored place dict with its ``source`` field overwritten to
    ``'cache'``, or None when there is no row or the stored JSON cannot be
    decoded into a dict-like object.
    """
    row = _get_db().execute(
        "SELECT data FROM place_cache WHERE osm_type=? AND osm_id=?",
        (osm_type, osm_id),
    ).fetchone()
    if not row:
        return None
    try:
        place = json.loads(row[0])
        place['source'] = 'cache'
    except (json.JSONDecodeError, TypeError):
        # Corrupt or non-dict payload: treat it as a cache miss.
        return None
    return place
def cache_put(osm_type, osm_id, data, source):
    """Insert or refresh the cache row for one OSM element.

    Uses an upsert that only touches ``data``, ``source`` and ``cached_at``,
    so any other columns on an existing row are left as they are.
    ``data`` is stored as a JSON string; ``cached_at`` is the current Unix
    time in whole seconds.
    """
    conn = _get_db()
    stamp = int(time.time())
    row = (osm_type, osm_id, json.dumps(data), source, stamp)
    conn.execute("""
        INSERT INTO place_cache (osm_type, osm_id, data, source, cached_at)
        VALUES (?, ?, ?, ?, ?)
        ON CONFLICT(osm_type, osm_id) DO UPDATE SET
            data = excluded.data,
            source = excluded.source,
            cached_at = excluded.cached_at
    """, row)
    conn.commit()
# ── Overture enrichment ─────────────────────────────────────────────────
def _enrich_with_overture(result, osm_type, osm_id):
    """
    Fill gaps in a place result from Overture Maps data.

    Runs only when the ``has_overture_enrichment`` feature flag is on; if the
    deployment config cannot be loaded, or the overture module is missing,
    the result is returned untouched. A match is looked up first by OSM
    cross-reference, then by centroid + name. Only empty extratags
    (phone, website, brand, brand:wikidata) are filled; existing values are
    never replaced. On a match, ``result['sources']`` is set to a dict
    describing the match.

    Returns the same ``result`` dict (mutated in place when enriched).
    """
    try:
        from .deployment_config import get_deployment_config
        flags = get_deployment_config().get('features', {})
        if not flags.get('has_overture_enrichment', False):
            return result
    except Exception:
        return result

    try:
        from .overture import find_by_osm_id, find_by_coords_and_name
    except ImportError:
        logger.debug("Overture module not available")
        return result

    # Strategy 1: exact OSM cross-reference.
    enrichment = find_by_osm_id(osm_type, osm_id)
    match_method = 'osm_xref' if enrichment else None

    # Strategy 2: fall back to a coordinate + name lookup when both are known.
    if not enrichment and result.get('centroid') and result.get('name'):
        point = result['centroid']
        if point.get('lat') and point.get('lon'):
            enrichment = find_by_coords_and_name(point['lat'], point['lon'], result['name'])
            if enrichment:
                match_method = 'coord_name_fuzzy'

    if not enrichment:
        return result

    # (extratag key, overture key) pairs; only empty extratags are filled.
    tags = result.get('extratags', {})
    for osm_key, overture_key in (
        ('phone', 'phone'),
        ('website', 'website'),
        ('brand', 'brand_name'),
        ('brand:wikidata', 'brand_wikidata'),
    ):
        if tags.get(osm_key):
            continue
        if enrichment.get(overture_key):
            tags[osm_key] = enrichment[overture_key]
    result['extratags'] = tags

    # Record where the data came from.
    result['sources'] = {
        'primary': result.get('source', 'unknown'),
        'enrichment': 'overture',
        'overture_match_method': match_method,
        'overture_gers_id': enrichment.get('gers_id'),
        'overture_confidence': enrichment.get('confidence'),
        'overture_basic_category': enrichment.get('basic_category'),
    }
    logger.debug(f"Overture enrichment for {osm_type}/{osm_id}: {match_method}")
    return result
# ── Google Places enrichment (tertiary, gap-fill only) ──────────────
# Business POI classes eligible for Google enrichment
_BUSINESS_CLASSES = {'amenity', 'shop', 'tourism', 'leisure', 'office', 'craft'}
# Fields Google can fill
_GOOGLE_GAP_FIELDS = ('opening_hours', 'phone', 'website')


def _enrich_with_google(result, osm_type, osm_id):
    """
    Tertiary, gap-fill-only enrichment from the Google Places (New) API.

    Applies only when the ``has_google_places_enrichment`` flag is on, the
    place's class is a business class, and at least one of opening_hours,
    phone or website is still empty. Cached Google data is used when
    present; a live API call is made only for signed-in users, within the
    daily cap, and when name and coordinates are known. Existing values are
    never overwritten.

    Returns the same ``result`` dict (mutated in place when enriched).
    """
    # Feature gate; any failure loading the config disables enrichment.
    try:
        from .deployment_config import get_deployment_config
        flags = get_deployment_config().get('features', {})
        if not flags.get('has_google_places_enrichment', False):
            return result
    except Exception:
        return result

    # Business-type POIs only.
    if result.get('class', '') not in _BUSINESS_CLASSES:
        return result

    # Work out which gap fields are still empty.
    tags = result.get('extratags', {})
    gaps = [field for field in _GOOGLE_GAP_FIELDS if not tags.get(field)]
    if not gaps:
        logger.debug(f"google_places: skip {osm_type}/{osm_id} — no gaps")
        return result

    try:
        from . import google_places
    except ImportError:
        logger.debug("google_places module not available")
        return result

    # Google-side cache first.
    cached_pid, cached_data = google_places.cache_get_google(osm_type, osm_id)
    if cached_pid and cached_data:
        _apply_google_data(result, cached_data, gaps)
        cache_info = {'place_id': cached_pid, 'source': 'cache'}
        result.setdefault('sources', {})['google_places'] = cache_info
        logger.debug(f"google_places: cache hit for {osm_type}/{osm_id}")
        return result

    # A cached place id without usable data means an earlier lookup was
    # already recorded for this element; do not query the API again.
    if cached_pid is not None:
        return result

    # Guests never trigger new API calls (cached data was handled above).
    from .auth import get_user_id
    if not get_user_id():
        logger.debug(f"google_places: skip API call for {osm_type}/{osm_id} — guest user")
        return result

    # Respect the daily request cap.
    if not google_places.check_daily_cap():
        return result

    # A search needs a name and both coordinates.
    name = result.get('name', '')
    point = result.get('centroid', {})
    lat, lon = point.get('lat'), point.get('lon')
    if not (name and lat and lon):
        return result

    place_id = google_places.search_place(name, lat, lon)
    if not place_id:
        # Remember the miss so the search is not repeated.
        google_places.cache_put_google(osm_type, osm_id, '__miss__', None)
        return result

    details = google_places.get_place_details(place_id)
    if not details:
        google_places.cache_put_google(osm_type, osm_id, place_id, None)
        return result

    google_places.cache_put_google(osm_type, osm_id, place_id, details)
    _apply_google_data(result, details, gaps)
    api_info = {
        'place_id': place_id,
        'source': 'api',
        'daily_count': google_places.get_daily_count(),
    }
    result.setdefault('sources', {})['google_places'] = api_info
    return result
def _apply_google_data(result, google_data, gaps):
    """Copy Google Places values into ``result['extratags']`` for gap fields only.

    ``gaps`` names the extratags that are allowed to be filled. For
    opening hours the ``opening_hours`` value is preferred; when it is
    empty, ``opening_hours_raw`` is stored under that same raw key instead.
    Mutates ``result`` in place and returns None.
    """
    tags = result.get('extratags', {})

    if 'opening_hours' in gaps:
        hours = google_data.get('opening_hours')
        if hours:
            tags['opening_hours'] = hours
        else:
            raw_hours = google_data.get('opening_hours_raw')
            if raw_hours:
                tags['opening_hours_raw'] = raw_hours

    # (extratag key, google key) pairs for the simple one-to-one fields.
    for tag_key, google_key in (('phone', 'phone_number'), ('website', 'website')):
        if tag_key in gaps and google_data.get(google_key):
            tags[tag_key] = google_data[google_key]

    result['extratags'] = tags
# ── Wiki link rewriting ─────────────────────────────────────────────────
# Extratag keys that may contain wiki references
_WIKI_TAGS = ('wikipedia', 'wikidata', 'wikivoyage', 'appropedia')


def _enrich_wiki_links(result):
    """
    Rewrite wiki-related extratags via ``wiki_rewrite.rewrite_wiki_link``.

    Runs only when the ``has_wiki_rewriting`` feature flag is on. When
    ``has_kiwix_wiki`` is also on, the 'wikipedia' tag is left in its
    original OSM form (the wiki index enrichment supplies ``wiki_url``
    instead). A tag is replaced only when the rewriter reports a status other
    than 'original'; the statuses of replaced tags are recorded under
    ``result['sources']['wiki_rewrites']``.

    Returns the same ``result`` dict (mutated in place when rewritten).
    """
    try:
        from .deployment_config import get_deployment_config
        flags = get_deployment_config().get('features', {})
        if not flags.get('has_wiki_rewriting', False):
            return result
        # With kiwix wiki enabled, wiki_url covers wikipedia, so skip that tag.
        has_kiwix_wiki = flags.get('has_kiwix_wiki', False)
    except Exception:
        return result

    try:
        from .wiki_rewrite import rewrite_wiki_link
    except ImportError:
        logger.debug("wiki_rewrite module not available")
        return result

    extratags = result.get('extratags', {})
    if not extratags:
        return result

    rewrites = {}
    for tag in _WIKI_TAGS:
        if has_kiwix_wiki and tag == 'wikipedia':
            continue
        current = extratags.get(tag)
        if current:
            url, status = rewrite_wiki_link(tag, current)
            if status != 'original':
                extratags[tag] = url
                rewrites[tag] = status

    if not rewrites:
        return result

    result['extratags'] = extratags
    result.setdefault('sources', {})['wiki_rewrites'] = rewrites
    logger.debug(f"Wiki rewrites for {result.get('osm_type')}/{result.get('osm_id')}: {rewrites}")
    return result
# ── Wiki Index enrichment ───────────────────────────────────────────────
def _enrich_with_wiki_index(result):
    """
    Attach wiki summary, URLs and population from the wiki index.

    Runs only when the ``has_kiwix_wiki`` feature flag is on and the
    ``wiki_index`` module reports itself available. The lookup key is the
    place name, class/type and state/country; missing state or country is
    derived from the 'wikipedia' extratag when possible, and
    boundary/administrative places are looked up under their
    ``place`` / ``linked_place`` type instead.

    Returns the same ``result`` dict (fields are only added, never removed).
    """
    try:
        from .deployment_config import get_deployment_config
        flags = get_deployment_config().get('features', {})
        if not flags.get('has_kiwix_wiki', False):
            return result
    except Exception:
        return result

    try:
        from . import wiki_index
    except ImportError:
        logger.debug("wiki_index module not available")
        return result
    if not wiki_index.is_available():
        return result

    # Gather the match criteria.
    name = result.get('name', '')
    osm_class = result.get('class', '')
    osm_type_tag = result.get('type', '')
    address = result.get('address', {})
    state = address.get('state', '')
    country_code = address.get('country_code', '')
    extratags = result.get('extratags', {})

    # Fill in missing state/country from the wikipedia title if there is one.
    needs_region = not state or not country_code
    if needs_region and extratags.get('wikipedia'):
        derived_state, derived_country = _parse_state_from_wikipedia(extratags['wikipedia'])
        if derived_state and not state:
            state = derived_state
        if derived_country and not country_code:
            country_code = derived_country

    # boundary/administrative carries its real place type in the extratags
    # (e.g. extratags.place='city' -> look up as place:city).
    if (osm_class, osm_type_tag) == ('boundary', 'administrative'):
        place_tag = extratags.get('place') or extratags.get('linked_place')
        if place_tag:
            osm_class, osm_type_tag = 'place', place_tag

    if not (name and osm_class and osm_type_tag):
        return result

    wiki_data = wiki_index.lookup_wiki(name, osm_class, osm_type_tag, state, country_code)
    if not wiki_data:
        return result

    # Copy over whichever wiki fields the lookup returned.
    for field in ('wiki_summary', 'wiki_url', 'wikivoyage_url', 'wiki_population'):
        if field in wiki_data:
            result[field] = wiki_data[field]
    result.setdefault('sources', {})['wiki_index'] = True
    logger.debug(f"Wiki index enrichment for {name}")
    return result
# ── Nominatim parsing ───────────────────────────────────────────────────
# Nominatim address array uses rank_address to indicate what each entry is.
# We map rank ranges to our flat address fields.
# NOTE(review): _parse_nominatim_address does its own class/type/rank checks
# and does not read this table; confirm whether other modules still use it.
RANK_TO_FIELD = {
    4: 'country',
    5: 'postcode',
    6: 'state',  # rank 6 = county in US, but we try name matching
    8: 'state',
    12: 'county',
    16: 'city',
    20: 'neighbourhood',
    22: 'neighbourhood',
    26: 'road',
    28: 'house_number',
}
def _parse_nominatim_address(address_array, country_code=None):
    """Parse Nominatim's ranked address array into a flat address dict.

    Args:
        address_array: list of address-entry dicts (may be None or empty).
            Each entry is read for ``isaddress``, ``localname``,
            ``rank_address``, ``type`` and ``class``; entries whose
            ``isaddress`` is falsy are ignored.
        country_code: copied verbatim into the ``country_code`` field.

    Returns:
        dict with exactly the keys house_number, road, neighbourhood, city,
        state, postcode, country and country_code. Fields that could not be
        resolved are None. County is not part of the output; the key is
        absent for every input, including an empty or None array.
    """
    addr = {
        'house_number': None,
        'road': None,
        'neighbourhood': None,
        'city': None,
        'state': None,
        'postcode': None,
        'country': None,
        'country_code': country_code,
    }
    for entry in address_array or ():
        if not entry.get('isaddress', False):
            continue
        name = entry.get('localname', '')
        rank = entry.get('rank_address', 0)
        etype = entry.get('type', '')
        eclass = entry.get('class', '')
        is_admin_boundary = eclass == 'boundary' and etype == 'administrative'

        # Explicit type-based assignments (more reliable than rank alone).
        # Branch order matters: the first matching branch consumes the entry.
        if etype == 'country' and eclass == 'place':
            addr['country'] = name
        elif etype == 'state' or (is_admin_boundary and rank == 8):
            if not addr['state']:
                addr['state'] = name
        elif etype == 'county' or (is_admin_boundary and rank in (10, 12)):
            # County is not in the output spec. The branch still has to match
            # so a county entry is not picked up by a later rule.
            continue
        elif etype in ('city', 'town', 'village', 'hamlet') and eclass == 'place':
            if not addr['city']:
                addr['city'] = name
        elif is_admin_boundary and rank == 16:
            # City-level admin boundary (common in US)
            if not addr['city']:
                addr['city'] = name
        elif etype == 'postcode':
            addr['postcode'] = name
        elif eclass == 'highway' or rank == 26:
            if not addr['road']:
                addr['road'] = name
        elif etype == 'house_number' or rank == 28:
            addr['house_number'] = name
        elif rank in (20, 22) and not addr['neighbourhood']:
            addr['neighbourhood'] = name
    return addr
def _parse_nominatim(data):
    """Parse a Nominatim /details response into our canonical shape.

    Args:
        data: decoded JSON object from the details endpoint.

    Returns:
        dict with keys osm_type, osm_id, name, category, class, type,
        address, centroid, extratags, names, source ('nominatim_local') and
        boundary (the GeoJSON geometry when it is a Polygon/MultiPolygon,
        else None).

    The ``names``, ``extratags``, ``centroid`` and ``geometry`` fields are
    read defensively: a null value or a non-object (for example an empty JSON
    array standing in for an empty tag set) is treated as "no data" instead of
    raising, so a sparse record still parses.
    """
    osm_type = data.get('osm_type', '')
    osm_id = data.get('osm_id', 0)
    osm_class = data.get('category', '')
    osm_type_tag = data.get('type', '')

    # Centroid: GeoJSON order is [lon, lat]; fall back to 0/0 when unusable.
    centroid_geom = data.get('centroid')
    coords = centroid_geom.get('coordinates', [0, 0]) if isinstance(centroid_geom, dict) else [0, 0]
    if isinstance(coords, (list, tuple)) and len(coords) >= 2:
        centroid = {'lat': coords[1], 'lon': coords[0]}
    else:
        centroid = {'lat': 0, 'lon': 0}

    # Names
    names = data.get('names')
    if not isinstance(names, dict):
        names = {}
    display_name = data.get('localname', '') or names.get('name', '')

    # Address
    address = _parse_nominatim_address(
        data.get('address', []),
        country_code=data.get('country_code')
    )
    # Use calculated_postcode if address parse didn't find one
    if not address.get('postcode') and data.get('calculated_postcode'):
        address['postcode'] = data['calculated_postcode']

    # Extratags
    raw_extra = data.get('extratags')
    if not isinstance(raw_extra, dict):
        raw_extra = {}
    extratags = {
        'opening_hours': raw_extra.get('opening_hours'),
        'phone': raw_extra.get('phone') or raw_extra.get('contact:phone'),
        'website': raw_extra.get('website') or raw_extra.get('contact:website') or raw_extra.get('url'),
        'email': raw_extra.get('email') or raw_extra.get('contact:email'),
        'wikipedia': raw_extra.get('wikipedia'),
        'wikidata': raw_extra.get('wikidata'),
        'cuisine': raw_extra.get('cuisine'),
        'operator': raw_extra.get('operator'),
        'wheelchair': raw_extra.get('wheelchair'),
        'fee': raw_extra.get('fee'),
        'takeaway': raw_extra.get('takeaway'),
        'place': raw_extra.get('place'),
        'linked_place': raw_extra.get('linked_place'),
    }

    # Category: use extratags.place for boundaries (e.g. "city"), else class/type.
    # The raw class/type are still what is returned in 'class' and 'type'.
    effective_class = osm_class
    effective_type = osm_type_tag
    if osm_class == 'boundary' and osm_type_tag == 'administrative':
        place_tag = raw_extra.get('place') or raw_extra.get('linked_place')
        if place_tag:
            effective_class = 'place'
            effective_type = place_tag
    category = humanize_category(effective_class, effective_type)

    # Filter names: only include extra name tags, not the bare "name"
    extra_names = {k: v for k, v in names.items() if k != 'name'}

    # Boundary geometry (polygon/multipolygon from Nominatim)
    boundary = None
    geom = data.get('geometry')
    if isinstance(geom, dict) and geom.get('type') in ('Polygon', 'MultiPolygon'):
        boundary = geom

    return {
        'osm_type': osm_type,
        'osm_id': osm_id,
        'name': display_name,
        'category': category,
        'class': osm_class,
        'type': osm_type_tag,
        'address': address,
        'centroid': centroid,
        'extratags': extratags,
        'names': extra_names if extra_names else None,
        'source': 'nominatim_local',
        'boundary': boundary,
    }
# ── Overpass parsing ────────────────────────────────────────────────────
OVERPASS_TYPE_MAP = {'N': 'node', 'W': 'way', 'R': 'relation'}


def _build_overpass_query(osm_type, osm_id):
    """Return the Overpass QL query fetching one element's tags and center.

    ``osm_type`` is the single-letter code (N/W/R); an unknown code yields None.
    """
    element = OVERPASS_TYPE_MAP.get(osm_type)
    if element:
        return f"[out:json][timeout:10];{element}({osm_id});out tags center;"
    return None
def _parse_overpass(data, osm_type, osm_id):
    """Parse an Overpass API response into our canonical shape.

    Args:
        data: decoded Overpass JSON; only the first entry of ``elements`` is used.
        osm_type: single-letter element type, echoed into the result.
        osm_id: element id, echoed into the result.

    Returns:
        dict with keys osm_type, osm_id, name, category, class, type,
        address, centroid, extratags, names and source ('overpass'), or None
        when the response contains no elements.
    """
    elements = data.get('elements', [])
    if not elements:
        return None
    elem = elements[0]
    tags = elem.get('tags', {})

    # Centroid: the element's own lat/lon when present, otherwise its 'center'.
    # Compare against None rather than truthiness: 0 is a valid coordinate
    # (equator / prime meridian) and must not be mistaken for "missing".
    center = elem.get('center') or {}
    lat = elem.get('lat')
    if lat is None:
        lat = center.get('lat')
    lon = elem.get('lon')
    if lon is None:
        lon = center.get('lon')
    if lat is not None and lon is not None:
        centroid = {'lat': lat, 'lon': lon}
    else:
        centroid = {'lat': 0, 'lon': 0}

    # Determine class/type from tags — Overpass doesn't have a canonical class field
    # Use the first recognized class tag
    osm_class = ''
    osm_type_tag = ''
    for cls in ('amenity', 'shop', 'leisure', 'tourism', 'natural', 'highway',
                'boundary', 'place', 'building', 'waterway', 'landuse', 'historic'):
        if cls in tags:
            osm_class = cls
            osm_type_tag = tags[cls]
            break
    category = humanize_category(osm_class, osm_type_tag)

    # Address from addr:* tags
    address = {
        'house_number': tags.get('addr:housenumber'),
        'road': tags.get('addr:street'),
        'neighbourhood': tags.get('addr:suburb') or tags.get('addr:neighbourhood'),
        'city': tags.get('addr:city'),
        'state': tags.get('addr:state'),
        'postcode': tags.get('addr:postcode'),
        'country': tags.get('addr:country'),
        # First two lower-cased characters of addr:country_code (or, failing
        # that, addr:country); None when neither tag is present.
        'country_code': tags.get('addr:country_code',
                                 tags.get('addr:country', '')).lower()[:2] or None,
    }

    # Extratags
    extratags = {
        'opening_hours': tags.get('opening_hours'),
        'phone': tags.get('phone') or tags.get('contact:phone'),
        'website': tags.get('website') or tags.get('contact:website') or tags.get('url'),
        'email': tags.get('email') or tags.get('contact:email'),
        'wikipedia': tags.get('wikipedia'),
        'wikidata': tags.get('wikidata'),
        'cuisine': tags.get('cuisine'),
        'operator': tags.get('operator'),
        'wheelchair': tags.get('wheelchair'),
        'fee': tags.get('fee'),
        'takeaway': tags.get('takeaway'),
    }

    # Names: the bare name plus any localized / alternative name tags
    name = tags.get('name', '')
    extra_names = {
        k: v for k, v in tags.items()
        if k.startswith('name:') or k in ('alt_name', 'old_name', 'short_name', 'official_name')
    }

    return {
        'osm_type': osm_type,
        'osm_id': osm_id,
        'name': name,
        'category': category,
        'class': osm_class,
        'type': osm_type_tag,
        'address': address,
        'centroid': centroid,
        'extratags': extratags,
        'names': extra_names if extra_names else None,
        'source': 'overpass',
    }
# ── Public API ──────────────────────────────────────────────────────────
def get_place_detail(osm_type, osm_id):
    """
    Fetch place details for an OSM element.

    Lookup order: SQLite cache, then the local Nominatim details endpoint,
    then the Overpass API. Fresh results are run through the enrichment
    pipeline and cached before being returned; cache hits only get the wiki
    index enrichment re-applied.

    Returns (dict, status_code):
    - (data, 200) on success
    - (error_dict, 400) if osm_type is not N/W/R or osm_id is not positive
    - (error_dict, 404) if not found in any source
    - (error_dict, 502) if both sources error
    """
    osm_type = osm_type.upper()
    if osm_type not in VALID_OSM_TYPES:
        return {'error': f'Invalid osm_type: {osm_type}. Must be N, W, or R.'}, 400
    if osm_id <= 0:
        return {'error': 'osm_id must be a positive integer'}, 400

    # 1. Cache
    cached = cache_get(osm_type, osm_id)
    if cached:
        cached = _enrich_with_wiki_index(cached)
        logger.debug(f"Cache hit: {osm_type}/{osm_id}")
        return cached, 200

    # 2. Local Nominatim
    nominatim_result = None
    nominatim_error = None
    nominatim_params = {
        'osmtype': osm_type,
        'osmid': osm_id,
        'format': 'json',
        'addressdetails': 1,
        'hierarchy': 0,
        'keywords': 0,
        'polygon_geojson': 1,
    }
    try:
        reply = http_requests.get(NOMINATIM_URL, params=nominatim_params, timeout=5)
        if reply.status_code == 200:
            payload = reply.json()
            # Nominatim returns a result even for IDs not in its DB,
            # but they'll have empty/minimal data. Check for osm_id match.
            if payload.get('osm_id') == osm_id:
                nominatim_result = _parse_nominatim(payload)
                logger.debug(f"Nominatim hit: {osm_type}/{osm_id}")
    except Exception as exc:
        nominatim_error = str(exc)
        logger.warning(f"Nominatim error for {osm_type}/{osm_id}: {exc}")

    if nominatim_result:
        return _enrich_and_cache(nominatim_result, osm_type, osm_id, 'nominatim_local'), 200

    # 3. Overpass fallback
    overpass_result = None
    overpass_error = None
    try:
        query = _build_overpass_query(osm_type, osm_id)
        if query:
            reply = http_requests.post(
                OVERPASS_URL,
                data={'data': query},
                headers={'User-Agent': OVERPASS_UA},
                timeout=10,
            )
            status = reply.status_code
            if status == 200:
                overpass_result = _parse_overpass(reply.json(), osm_type, osm_id)
                if overpass_result:
                    logger.debug(f"Overpass hit: {osm_type}/{osm_id}")
            elif status == 429:
                overpass_error = "Overpass rate limited"
                logger.warning(f"Overpass 429 for {osm_type}/{osm_id}")
            else:
                overpass_error = f"Overpass HTTP {status}"
    except Exception as exc:
        overpass_error = str(exc)
        logger.warning(f"Overpass error for {osm_type}/{osm_id}: {exc}")

    if overpass_result:
        return _enrich_and_cache(overpass_result, osm_type, osm_id, 'overpass'), 200

    # 4. Nothing usable: 502 only when BOTH sources reported an error,
    # otherwise the element simply was not found.
    if nominatim_error and overpass_error:
        logger.error(f"Both sources failed for {osm_type}/{osm_id}: "
                     f"Nominatim={nominatim_error}, Overpass={overpass_error}")
        return {'error': 'Both data sources unavailable'}, 502
    return {'error': f'{osm_type}/{osm_id} not found'}, 404


def _enrich_and_cache(place, osm_type, osm_id, source):
    """Run the enrichment pipeline on a freshly fetched place, cache it, return it."""
    place = _enrich_with_overture(place, osm_type, osm_id)
    place = _enrich_with_google(place, osm_type, osm_id)
    place = _enrich_wiki_links(place)
    place = _enrich_with_wiki_index(place)
    cache_put(osm_type, osm_id, place, source)
    return place
# ── Wikidata lookup ─────────────────────────────────────────────────────
WIKIDATA_API_URL = "https://www.wikidata.org/w/api.php"
def get_place_by_wikidata(wikidata_id):
"""
Fetch place details from Wikidata entity.
Returns (dict, status_code):
- (data, 200) on success
- (error_dict, 404) if entity not found
- (error_dict, 400) if invalid ID format
- (error_dict, 502) on API error
"""
# Validate wikidata ID format (Q followed by digits)
wikidata_id = wikidata_id.upper().strip()
if not wikidata_id.startswith("Q") or not wikidata_id[1:].isdigit():
return {"error": f"Invalid wikidata ID: {wikidata_id}. Must be Q followed by digits."}, 400
try:
resp = http_requests.get(WIKIDATA_API_URL, params={
"action": "wbgetentities",
"ids": wikidata_id,
"format": "json",
"languages": "en",
"props": "labels|descriptions|claims|sitelinks",
}, timeout=10, headers={"User-Agent": "Navi/1.0 (forge.echo6.co/matt/recon)"})
if resp.status_code != 200:
logger.warning(f"Wikidata API error for {wikidata_id}: HTTP {resp.status_code}")
return {"error": "Wikidata API error"}, 502
data = resp.json()
entities = data.get("entities", {})
entity = entities.get(wikidata_id)
if not entity or entity.get("missing"):
return {"error": f"Wikidata entity {wikidata_id} not found"}, 404
# Extract basic info
labels = entity.get("labels", {})
descriptions = entity.get("descriptions", {})
claims = entity.get("claims", {})
name = labels.get("en", {}).get("value", wikidata_id)
description = descriptions.get("en", {}).get("value", "")
# Extract coordinates from P625 (coordinate location)
lat, lon = None, None
if "P625" in claims:
coord_claim = claims["P625"]
if coord_claim and coord_claim[0].get("mainsnak", {}).get("datavalue"):
coord_val = coord_claim[0]["mainsnak"]["datavalue"]["value"]
lat = coord_val.get("latitude")
lon = coord_val.get("longitude")
# Extract population from P1082
population = None
if "P1082" in claims:
pop_claims = claims["P1082"]
if pop_claims:
# Get the most recent population value
for claim in pop_claims:
if claim.get("mainsnak", {}).get("datavalue"):
try:
population = int(claim["mainsnak"]["datavalue"]["value"]["amount"].lstrip("+"))
break
except (KeyError, ValueError):
pass
# Extract country from P17
country = None
if "P17" in claims:
country_claims = claims["P17"]
if country_claims and country_claims[0].get("mainsnak", {}).get("datavalue"):
country_id = country_claims[0]["mainsnak"]["datavalue"]["value"]["id"]
# Could resolve this to a name, but for now just store the ID
# Extract instance of (P31) for type classification
instance_of = []
if "P31" in claims:
for claim in claims["P31"]:
if claim.get("mainsnak", {}).get("datavalue"):
instance_of.append(claim["mainsnak"]["datavalue"]["value"]["id"])
# Extract OSM relation ID if available (P402)
osm_relation_id = None
if "P402" in claims:
osm_claims = claims["P402"]
if osm_claims and osm_claims[0].get("mainsnak", {}).get("datavalue"):
osm_relation_id = osm_claims[0]["mainsnak"]["datavalue"]["value"]
# Extract Wikipedia sitelink
sitelinks = entity.get("sitelinks", {})
wikipedia = None
if "enwiki" in sitelinks:
wiki_title = sitelinks["enwiki"].get("title", "")
if wiki_title:
wikipedia = f"en:{wiki_title}"
result = {
"wikidata_id": wikidata_id,
"name": name,
"description": description,
"centroid": {"lat": lat, "lon": lon} if lat and lon else None,
"population": population,
"instance_of": instance_of,
"osm_relation_id": osm_relation_id,
"source": "wikidata",
"extratags": {
"wikidata": wikidata_id,
},
}
if wikipedia:
result["extratags"]["wikipedia"] = wikipedia
# Fetch boundary polygon from Nominatim if we have an OSM relation ID
boundary = None
if osm_relation_id:
try:
nom_resp = http_requests.get(NOMINATIM_URL, params={
'osmtype': 'R',
'osmid': osm_relation_id,
'format': 'json',
'polygon_geojson': 1,
}, timeout=5)
if nom_resp.status_code == 200:
nom_data = nom_resp.json()
geom = nom_data.get('geometry')
if geom and geom.get('type') in ('Polygon', 'MultiPolygon'):
boundary = geom
logger.debug(f"Wikidata boundary hit for {wikidata_id}")
except Exception as e:
logger.debug(f"Wikidata boundary fetch failed: {e}")
result["boundary"] = boundary
logger.debug(f"Wikidata hit: {wikidata_id} -> {name}")
return result, 200
except Exception as e:
logger.warning(f"Wikidata error for {wikidata_id}: {e}")
return {"error": "Wikidata lookup failed"}, 502