feat(place): add wiki location index lookup

Add wiki summary, URLs, and population data from wiki_index.db to
place detail and reverse geocode responses.

- Add lib/wiki_index.py: SQLite read-only lookup module
- Enrich /api/place responses via _enrich_with_wiki_index()
- Enrich /api/reverse responses via _enrich_reverse_result_with_wiki()
- Gate on has_kiwix_wiki feature flag (default false in home.yaml)
- Direct match only on place_name + osm_key + osm_value + state + country_code
- Additive fields: wiki_summary, wiki_url, wikivoyage_url, wiki_population

DB path: /opt/recon/data/wiki_index.db

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Ubuntu 2026-04-29 17:07:31 +00:00
commit 2d1dcbf70c
3 changed files with 260 additions and 0 deletions

View file

@ -15,6 +15,66 @@ from .utils import setup_logging
logger = setup_logging('recon.netsyms_api') logger = setup_logging('recon.netsyms_api')
def _enrich_reverse_result_with_wiki(result):
"""
Add wiki data to a reverse geocode result if available.
Only runs when has_kiwix_wiki is enabled.
"""
try:
from .deployment_config import get_deployment_config
deploy_config = get_deployment_config()
features = deploy_config.get('features', {})
if not features.get('has_kiwix_wiki', False):
return result
except Exception:
return result
try:
from . import wiki_index
except ImportError:
return result
if not wiki_index.is_available():
return result
# Extract match criteria from Photon raw props
raw = result.get('raw', {})
place_name = raw.get('name', '')
osm_key = raw.get('osm_key', '')
osm_value = raw.get('osm_value', '')
state = raw.get('state', '')
country = raw.get('country', '')
# Extract country code (Photon uses full country name, we need code)
country_code = raw.get('countrycode', '').lower()
if not country_code:
country_lower = country.lower() if country else ''
if 'united states' in country_lower or country_lower == 'usa':
country_code = 'us'
elif 'canada' in country_lower:
country_code = 'ca'
if not place_name or not osm_key or not osm_value or not country_code:
return result
# Look up wiki data
wiki_data = wiki_index.lookup_wiki(place_name, osm_key, osm_value, state, country_code)
if wiki_data:
# Add wiki fields to result (additive only)
if 'wiki_summary' in wiki_data:
result['wiki_summary'] = wiki_data['wiki_summary']
if 'wiki_url' in wiki_data:
result['wiki_url'] = wiki_data['wiki_url']
if 'wikivoyage_url' in wiki_data:
result['wikivoyage_url'] = wiki_data['wikivoyage_url']
if 'wiki_population' in wiki_data:
result['wiki_population'] = wiki_data['wiki_population']
return result
netsyms_bp = Blueprint('netsyms', __name__) netsyms_bp = Blueprint('netsyms', __name__)
geocode_bp = Blueprint('geocode', __name__) geocode_bp = Blueprint('geocode', __name__)
@ -123,4 +183,7 @@ def api_reverse():
from .geocode import _parse_photon_features from .geocode import _parse_photon_features
results = _parse_photon_features(features, source='photon_reverse') results = _parse_photon_features(features, source='photon_reverse')
# Enrich results with wiki data
results = [_enrich_reverse_result_with_wiki(r) for r in results]
return jsonify({'query': query_str, 'results': results, 'count': len(results)}) return jsonify({'query': query_str, 'results': results, 'count': len(results)})

View file

@ -328,6 +328,65 @@ def _enrich_wiki_links(result):
return result return result
# ── Wiki Index enrichment ───────────────────────────────────────────────
def _enrich_with_wiki_index(result):
"""
Add wiki summary, URLs, and population from wiki_index.db.
Only runs when has_kiwix_wiki is enabled. Direct match only.
Returns the (possibly enriched) result dict.
"""
try:
from .deployment_config import get_deployment_config
deploy_config = get_deployment_config()
features = deploy_config.get('features', {})
if not features.get('has_kiwix_wiki', False):
return result
except Exception:
return result
try:
from . import wiki_index
except ImportError:
logger.debug("wiki_index module not available")
return result
if not wiki_index.is_available():
return result
# Extract match criteria from result
name = result.get('name', '')
osm_class = result.get('class', '')
osm_type_tag = result.get('type', '')
address = result.get('address', {})
state = address.get('state', '')
country_code = address.get('country_code', '')
if not name or not osm_class or not osm_type_tag:
return result
# Look up wiki data
wiki_data = wiki_index.lookup_wiki(name, osm_class, osm_type_tag, state, country_code)
if not wiki_data:
return result
# Add wiki fields to result (additive only)
if 'wiki_summary' in wiki_data:
result['wiki_summary'] = wiki_data['wiki_summary']
if 'wiki_url' in wiki_data:
result['wiki_url'] = wiki_data['wiki_url']
if 'wikivoyage_url' in wiki_data:
result['wikivoyage_url'] = wiki_data['wikivoyage_url']
if 'wiki_population' in wiki_data:
result['wiki_population'] = wiki_data['wiki_population']
result.setdefault('sources', {})['wiki_index'] = True
logger.debug(f"Wiki index enrichment for {name}")
return result
# ── Nominatim parsing ─────────────────────────────────────────────────── # ── Nominatim parsing ───────────────────────────────────────────────────
# Nominatim address array uses rank_address to indicate what each entry is. # Nominatim address array uses rank_address to indicate what each entry is.
@ -625,6 +684,7 @@ def get_place_detail(osm_type, osm_id):
nominatim_result = _enrich_with_overture(nominatim_result, osm_type, osm_id) nominatim_result = _enrich_with_overture(nominatim_result, osm_type, osm_id)
nominatim_result = _enrich_with_google(nominatim_result, osm_type, osm_id) nominatim_result = _enrich_with_google(nominatim_result, osm_type, osm_id)
nominatim_result = _enrich_wiki_links(nominatim_result) nominatim_result = _enrich_wiki_links(nominatim_result)
nominatim_result = _enrich_with_wiki_index(nominatim_result)
cache_put(osm_type, osm_id, nominatim_result, 'nominatim_local') cache_put(osm_type, osm_id, nominatim_result, 'nominatim_local')
return nominatim_result, 200 return nominatim_result, 200
@ -658,6 +718,7 @@ def get_place_detail(osm_type, osm_id):
overpass_result = _enrich_with_overture(overpass_result, osm_type, osm_id) overpass_result = _enrich_with_overture(overpass_result, osm_type, osm_id)
overpass_result = _enrich_with_google(overpass_result, osm_type, osm_id) overpass_result = _enrich_with_google(overpass_result, osm_type, osm_id)
overpass_result = _enrich_wiki_links(overpass_result) overpass_result = _enrich_wiki_links(overpass_result)
overpass_result = _enrich_with_wiki_index(overpass_result)
cache_put(osm_type, osm_id, overpass_result, 'overpass') cache_put(osm_type, osm_id, overpass_result, 'overpass')
return overpass_result, 200 return overpass_result, 200

136
lib/wiki_index.py Normal file
View file

@ -0,0 +1,136 @@
"""
Wiki location index lookup.
Provides wiki summaries, URLs, and population data from the wiki_index.db
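for place detail and reverse geocode enrichment. Read-only; the connection
is opened lazily on first use and then reused.
DB path: data/wiki_index.db under the parent of this module's directory
(/opt/recon/data/wiki_index.db when installed under /opt/recon).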
"""
import os
import sqlite3
from .utils import setup_logging
logger = setup_logging('recon.wiki_index')
# Module-level SQLite connection; None until _get_db() opens the database.
_db_conn = None
# Maps zim_books.book_type -> zim_books.public_url; populated by _get_db()
# and read by lookup_wiki() when building article URLs.
_zim_books = {}
def _get_db():
    """Return a module-level SQLite connection (lazy init, read-only).

    On first successful use the database is opened read-only, the
    ``zim_books`` table is loaded into ``_zim_books`` and the connection is
    cached in ``_db_conn``. The connection is only cached once that load has
    succeeded, so a failed attempt leaves no half-initialised state behind
    and the next call retries from scratch.

    Returns:
        The shared ``sqlite3.Connection`` (rows are ``sqlite3.Row``), or
        None if the database file is missing or cannot be opened/read.
    """
    global _db_conn, _zim_books
    if _db_conn is not None:
        return _db_conn

    db_path = os.path.join(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
        'data', 'wiki_index.db'
    )
    if not os.path.exists(db_path):
        logger.warning(f"Wiki index DB not found at {db_path}")
        return None

    conn = None
    try:
        # Open read-only with URI
        conn = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True, check_same_thread=False)
        conn.row_factory = sqlite3.Row
        # Load zim_books for URL construction. This is also the first
        # statement run on the connection, so a file that is not a usable
        # database typically fails here rather than in connect().
        rows = conn.execute("SELECT book_type, public_url FROM zim_books").fetchall()
        books = {row['book_type']: row['public_url'] for row in rows}
    except Exception as e:
        logger.error(f"Failed to open wiki index DB: {e}")
        # Do not leak the handle or leave it cached: otherwise later calls
        # would return a connection that this call reported as unavailable.
        if conn is not None:
            conn.close()
        return None

    # Publish the state only after everything succeeded. The dict is updated
    # in place so existing references to _zim_books stay valid.
    _zim_books.clear()
    _zim_books.update(books)
    _db_conn = conn
    logger.info(f"Wiki index DB ready at {db_path} ({len(_zim_books)} ZIM books)")
    return _db_conn
def lookup_wiki(place_name, osm_key, osm_value, state, country_code):
    """
    Look up wiki data for a place by exact match.

    Inputs are stripped (and the OSM key/value and country code lower-cased)
    before matching. Only rows with ``wikipedia_exists = 1`` are considered,
    and a NULL ``state`` in the index matches an empty ``state`` argument.

    Args:
        place_name: Name of the place (e.g., "Twin Falls")
        osm_key: OSM key (e.g., "place", "natural", "waterway")
        osm_value: OSM value (e.g., "city", "peak", "river")
        state: State/province name (may be None)
        country_code: ISO country code (e.g., "us", "ca")

    Returns:
        dict with any of wiki_summary, wiki_url, wikivoyage_url,
        wiki_population that are available for the matched row, or None if
        the index is unavailable, a required input is empty, no row matches,
        the matched row yields no fields, or the query fails.
    """
    db = _get_db()
    if db is None:
        return None

    # Normalize inputs
    place_name = (place_name or '').strip()
    osm_key = (osm_key or '').strip().lower()
    osm_value = (osm_value or '').strip().lower()
    state = (state or '').strip()
    country_code = (country_code or '').strip().lower()

    if not place_name or not osm_key or not osm_value or not country_code:
        return None

    try:
        # Direct match query
        row = db.execute("""
            SELECT
                summary,
                wikipedia_title,
                wikivoyage_title,
                wikipedia_exists,
                wikivoyage_exists,
                wiki_population
            FROM wiki_places
            WHERE place_name = ?
              AND osm_key = ?
              AND osm_value = ?
              AND COALESCE(state, '') = ?
              AND country_code = ?
              AND wikipedia_exists = 1
            LIMIT 1
        """, (place_name, osm_key, osm_value, state, country_code)).fetchone()

        if not row:
            return None

        result = {}

        # Summary
        if row['summary']:
            result['wiki_summary'] = row['summary']

        # Wikipedia URL
        if row['wikipedia_exists']:
            wiki_url = _zim_article_url('wikipedia', row['wikipedia_title'])
            if wiki_url is not None:
                result['wiki_url'] = wiki_url

        # Wikivoyage URL
        if row['wikivoyage_exists']:
            voyage_url = _zim_article_url('wikivoyage', row['wikivoyage_title'])
            if voyage_url is not None:
                result['wikivoyage_url'] = voyage_url

        # Population
        if row['wiki_population']:
            result['wiki_population'] = row['wiki_population']

        return result if result else None
    except Exception as e:
        # Best-effort lookup: a query failure must not break the caller.
        logger.warning(f"Wiki lookup error for {place_name}: {e}")
        return None


def _zim_article_url(book_type, title):
    """Build the article URL for ``title`` in the ZIM book ``book_type``.

    Returns None when the title is empty or when no usable base URL is
    registered for the book. A book whose ``public_url`` is NULL is skipped
    instead of producing a URL that starts with the text "None".
    """
    base_url = _zim_books.get(book_type)
    if base_url is None or not title:
        return None
    article = title.replace(' ', '_')
    return f"{base_url}/A/{article}"
def is_available():
    """Report whether _get_db() currently yields a wiki index connection."""
    connection = _get_db()
    return connection is not None