From 2d1dcbf70cb17d44e2527f57d686c1d15229a7bb Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Wed, 29 Apr 2026 17:07:31 +0000 Subject: [PATCH 1/5] feat(place): add wiki location index lookup Add wiki summary, URLs, and population data from wiki_index.db to place detail and reverse geocode responses. - Add lib/wiki_index.py: SQLite read-only lookup module - Enrich /api/place responses via _enrich_with_wiki_index() - Enrich /api/reverse responses via _enrich_reverse_result_with_wiki() - Gate on has_kiwix_wiki feature flag (default false in home.yaml) - Direct match only on place_name + osm_key + osm_value + state + country_code - Additive fields: wiki_summary, wiki_url, wikivoyage_url, wiki_population DB path: /opt/recon/data/wiki_index.db Co-Authored-By: Claude Opus 4.5 --- lib/netsyms_api.py | 63 ++++++++++++++++++++ lib/place_detail.py | 61 ++++++++++++++++++++ lib/wiki_index.py | 136 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 260 insertions(+) create mode 100644 lib/wiki_index.py diff --git a/lib/netsyms_api.py b/lib/netsyms_api.py index 4a0847f..ce76e1f 100644 --- a/lib/netsyms_api.py +++ b/lib/netsyms_api.py @@ -15,6 +15,66 @@ from .utils import setup_logging logger = setup_logging('recon.netsyms_api') + +def _enrich_reverse_result_with_wiki(result): + """ + Add wiki data to a reverse geocode result if available. + Only runs when has_kiwix_wiki is enabled. + """ + try: + from .deployment_config import get_deployment_config + deploy_config = get_deployment_config() + features = deploy_config.get('features', {}) + if not features.get('has_kiwix_wiki', False): + return result + except Exception: + return result + + try: + from . import wiki_index + except ImportError: + return result + + if not wiki_index.is_available(): + return result + + # Extract match criteria from Photon raw props + raw = result.get('raw', {}) + place_name = raw.get('name', '') + osm_key = raw.get('osm_key', '') + osm_value = raw.get('osm_value', '') + state = raw.get('state', '') + country = raw.get('country', '') + + # Extract country code (Photon uses full country name, we need code) + country_code = raw.get('countrycode', '').lower() + if not country_code: + country_lower = country.lower() if country else '' + if 'united states' in country_lower or country_lower == 'usa': + country_code = 'us' + elif 'canada' in country_lower: + country_code = 'ca' + + if not place_name or not osm_key or not osm_value or not country_code: + return result + + # Look up wiki data + wiki_data = wiki_index.lookup_wiki(place_name, osm_key, osm_value, state, country_code) + if wiki_data: + # Add wiki fields to result (additive only) + if 'wiki_summary' in wiki_data: + result['wiki_summary'] = wiki_data['wiki_summary'] + if 'wiki_url' in wiki_data: + result['wiki_url'] = wiki_data['wiki_url'] + if 'wikivoyage_url' in wiki_data: + result['wikivoyage_url'] = wiki_data['wikivoyage_url'] + if 'wiki_population' in wiki_data: + result['wiki_population'] = wiki_data['wiki_population'] + + return result + + + netsyms_bp = Blueprint('netsyms', __name__) geocode_bp = Blueprint('geocode', __name__) @@ -123,4 +183,7 @@ def api_reverse(): from .geocode import _parse_photon_features results = _parse_photon_features(features, source='photon_reverse') + # Enrich results with wiki data + results = [_enrich_reverse_result_with_wiki(r) for r in results] + return jsonify({'query': query_str, 'results': results, 'count': len(results)}) diff --git a/lib/place_detail.py b/lib/place_detail.py index e85ee54..6db7a4c 100644 --- a/lib/place_detail.py +++ b/lib/place_detail.py @@ -328,6 +328,65 @@ def _enrich_wiki_links(result): return result + + +# ── Wiki Index enrichment ─────────────────────────────────────────────── + +def _enrich_with_wiki_index(result): + """ + Add wiki summary, URLs, and population from wiki_index.db. + Only runs when has_kiwix_wiki is enabled. Direct match only. + Returns the (possibly enriched) result dict. + """ + try: + from .deployment_config import get_deployment_config + deploy_config = get_deployment_config() + features = deploy_config.get('features', {}) + if not features.get('has_kiwix_wiki', False): + return result + except Exception: + return result + + try: + from . import wiki_index + except ImportError: + logger.debug("wiki_index module not available") + return result + + if not wiki_index.is_available(): + return result + + # Extract match criteria from result + name = result.get('name', '') + osm_class = result.get('class', '') + osm_type_tag = result.get('type', '') + address = result.get('address', {}) + state = address.get('state', '') + country_code = address.get('country_code', '') + + if not name or not osm_class or not osm_type_tag: + return result + + # Look up wiki data + wiki_data = wiki_index.lookup_wiki(name, osm_class, osm_type_tag, state, country_code) + if not wiki_data: + return result + + # Add wiki fields to result (additive only) + if 'wiki_summary' in wiki_data: + result['wiki_summary'] = wiki_data['wiki_summary'] + if 'wiki_url' in wiki_data: + result['wiki_url'] = wiki_data['wiki_url'] + if 'wikivoyage_url' in wiki_data: + result['wikivoyage_url'] = wiki_data['wikivoyage_url'] + if 'wiki_population' in wiki_data: + result['wiki_population'] = wiki_data['wiki_population'] + + result.setdefault('sources', {})['wiki_index'] = True + logger.debug(f"Wiki index enrichment for {name}") + + return result + # ── Nominatim parsing ─────────────────────────────────────────────────── # Nominatim address array uses rank_address to indicate what each entry is. @@ -625,6 +684,7 @@ def get_place_detail(osm_type, osm_id): nominatim_result = _enrich_with_overture(nominatim_result, osm_type, osm_id) nominatim_result = _enrich_with_google(nominatim_result, osm_type, osm_id) nominatim_result = _enrich_wiki_links(nominatim_result) + nominatim_result = _enrich_with_wiki_index(nominatim_result) cache_put(osm_type, osm_id, nominatim_result, 'nominatim_local') return nominatim_result, 200 @@ -658,6 +718,7 @@ def get_place_detail(osm_type, osm_id): overpass_result = _enrich_with_overture(overpass_result, osm_type, osm_id) overpass_result = _enrich_with_google(overpass_result, osm_type, osm_id) overpass_result = _enrich_wiki_links(overpass_result) + overpass_result = _enrich_with_wiki_index(overpass_result) cache_put(osm_type, osm_id, overpass_result, 'overpass') return overpass_result, 200 diff --git a/lib/wiki_index.py b/lib/wiki_index.py new file mode 100644 index 0000000..0b38d56 --- /dev/null +++ b/lib/wiki_index.py @@ -0,0 +1,136 @@ +""" +Wiki location index lookup. + +Provides wiki summaries, URLs, and population data from the wiki_index.db +for place detail enrichment. Read-only, opened once at startup. + +DB path: /opt/recon/data/wiki_index.db +""" +import os +import sqlite3 + +from .utils import setup_logging + +logger = setup_logging('recon.wiki_index') + +_db_conn = None +_zim_books = {} + + +def _get_db(): + """Return a module-level SQLite connection (lazy init, read-only).""" + global _db_conn, _zim_books + + if _db_conn is not None: + return _db_conn + + db_path = os.path.join( + os.path.dirname(os.path.dirname(os.path.abspath(__file__))), + 'data', 'wiki_index.db' + ) + + if not os.path.exists(db_path): + logger.warning(f"Wiki index DB not found at {db_path}") + return None + + try: + # Open read-only with URI + _db_conn = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True, check_same_thread=False) + _db_conn.row_factory = sqlite3.Row + + # Load zim_books for URL construction + rows = _db_conn.execute("SELECT book_type, public_url FROM zim_books").fetchall() + for row in rows: + _zim_books[row['book_type']] = row['public_url'] + + logger.info(f"Wiki index DB ready at {db_path} ({len(_zim_books)} ZIM books)") + return _db_conn + except Exception as e: + logger.error(f"Failed to open wiki index DB: {e}") + return None + + +def lookup_wiki(place_name, osm_key, osm_value, state, country_code): + """ + Look up wiki data for a place by exact match. + + Args: + place_name: Name of the place (e.g., "Twin Falls") + osm_key: OSM key (e.g., "place", "natural", "waterway") + osm_value: OSM value (e.g., "city", "peak", "river") + state: State/province name (may be None) + country_code: ISO country code (e.g., "us", "ca") + + Returns: + dict with wiki_summary, wiki_url, wikivoyage_url, wiki_population + or None if no match found. + """ + db = _get_db() + if db is None: + return None + + # Normalize inputs + place_name = (place_name or '').strip() + osm_key = (osm_key or '').strip().lower() + osm_value = (osm_value or '').strip().lower() + state = (state or '').strip() + country_code = (country_code or '').strip().lower() + + if not place_name or not osm_key or not osm_value or not country_code: + return None + + try: + # Direct match query + row = db.execute(""" + SELECT + summary, + wikipedia_title, + wikivoyage_title, + wikipedia_exists, + wikivoyage_exists, + wiki_population + FROM wiki_places + WHERE place_name = ? + AND osm_key = ? + AND osm_value = ? + AND COALESCE(state, '') = ? + AND country_code = ? + AND wikipedia_exists = 1 + LIMIT 1 + """, (place_name, osm_key, osm_value, state, country_code)).fetchone() + + if not row: + return None + + result = {} + + # Summary + if row['summary']: + result['wiki_summary'] = row['summary'] + + # Wikipedia URL + if row['wikipedia_exists'] and row['wikipedia_title'] and 'wikipedia' in _zim_books: + base_url = _zim_books['wikipedia'] + title = row['wikipedia_title'].replace(' ', '_') + result['wiki_url'] = f"{base_url}/A/{title}" + + # Wikivoyage URL + if row['wikivoyage_exists'] and row['wikivoyage_title'] and 'wikivoyage' in _zim_books: + base_url = _zim_books['wikivoyage'] + title = row['wikivoyage_title'].replace(' ', '_') + result['wikivoyage_url'] = f"{base_url}/A/{title}" + + # Population + if row['wiki_population']: + result['wiki_population'] = row['wiki_population'] + + return result if result else None + + except Exception as e: + logger.warning(f"Wiki lookup error for {place_name}: {e}") + return None + + +def is_available(): + """Check if the wiki index DB is available.""" + return _get_db() is not None From c92178d90f96bce874eff3a6014136cdc27b3434 Mon Sep 17 00:00:00 2001 From: Matt Date: Wed, 29 Apr 2026 17:21:38 +0000 Subject: [PATCH 2/5] fix(place): handle boundary/administrative for wiki lookup Add place/linked_place to extratags so wiki enrichment can detect actual place type (e.g. boundary:administrative with place=city should match wiki_places osm_key=place, osm_value=city). Co-Authored-By: Claude Opus 4.5 --- lib/place_detail.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/lib/place_detail.py b/lib/place_detail.py index 6db7a4c..8bb2330 100644 --- a/lib/place_detail.py +++ b/lib/place_detail.py @@ -364,6 +364,15 @@ def _enrich_with_wiki_index(result): state = address.get('state', '') country_code = address.get('country_code', '') + # Handle boundary/administrative - get actual place type from extratags + # (e.g. boundary:administrative with extratags.place='city' -> place:city) + extratags = result.get('extratags', {}) + if osm_class == 'boundary' and osm_type_tag == 'administrative': + place_tag = extratags.get('place') or extratags.get('linked_place') + if place_tag: + osm_class = 'place' + osm_type_tag = place_tag + if not name or not osm_class or not osm_type_tag: return result @@ -503,6 +512,8 @@ def _parse_nominatim(data): 'wheelchair': raw_extra.get('wheelchair'), 'fee': raw_extra.get('fee'), 'takeaway': raw_extra.get('takeaway'), + 'place': raw_extra.get('place'), + 'linked_place': raw_extra.get('linked_place'), } # Category: use extratags.place for boundaries (e.g. "city"), else class/type From bb7d0169e04144a126aa2f7ae9b21bbc8e926cd0 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Wed, 29 Apr 2026 18:53:03 +0000 Subject: [PATCH 3/5] fix(place): preserve extratags.wikipedia format when has_kiwix_wiki enabled When has_kiwix_wiki is enabled, skip rewriting extratags.wikipedia to a Kiwix content URL. The wiki_url field already provides the correct local Kiwix viewer link for the WikiSummarySection. This preserves the original OSM format (e.g. "en:Title") in extratags.wikipedia so the frontend LINKS section can properly build wikipedia.org URLs. Co-Authored-By: Claude --- lib/place_detail.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/lib/place_detail.py b/lib/place_detail.py index 8bb2330..c2c6845 100644 --- a/lib/place_detail.py +++ b/lib/place_detail.py @@ -290,6 +290,11 @@ def _enrich_wiki_links(result): """ Rewrite wiki-related extratags to local Kiwix URLs where available. Falls back to public URLs. Only runs when has_wiki_rewriting is enabled. + + Note: When has_kiwix_wiki is enabled, we skip rewriting 'wikipedia' since + the wiki_index enrichment provides a proper wiki_url field. This keeps + extratags.wikipedia in the original OSM format for frontend link builders. + Returns the (possibly enriched) result dict. """ try: @@ -298,6 +303,8 @@ def _enrich_wiki_links(result): features = deploy_config.get('features', {}) if not features.get('has_wiki_rewriting', False): return result + # When has_kiwix_wiki is enabled, skip wikipedia rewriting (wiki_url handles it) + has_kiwix_wiki = features.get('has_kiwix_wiki', False) except Exception: return result @@ -313,6 +320,9 @@ def _enrich_wiki_links(result): rewrites = {} for tag in _WIKI_TAGS: + # Skip wikipedia when has_kiwix_wiki is enabled (wiki_url provides the local link) + if tag == 'wikipedia' and has_kiwix_wiki: + continue value = extratags.get(tag) if not value: continue From 1b3ad1bf9ea224554c160947b465de776b14732f Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 30 Apr 2026 03:22:24 +0000 Subject: [PATCH 4/5] fix(google_places): add schema migration for Google columns The place_cache table was missing google_place_id, google_data, and google_fetched_at columns needed by the Google Places enrichment. Adds ALTER TABLE migration in _get_db() to add columns if missing, wrapped in try/except to handle existing columns gracefully. Fixes HTTP 500 on /api/place/W/* endpoints. --- lib/google_places.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/lib/google_places.py b/lib/google_places.py index 8272b81..55cf051 100644 --- a/lib/google_places.py +++ b/lib/google_places.py @@ -47,6 +47,18 @@ def _get_db(): ) """) _db_conn.commit() + # Schema migration: add Google columns to place_cache if missing + for col, coldef in [ + ('google_place_id', 'TEXT'), + ('google_data', 'TEXT'), + ('google_fetched_at', 'INTEGER'), + ]: + try: + _db_conn.execute(f'ALTER TABLE place_cache ADD COLUMN {col} {coldef}') + logger.info(f'Added column {col} to place_cache') + except sqlite3.OperationalError: + pass # Column already exists + _db_conn.commit() return _db_conn From 248f4bded4b3b00a793d7d5b3e647258e785988f Mon Sep 17 00:00:00 2001 From: Matt Date: Sun, 3 May 2026 00:17:49 +0000 Subject: [PATCH 5/5] Fix wiki lookup to match on name+state+country instead of osm_key/osm_value - Remove osm_key/osm_value from wiki_places lookup query - Add fallback matching: try state first, then country only - Parse state/country from wikipedia extratag when address is empty - Add US states and Canadian provinces parsing for wikipedia titles - Apply wiki enrichment to cached results (was missing) Fixes wiki_summary and wiki_url not appearing for boundary/administrative places like Joliet, IL where OSM returns boundary/administrative but wiki_places has place/city. Co-Authored-By: Claude Opus 4.5 --- lib/place_detail.py | 46 ++++++++++++++++++++++++++++- lib/wiki_index.py | 70 ++++++++++++++++++++++++++++----------------- 2 files changed, 89 insertions(+), 27 deletions(-) diff --git a/lib/place_detail.py b/lib/place_detail.py index c2c6845..35ffe28 100644 --- a/lib/place_detail.py +++ b/lib/place_detail.py @@ -22,6 +22,41 @@ OVERPASS_URL = "https://overpass-api.de/api/interpreter" OVERPASS_UA = "Navi/1.0 (forge.echo6.co/matt/recon)" VALID_OSM_TYPES = {"N", "W", "R"} +# US states and Canadian provinces for wikipedia title parsing +US_STATES = { + 'Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado', + 'Connecticut', 'Delaware', 'Florida', 'Georgia', 'Hawaii', 'Idaho', + 'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky', 'Louisiana', + 'Maine', 'Maryland', 'Massachusetts', 'Michigan', 'Minnesota', + 'Mississippi', 'Missouri', 'Montana', 'Nebraska', 'Nevada', + 'New Hampshire', 'New Jersey', 'New Mexico', 'New York', + 'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma', 'Oregon', + 'Pennsylvania', 'Rhode Island', 'South Carolina', 'South Dakota', + 'Tennessee', 'Texas', 'Utah', 'Vermont', 'Virginia', 'Washington', + 'West Virginia', 'Wisconsin', 'Wyoming', 'District of Columbia' +} + +CANADIAN_PROVINCES = { + 'Alberta', 'British Columbia', 'Manitoba', 'New Brunswick', + 'Newfoundland and Labrador', 'Northwest Territories', 'Nova Scotia', + 'Nunavut', 'Ontario', 'Prince Edward Island', 'Quebec', 'Saskatchewan', 'Yukon' +} + + +def _parse_state_from_wikipedia(wikipedia_tag): + """Parse state/province and country from wikipedia extratag like 'en:Joliet, Illinois'""" + if not wikipedia_tag or not wikipedia_tag.startswith('en:'): + return None, None + title = wikipedia_tag[3:] + for state in US_STATES: + if state in title: + return state, 'us' + for prov in CANADIAN_PROVINCES: + if prov in title: + return prov, 'ca' + return None, None + + _db_conn = None @@ -373,10 +408,18 @@ def _enrich_with_wiki_index(result): address = result.get('address', {}) state = address.get('state', '') country_code = address.get('country_code', '') + + # If state/country missing, try to derive from wikipedia extratag + extratags = result.get('extratags', {}) + if (not state or not country_code) and extratags.get('wikipedia'): + derived_state, derived_country = _parse_state_from_wikipedia(extratags['wikipedia']) + if not state and derived_state: + state = derived_state + if not country_code and derived_country: + country_code = derived_country # Handle boundary/administrative - get actual place type from extratags # (e.g. boundary:administrative with extratags.place='city' -> place:city) - extratags = result.get('extratags', {}) if osm_class == 'boundary' and osm_type_tag == 'administrative': place_tag = extratags.get('place') or extratags.get('linked_place') if place_tag: @@ -673,6 +716,7 @@ def get_place_detail(osm_type, osm_id): # 1. Check cache cached = cache_get(osm_type, osm_id) if cached: + cached = _enrich_with_wiki_index(cached) logger.debug(f"Cache hit: {osm_type}/{osm_id}") return cached, 200 diff --git a/lib/wiki_index.py b/lib/wiki_index.py index 0b38d56..4d4ced3 100644 --- a/lib/wiki_index.py +++ b/lib/wiki_index.py @@ -52,13 +52,13 @@ def _get_db(): def lookup_wiki(place_name, osm_key, osm_value, state, country_code): """ - Look up wiki data for a place by exact match. + Look up wiki data for a place by name and country, with optional state. Args: place_name: Name of the place (e.g., "Twin Falls") - osm_key: OSM key (e.g., "place", "natural", "waterway") - osm_value: OSM value (e.g., "city", "peak", "river") - state: State/province name (may be None) + osm_key: OSM key (unused, kept for API compatibility) + osm_value: OSM value (unused, kept for API compatibility) + state: State/province name (may be None or empty) country_code: ISO country code (e.g., "us", "ca") Returns: @@ -71,33 +71,51 @@ def lookup_wiki(place_name, osm_key, osm_value, state, country_code): # Normalize inputs place_name = (place_name or '').strip() - osm_key = (osm_key or '').strip().lower() - osm_value = (osm_value or '').strip().lower() - state = (state or '').strip() + state = (state or '').strip() if state else '' country_code = (country_code or '').strip().lower() - if not place_name or not osm_key or not osm_value or not country_code: + if not place_name or not country_code: return None try: - # Direct match query - row = db.execute(""" - SELECT - summary, - wikipedia_title, - wikivoyage_title, - wikipedia_exists, - wikivoyage_exists, - wiki_population - FROM wiki_places - WHERE place_name = ? - AND osm_key = ? - AND osm_value = ? - AND COALESCE(state, '') = ? - AND country_code = ? - AND wikipedia_exists = 1 - LIMIT 1 - """, (place_name, osm_key, osm_value, state, country_code)).fetchone() + row = None + + # Try exact match with state first (if state provided) + if state: + row = db.execute(""" + SELECT + summary, + wikipedia_title, + wikivoyage_title, + wikipedia_exists, + wikivoyage_exists, + wiki_population + FROM wiki_places + WHERE place_name = ? + AND state = ? + AND country_code = ? + AND summary IS NOT NULL + ORDER BY importance DESC + LIMIT 1 + """, (place_name, state, country_code)).fetchone() + + # Fall back to name + country only (for places without state in query) + if not row: + row = db.execute(""" + SELECT + summary, + wikipedia_title, + wikivoyage_title, + wikipedia_exists, + wikivoyage_exists, + wiki_population + FROM wiki_places + WHERE place_name = ? + AND country_code = ? + AND summary IS NOT NULL + ORDER BY importance DESC + LIMIT 1 + """, (place_name, country_code)).fetchone() if not row: return None