From 248f4bded4b3b00a793d7d5b3e647258e785988f Mon Sep 17 00:00:00 2001
From: Matt
Date: Sun, 3 May 2026 00:17:49 +0000
Subject: [PATCH] Fix wiki lookup to match on name+state+country instead of osm_key/osm_value

- Remove osm_key/osm_value from wiki_places lookup query
- Add fallback matching: try state first, then country only
- Parse state/country from wikipedia extratag when address is empty
- Add US states and Canadian provinces parsing for wikipedia titles
- Apply wiki enrichment to cached results (was missing)

Fixes wiki_summary and wiki_url not appearing for boundary/administrative
places like Joliet, IL where OSM returns boundary/administrative but
wiki_places has place/city.

Co-Authored-By: Claude Opus 4.5
---
 lib/place_detail.py | 46 ++++++++++++++++++++++++++++-
 lib/wiki_index.py   | 70 ++++++++++++++++++++++++++++-----------------
 2 files changed, 89 insertions(+), 27 deletions(-)

diff --git a/lib/place_detail.py b/lib/place_detail.py
index c2c6845..35ffe28 100644
--- a/lib/place_detail.py
+++ b/lib/place_detail.py
@@ -22,6 +22,41 @@ OVERPASS_URL = "https://overpass-api.de/api/interpreter"
 OVERPASS_UA = "Navi/1.0 (forge.echo6.co/matt/recon)"
 VALID_OSM_TYPES = {"N", "W", "R"}
 
+# US states and Canadian provinces for wikipedia title parsing
+US_STATES = {
+    'Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado',
+    'Connecticut', 'Delaware', 'Florida', 'Georgia', 'Hawaii', 'Idaho',
+    'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky', 'Louisiana',
+    'Maine', 'Maryland', 'Massachusetts', 'Michigan', 'Minnesota',
+    'Mississippi', 'Missouri', 'Montana', 'Nebraska', 'Nevada',
+    'New Hampshire', 'New Jersey', 'New Mexico', 'New York',
+    'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma', 'Oregon',
+    'Pennsylvania', 'Rhode Island', 'South Carolina', 'South Dakota',
+    'Tennessee', 'Texas', 'Utah', 'Vermont', 'Virginia', 'Washington',
+    'West Virginia', 'Wisconsin', 'Wyoming', 'District of Columbia'
+}
+
+CANADIAN_PROVINCES = {
+    'Alberta', 'British Columbia', 'Manitoba', 'New Brunswick',
+    'Newfoundland and Labrador', 'Northwest Territories', 'Nova Scotia',
+    'Nunavut', 'Ontario', 'Prince Edward Island', 'Quebec', 'Saskatchewan', 'Yukon'
+}
+
+
+def _parse_state_from_wikipedia(wikipedia_tag):
+    """Parse (state, country) from an extratag like 'en:Joliet, Illinois'."""
+    if not wikipedia_tag or not wikipedia_tag.startswith('en:'):
+        return None, None
+    # Exact-match only the last comma-separated qualifier: substring tests
+    # misfire ('Kansas' is inside 'Arkansas') and depend on set ordering.
+    qualifier = wikipedia_tag[3:].rsplit(',', 1)[-1].strip()
+    if qualifier in US_STATES:
+        return qualifier, 'us'
+    if qualifier in CANADIAN_PROVINCES:
+        return qualifier, 'ca'
+    return None, None
+
+
 _db_conn = None
 
 
@@ -373,10 +408,18 @@ def _enrich_with_wiki_index(result):
     address = result.get('address', {})
     state = address.get('state', '')
     country_code = address.get('country_code', '')
+
+    # If state/country missing, try to derive from wikipedia extratag
+    extratags = result.get('extratags', {})
+    if (not state or not country_code) and extratags.get('wikipedia'):
+        derived_state, derived_country = _parse_state_from_wikipedia(extratags['wikipedia'])
+        if not state and derived_state:
+            state = derived_state
+        if not country_code and derived_country:
+            country_code = derived_country
 
     # Handle boundary/administrative - get actual place type from extratags
     # (e.g. boundary:administrative with extratags.place='city' -> place:city)
-    extratags = result.get('extratags', {})
     if osm_class == 'boundary' and osm_type_tag == 'administrative':
         place_tag = extratags.get('place') or extratags.get('linked_place')
         if place_tag:
@@ -673,6 +716,7 @@ def get_place_detail(osm_type, osm_id):
     # 1. Check cache
     cached = cache_get(osm_type, osm_id)
     if cached:
+        cached = _enrich_with_wiki_index(cached)
         logger.debug(f"Cache hit: {osm_type}/{osm_id}")
         return cached, 200
 
diff --git a/lib/wiki_index.py b/lib/wiki_index.py
index 0b38d56..4d4ced3 100644
--- a/lib/wiki_index.py
+++ b/lib/wiki_index.py
@@ -52,13 +52,13 @@ def _get_db():
 
 def lookup_wiki(place_name, osm_key, osm_value, state, country_code):
     """
-    Look up wiki data for a place by exact match.
+    Look up wiki data for a place by name and country, with optional state.
 
     Args:
         place_name: Name of the place (e.g., "Twin Falls")
-        osm_key: OSM key (e.g., "place", "natural", "waterway")
-        osm_value: OSM value (e.g., "city", "peak", "river")
-        state: State/province name (may be None)
+        osm_key: OSM key (unused, kept for API compatibility)
+        osm_value: OSM value (unused, kept for API compatibility)
+        state: State/province name (may be None or empty)
         country_code: ISO country code (e.g., "us", "ca")
 
     Returns:
@@ -71,33 +71,51 @@ def lookup_wiki(place_name, osm_key, osm_value, state, country_code):
 
     # Normalize inputs
     place_name = (place_name or '').strip()
-    osm_key = (osm_key or '').strip().lower()
-    osm_value = (osm_value or '').strip().lower()
-    state = (state or '').strip()
+    state = (state or '').strip() if state else ''
     country_code = (country_code or '').strip().lower()
 
-    if not place_name or not osm_key or not osm_value or not country_code:
+    if not place_name or not country_code:
         return None
 
     try:
-        # Direct match query
-        row = db.execute("""
-            SELECT
-                summary,
-                wikipedia_title,
-                wikivoyage_title,
-                wikipedia_exists,
-                wikivoyage_exists,
-                wiki_population
-            FROM wiki_places
-            WHERE place_name = ?
-              AND osm_key = ?
-              AND osm_value = ?
-              AND COALESCE(state, '') = ?
-              AND country_code = ?
-              AND wikipedia_exists = 1
-            LIMIT 1
-        """, (place_name, osm_key, osm_value, state, country_code)).fetchone()
+        row = None
+
+        # Try exact match with state first (if state provided)
+        if state:
+            row = db.execute("""
+                SELECT
+                    summary,
+                    wikipedia_title,
+                    wikivoyage_title,
+                    wikipedia_exists,
+                    wikivoyage_exists,
+                    wiki_population
+                FROM wiki_places
+                WHERE place_name = ?
+                  AND state = ?
+                  AND country_code = ?
+                  AND summary IS NOT NULL
+                ORDER BY importance DESC
+                LIMIT 1
+            """, (place_name, state, country_code)).fetchone()
+
+        # Fall back to name + country only (for places without state in query)
+        if not row:
+            row = db.execute("""
+                SELECT
+                    summary,
+                    wikipedia_title,
+                    wikivoyage_title,
+                    wikipedia_exists,
+                    wikivoyage_exists,
+                    wiki_population
+                FROM wiki_places
+                WHERE place_name = ?
+                  AND country_code = ?
+                  AND summary IS NOT NULL
+                ORDER BY importance DESC
+                LIMIT 1
+            """, (place_name, country_code)).fetchone()
 
         if not row:
             return None