Fix wiki lookup to match on name+state+country instead of osm_key/osm_value

- Remove osm_key/osm_value from wiki_places lookup query
- Add fallback matching: try state first, then country only
- Parse state/country from wikipedia extratag when address is empty
- Add US states and Canadian provinces parsing for wikipedia titles
- Apply wiki enrichment to cached results (was missing)

Fixes wiki_summary and wiki_url not appearing for boundary/administrative
places like Joliet, IL where OSM returns boundary/administrative but
wiki_places has place/city.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Matt 2026-05-03 00:17:49 +00:00
commit 248f4bded4
2 changed files with 89 additions and 27 deletions

View file

@ -22,6 +22,41 @@ OVERPASS_URL = "https://overpass-api.de/api/interpreter"
OVERPASS_UA = "Navi/1.0 (forge.echo6.co/matt/recon)" OVERPASS_UA = "Navi/1.0 (forge.echo6.co/matt/recon)"
VALID_OSM_TYPES = {"N", "W", "R"} VALID_OSM_TYPES = {"N", "W", "R"}
# US states and Canadian provinces for wikipedia title parsing
US_STATES = {
'Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado',
'Connecticut', 'Delaware', 'Florida', 'Georgia', 'Hawaii', 'Idaho',
'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky', 'Louisiana',
'Maine', 'Maryland', 'Massachusetts', 'Michigan', 'Minnesota',
'Mississippi', 'Missouri', 'Montana', 'Nebraska', 'Nevada',
'New Hampshire', 'New Jersey', 'New Mexico', 'New York',
'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma', 'Oregon',
'Pennsylvania', 'Rhode Island', 'South Carolina', 'South Dakota',
'Tennessee', 'Texas', 'Utah', 'Vermont', 'Virginia', 'Washington',
'West Virginia', 'Wisconsin', 'Wyoming', 'District of Columbia'
}
CANADIAN_PROVINCES = {
'Alberta', 'British Columbia', 'Manitoba', 'New Brunswick',
'Newfoundland and Labrador', 'Northwest Territories', 'Nova Scotia',
'Nunavut', 'Ontario', 'Prince Edward Island', 'Quebec', 'Saskatchewan', 'Yukon'
}
def _parse_state_from_wikipedia(wikipedia_tag):
"""Parse state/province and country from wikipedia extratag like 'en:Joliet, Illinois'"""
if not wikipedia_tag or not wikipedia_tag.startswith('en:'):
return None, None
title = wikipedia_tag[3:]
for state in US_STATES:
if state in title:
return state, 'us'
for prov in CANADIAN_PROVINCES:
if prov in title:
return prov, 'ca'
return None, None
_db_conn = None _db_conn = None
@ -374,9 +409,17 @@ def _enrich_with_wiki_index(result):
state = address.get('state', '') state = address.get('state', '')
country_code = address.get('country_code', '') country_code = address.get('country_code', '')
# If state/country missing, try to derive from wikipedia extratag
extratags = result.get('extratags', {})
if (not state or not country_code) and extratags.get('wikipedia'):
derived_state, derived_country = _parse_state_from_wikipedia(extratags['wikipedia'])
if not state and derived_state:
state = derived_state
if not country_code and derived_country:
country_code = derived_country
# Handle boundary/administrative - get actual place type from extratags # Handle boundary/administrative - get actual place type from extratags
# (e.g. boundary:administrative with extratags.place='city' -> place:city) # (e.g. boundary:administrative with extratags.place='city' -> place:city)
extratags = result.get('extratags', {})
if osm_class == 'boundary' and osm_type_tag == 'administrative': if osm_class == 'boundary' and osm_type_tag == 'administrative':
place_tag = extratags.get('place') or extratags.get('linked_place') place_tag = extratags.get('place') or extratags.get('linked_place')
if place_tag: if place_tag:
@ -673,6 +716,7 @@ def get_place_detail(osm_type, osm_id):
# 1. Check cache # 1. Check cache
cached = cache_get(osm_type, osm_id) cached = cache_get(osm_type, osm_id)
if cached: if cached:
cached = _enrich_with_wiki_index(cached)
logger.debug(f"Cache hit: {osm_type}/{osm_id}") logger.debug(f"Cache hit: {osm_type}/{osm_id}")
return cached, 200 return cached, 200

View file

@ -52,13 +52,13 @@ def _get_db():
def lookup_wiki(place_name, osm_key, osm_value, state, country_code): def lookup_wiki(place_name, osm_key, osm_value, state, country_code):
""" """
Look up wiki data for a place by exact match. Look up wiki data for a place by name and country, with optional state.
Args: Args:
place_name: Name of the place (e.g., "Twin Falls") place_name: Name of the place (e.g., "Twin Falls")
osm_key: OSM key (e.g., "place", "natural", "waterway") osm_key: OSM key (unused, kept for API compatibility)
osm_value: OSM value (e.g., "city", "peak", "river") osm_value: OSM value (unused, kept for API compatibility)
state: State/province name (may be None) state: State/province name (may be None or empty)
country_code: ISO country code (e.g., "us", "ca") country_code: ISO country code (e.g., "us", "ca")
Returns: Returns:
@ -71,33 +71,51 @@ def lookup_wiki(place_name, osm_key, osm_value, state, country_code):
# Normalize inputs # Normalize inputs
place_name = (place_name or '').strip() place_name = (place_name or '').strip()
osm_key = (osm_key or '').strip().lower() state = (state or '').strip() if state else ''
osm_value = (osm_value or '').strip().lower()
state = (state or '').strip()
country_code = (country_code or '').strip().lower() country_code = (country_code or '').strip().lower()
if not place_name or not osm_key or not osm_value or not country_code: if not place_name or not country_code:
return None return None
try: try:
# Direct match query row = None
row = db.execute("""
SELECT # Try exact match with state first (if state provided)
summary, if state:
wikipedia_title, row = db.execute("""
wikivoyage_title, SELECT
wikipedia_exists, summary,
wikivoyage_exists, wikipedia_title,
wiki_population wikivoyage_title,
FROM wiki_places wikipedia_exists,
WHERE place_name = ? wikivoyage_exists,
AND osm_key = ? wiki_population
AND osm_value = ? FROM wiki_places
AND COALESCE(state, '') = ? WHERE place_name = ?
AND country_code = ? AND state = ?
AND wikipedia_exists = 1 AND country_code = ?
LIMIT 1 AND summary IS NOT NULL
""", (place_name, osm_key, osm_value, state, country_code)).fetchone() ORDER BY importance DESC
LIMIT 1
""", (place_name, state, country_code)).fetchone()
# Fall back to name + country only (for places without state in query)
if not row:
row = db.execute("""
SELECT
summary,
wikipedia_title,
wikivoyage_title,
wikipedia_exists,
wikivoyage_exists,
wiki_population
FROM wiki_places
WHERE place_name = ?
AND country_code = ?
AND summary IS NOT NULL
ORDER BY importance DESC
LIMIT 1
""", (place_name, country_code)).fetchone()
if not row: if not row:
return None return None