mirror of
https://github.com/zvx-echo6/recon.git
synced 2026-05-20 06:34:40 +02:00
Fix wiki lookup to match on name+state+country instead of osm_key/osm_value
- Remove osm_key/osm_value from wiki_places lookup query
- Add fallback matching: try state first, then country only
- Parse state/country from wikipedia extratag when address is empty
- Add US states and Canadian provinces parsing for wikipedia titles
- Apply wiki enrichment to cached results (was missing)

Fixes wiki_summary and wiki_url not appearing for boundary/administrative places like Joliet, IL, where OSM returns boundary/administrative but wiki_places has place/city.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
1b3ad1bf9e
commit
248f4bded4
2 changed files with 89 additions and 27 deletions
|
|
@ -22,6 +22,41 @@ OVERPASS_URL = "https://overpass-api.de/api/interpreter"
|
|||
OVERPASS_UA = "Navi/1.0 (forge.echo6.co/matt/recon)"
|
||||
VALID_OSM_TYPES = {"N", "W", "R"}
|
||||
|
||||
# US states and Canadian provinces for wikipedia title parsing
|
||||
US_STATES = {
|
||||
'Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado',
|
||||
'Connecticut', 'Delaware', 'Florida', 'Georgia', 'Hawaii', 'Idaho',
|
||||
'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky', 'Louisiana',
|
||||
'Maine', 'Maryland', 'Massachusetts', 'Michigan', 'Minnesota',
|
||||
'Mississippi', 'Missouri', 'Montana', 'Nebraska', 'Nevada',
|
||||
'New Hampshire', 'New Jersey', 'New Mexico', 'New York',
|
||||
'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma', 'Oregon',
|
||||
'Pennsylvania', 'Rhode Island', 'South Carolina', 'South Dakota',
|
||||
'Tennessee', 'Texas', 'Utah', 'Vermont', 'Virginia', 'Washington',
|
||||
'West Virginia', 'Wisconsin', 'Wyoming', 'District of Columbia'
|
||||
}
|
||||
|
||||
CANADIAN_PROVINCES = {
|
||||
'Alberta', 'British Columbia', 'Manitoba', 'New Brunswick',
|
||||
'Newfoundland and Labrador', 'Northwest Territories', 'Nova Scotia',
|
||||
'Nunavut', 'Ontario', 'Prince Edward Island', 'Quebec', 'Saskatchewan', 'Yukon'
|
||||
}
|
||||
|
||||
|
||||
def _parse_state_from_wikipedia(wikipedia_tag):
|
||||
"""Parse state/province and country from wikipedia extratag like 'en:Joliet, Illinois'"""
|
||||
if not wikipedia_tag or not wikipedia_tag.startswith('en:'):
|
||||
return None, None
|
||||
title = wikipedia_tag[3:]
|
||||
for state in US_STATES:
|
||||
if state in title:
|
||||
return state, 'us'
|
||||
for prov in CANADIAN_PROVINCES:
|
||||
if prov in title:
|
||||
return prov, 'ca'
|
||||
return None, None
|
||||
|
||||
|
||||
_db_conn = None
|
||||
|
||||
|
||||
|
|
@ -374,9 +409,17 @@ def _enrich_with_wiki_index(result):
|
|||
state = address.get('state', '')
|
||||
country_code = address.get('country_code', '')
|
||||
|
||||
# If state/country missing, try to derive from wikipedia extratag
|
||||
extratags = result.get('extratags', {})
|
||||
if (not state or not country_code) and extratags.get('wikipedia'):
|
||||
derived_state, derived_country = _parse_state_from_wikipedia(extratags['wikipedia'])
|
||||
if not state and derived_state:
|
||||
state = derived_state
|
||||
if not country_code and derived_country:
|
||||
country_code = derived_country
|
||||
|
||||
# Handle boundary/administrative - get actual place type from extratags
|
||||
# (e.g. boundary:administrative with extratags.place='city' -> place:city)
|
||||
extratags = result.get('extratags', {})
|
||||
if osm_class == 'boundary' and osm_type_tag == 'administrative':
|
||||
place_tag = extratags.get('place') or extratags.get('linked_place')
|
||||
if place_tag:
|
||||
|
|
@ -673,6 +716,7 @@ def get_place_detail(osm_type, osm_id):
|
|||
# 1. Check cache
|
||||
cached = cache_get(osm_type, osm_id)
|
||||
if cached:
|
||||
cached = _enrich_with_wiki_index(cached)
|
||||
logger.debug(f"Cache hit: {osm_type}/{osm_id}")
|
||||
return cached, 200
|
||||
|
||||
|
|
|
|||
|
|
@ -52,13 +52,13 @@ def _get_db():
|
|||
|
||||
def lookup_wiki(place_name, osm_key, osm_value, state, country_code):
|
||||
"""
|
||||
Look up wiki data for a place by exact match.
|
||||
Look up wiki data for a place by name and country, with optional state.
|
||||
|
||||
Args:
|
||||
place_name: Name of the place (e.g., "Twin Falls")
|
||||
osm_key: OSM key (e.g., "place", "natural", "waterway")
|
||||
osm_value: OSM value (e.g., "city", "peak", "river")
|
||||
state: State/province name (may be None)
|
||||
osm_key: OSM key (unused, kept for API compatibility)
|
||||
osm_value: OSM value (unused, kept for API compatibility)
|
||||
state: State/province name (may be None or empty)
|
||||
country_code: ISO country code (e.g., "us", "ca")
|
||||
|
||||
Returns:
|
||||
|
|
@ -71,16 +71,17 @@ def lookup_wiki(place_name, osm_key, osm_value, state, country_code):
|
|||
|
||||
# Normalize inputs
|
||||
place_name = (place_name or '').strip()
|
||||
osm_key = (osm_key or '').strip().lower()
|
||||
osm_value = (osm_value or '').strip().lower()
|
||||
state = (state or '').strip()
|
||||
state = (state or '').strip() if state else ''
|
||||
country_code = (country_code or '').strip().lower()
|
||||
|
||||
if not place_name or not osm_key or not osm_value or not country_code:
|
||||
if not place_name or not country_code:
|
||||
return None
|
||||
|
||||
try:
|
||||
# Direct match query
|
||||
row = None
|
||||
|
||||
# Try exact match with state first (if state provided)
|
||||
if state:
|
||||
row = db.execute("""
|
||||
SELECT
|
||||
summary,
|
||||
|
|
@ -91,13 +92,30 @@ def lookup_wiki(place_name, osm_key, osm_value, state, country_code):
|
|||
wiki_population
|
||||
FROM wiki_places
|
||||
WHERE place_name = ?
|
||||
AND osm_key = ?
|
||||
AND osm_value = ?
|
||||
AND COALESCE(state, '') = ?
|
||||
AND state = ?
|
||||
AND country_code = ?
|
||||
AND wikipedia_exists = 1
|
||||
AND summary IS NOT NULL
|
||||
ORDER BY importance DESC
|
||||
LIMIT 1
|
||||
""", (place_name, osm_key, osm_value, state, country_code)).fetchone()
|
||||
""", (place_name, state, country_code)).fetchone()
|
||||
|
||||
# Fall back to name + country only (for places without state in query)
|
||||
if not row:
|
||||
row = db.execute("""
|
||||
SELECT
|
||||
summary,
|
||||
wikipedia_title,
|
||||
wikivoyage_title,
|
||||
wikipedia_exists,
|
||||
wikivoyage_exists,
|
||||
wiki_population
|
||||
FROM wiki_places
|
||||
WHERE place_name = ?
|
||||
AND country_code = ?
|
||||
AND summary IS NOT NULL
|
||||
ORDER BY importance DESC
|
||||
LIMIT 1
|
||||
""", (place_name, country_code)).fetchone()
|
||||
|
||||
if not row:
|
||||
return None
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue