mirror of
https://github.com/zvx-echo6/recon.git
synced 2026-05-20 06:34:40 +02:00
- Remove osm_key/osm_value from wiki_places lookup query - Add fallback matching: try state first, then country only - Parse state/country from wikipedia extratag when address is empty - Add US states and Canadian provinces parsing for wikipedia titles - Apply wiki enrichment to cached results (was missing) Fixes wiki_summary and wiki_url not appearing for boundary/administrative places like Joliet, IL where OSM returns boundary/administrative but wiki_places has place/city. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
154 lines
4.7 KiB
Python
154 lines
4.7 KiB
Python
"""
|
|
Wiki location index lookup.
|
|
|
|
Provides wiki summaries, URLs, and population data from the wiki_index.db
|
|
for place detail enrichment. Read-only, opened once at startup.
|
|
|
|
DB path: /opt/recon/data/wiki_index.db
|
|
"""
|
|
import os
|
|
import sqlite3
|
|
|
|
from .utils import setup_logging
|
|
|
|
logger = setup_logging('recon.wiki_index')
|
|
|
|
_db_conn = None
|
|
_zim_books = {}
|
|
|
|
|
|
def _get_db():
|
|
"""Return a module-level SQLite connection (lazy init, read-only)."""
|
|
global _db_conn, _zim_books
|
|
|
|
if _db_conn is not None:
|
|
return _db_conn
|
|
|
|
db_path = os.path.join(
|
|
os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
|
|
'data', 'wiki_index.db'
|
|
)
|
|
|
|
if not os.path.exists(db_path):
|
|
logger.warning(f"Wiki index DB not found at {db_path}")
|
|
return None
|
|
|
|
try:
|
|
# Open read-only with URI
|
|
_db_conn = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True, check_same_thread=False)
|
|
_db_conn.row_factory = sqlite3.Row
|
|
|
|
# Load zim_books for URL construction
|
|
rows = _db_conn.execute("SELECT book_type, public_url FROM zim_books").fetchall()
|
|
for row in rows:
|
|
_zim_books[row['book_type']] = row['public_url']
|
|
|
|
logger.info(f"Wiki index DB ready at {db_path} ({len(_zim_books)} ZIM books)")
|
|
return _db_conn
|
|
except Exception as e:
|
|
logger.error(f"Failed to open wiki index DB: {e}")
|
|
return None
|
|
|
|
|
|
def lookup_wiki(place_name, osm_key, osm_value, state, country_code):
|
|
"""
|
|
Look up wiki data for a place by name and country, with optional state.
|
|
|
|
Args:
|
|
place_name: Name of the place (e.g., "Twin Falls")
|
|
osm_key: OSM key (unused, kept for API compatibility)
|
|
osm_value: OSM value (unused, kept for API compatibility)
|
|
state: State/province name (may be None or empty)
|
|
country_code: ISO country code (e.g., "us", "ca")
|
|
|
|
Returns:
|
|
dict with wiki_summary, wiki_url, wikivoyage_url, wiki_population
|
|
or None if no match found.
|
|
"""
|
|
db = _get_db()
|
|
if db is None:
|
|
return None
|
|
|
|
# Normalize inputs
|
|
place_name = (place_name or '').strip()
|
|
state = (state or '').strip() if state else ''
|
|
country_code = (country_code or '').strip().lower()
|
|
|
|
if not place_name or not country_code:
|
|
return None
|
|
|
|
try:
|
|
row = None
|
|
|
|
# Try exact match with state first (if state provided)
|
|
if state:
|
|
row = db.execute("""
|
|
SELECT
|
|
summary,
|
|
wikipedia_title,
|
|
wikivoyage_title,
|
|
wikipedia_exists,
|
|
wikivoyage_exists,
|
|
wiki_population
|
|
FROM wiki_places
|
|
WHERE place_name = ?
|
|
AND state = ?
|
|
AND country_code = ?
|
|
AND summary IS NOT NULL
|
|
ORDER BY importance DESC
|
|
LIMIT 1
|
|
""", (place_name, state, country_code)).fetchone()
|
|
|
|
# Fall back to name + country only (for places without state in query)
|
|
if not row:
|
|
row = db.execute("""
|
|
SELECT
|
|
summary,
|
|
wikipedia_title,
|
|
wikivoyage_title,
|
|
wikipedia_exists,
|
|
wikivoyage_exists,
|
|
wiki_population
|
|
FROM wiki_places
|
|
WHERE place_name = ?
|
|
AND country_code = ?
|
|
AND summary IS NOT NULL
|
|
ORDER BY importance DESC
|
|
LIMIT 1
|
|
""", (place_name, country_code)).fetchone()
|
|
|
|
if not row:
|
|
return None
|
|
|
|
result = {}
|
|
|
|
# Summary
|
|
if row['summary']:
|
|
result['wiki_summary'] = row['summary']
|
|
|
|
# Wikipedia URL
|
|
if row['wikipedia_exists'] and row['wikipedia_title'] and 'wikipedia' in _zim_books:
|
|
base_url = _zim_books['wikipedia']
|
|
title = row['wikipedia_title'].replace(' ', '_')
|
|
result['wiki_url'] = f"{base_url}/A/{title}"
|
|
|
|
# Wikivoyage URL
|
|
if row['wikivoyage_exists'] and row['wikivoyage_title'] and 'wikivoyage' in _zim_books:
|
|
base_url = _zim_books['wikivoyage']
|
|
title = row['wikivoyage_title'].replace(' ', '_')
|
|
result['wikivoyage_url'] = f"{base_url}/A/{title}"
|
|
|
|
# Population
|
|
if row['wiki_population']:
|
|
result['wiki_population'] = row['wiki_population']
|
|
|
|
return result if result else None
|
|
|
|
except Exception as e:
|
|
logger.warning(f"Wiki lookup error for {place_name}: {e}")
|
|
return None
|
|
|
|
|
|
def is_available():
|
|
"""Check if the wiki index DB is available."""
|
|
return _get_db() is not None
|