Compare commits

...

5 commits

Author SHA1 Message Date
248f4bded4 Fix wiki lookup to match on name+state+country instead of osm_key/osm_value
- Remove osm_key/osm_value from wiki_places lookup query
- Add fallback matching: try state first, then country only
- Parse state/country from wikipedia extratag when address is empty
- Add US states and Canadian provinces parsing for wikipedia titles
- Apply wiki enrichment to cached results (was missing)

Fixes wiki_summary and wiki_url not appearing for boundary/administrative
places like Joliet, IL where OSM returns boundary/administrative but
wiki_places has place/city.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-05-03 00:17:49 +00:00
1b3ad1bf9e fix(google_places): add schema migration for Google columns
The place_cache table was missing google_place_id, google_data, and
google_fetched_at columns needed by the Google Places enrichment.

Adds ALTER TABLE migration in _get_db() to add columns if missing,
wrapped in try/except to handle existing columns gracefully.

Fixes HTTP 500 on /api/place/W/* endpoints.
2026-04-30 03:22:38 +00:00
Ubuntu
bb7d0169e0 fix(place): preserve extratags.wikipedia format when has_kiwix_wiki enabled
When has_kiwix_wiki is enabled, skip rewriting extratags.wikipedia to
a Kiwix content URL. The wiki_url field already provides the correct
local Kiwix viewer link for the WikiSummarySection.

This preserves the original OSM format (e.g. "en:Title") in
extratags.wikipedia so the frontend LINKS section can properly build
wikipedia.org URLs.

Co-Authored-By: Claude <noreply@anthropic.com>
2026-04-29 18:53:03 +00:00
c92178d90f fix(place): handle boundary/administrative for wiki lookup
Add place/linked_place to extratags so wiki enrichment can detect
actual place type (e.g. boundary:administrative with place=city
should match wiki_places osm_key=place, osm_value=city).

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-04-29 17:21:38 +00:00
Ubuntu
2d1dcbf70c feat(place): add wiki location index lookup
Add wiki summary, URLs, and population data from wiki_index.db to
place detail and reverse geocode responses.

- Add lib/wiki_index.py: SQLite read-only lookup module
- Enrich /api/place responses via _enrich_with_wiki_index()
- Enrich /api/reverse responses via _enrich_reverse_result_with_wiki()
- Gate on has_kiwix_wiki feature flag (default false in home.yaml)
- Direct match only on place_name + osm_key + osm_value + state + country_code
- Additive fields: wiki_summary, wiki_url, wikivoyage_url, wiki_population

DB path: /opt/recon/data/wiki_index.db

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-04-29 17:07:31 +00:00
4 changed files with 355 additions and 0 deletions

View file

@ -47,6 +47,18 @@ def _get_db():
)
""")
_db_conn.commit()
# Schema migration: add Google columns to place_cache if missing
for col, coldef in [
('google_place_id', 'TEXT'),
('google_data', 'TEXT'),
('google_fetched_at', 'INTEGER'),
]:
try:
_db_conn.execute(f'ALTER TABLE place_cache ADD COLUMN {col} {coldef}')
logger.info(f'Added column {col} to place_cache')
except sqlite3.OperationalError:
pass # Column already exists
_db_conn.commit()
return _db_conn

View file

@ -15,6 +15,66 @@ from .utils import setup_logging
# Module-level logger obtained from setup_logging under the name 'recon.netsyms_api'.
logger = setup_logging('recon.netsyms_api')
def _enrich_reverse_result_with_wiki(result):
"""
Add wiki data to a reverse geocode result if available.
Only runs when has_kiwix_wiki is enabled.
"""
try:
from .deployment_config import get_deployment_config
deploy_config = get_deployment_config()
features = deploy_config.get('features', {})
if not features.get('has_kiwix_wiki', False):
return result
except Exception:
return result
try:
from . import wiki_index
except ImportError:
return result
if not wiki_index.is_available():
return result
# Extract match criteria from Photon raw props
raw = result.get('raw', {})
place_name = raw.get('name', '')
osm_key = raw.get('osm_key', '')
osm_value = raw.get('osm_value', '')
state = raw.get('state', '')
country = raw.get('country', '')
# Extract country code (Photon uses full country name, we need code)
country_code = raw.get('countrycode', '').lower()
if not country_code:
country_lower = country.lower() if country else ''
if 'united states' in country_lower or country_lower == 'usa':
country_code = 'us'
elif 'canada' in country_lower:
country_code = 'ca'
if not place_name or not osm_key or not osm_value or not country_code:
return result
# Look up wiki data
wiki_data = wiki_index.lookup_wiki(place_name, osm_key, osm_value, state, country_code)
if wiki_data:
# Add wiki fields to result (additive only)
if 'wiki_summary' in wiki_data:
result['wiki_summary'] = wiki_data['wiki_summary']
if 'wiki_url' in wiki_data:
result['wiki_url'] = wiki_data['wiki_url']
if 'wikivoyage_url' in wiki_data:
result['wikivoyage_url'] = wiki_data['wikivoyage_url']
if 'wiki_population' in wiki_data:
result['wiki_population'] = wiki_data['wiki_population']
return result
# Blueprint objects named 'netsyms' and 'geocode'. The routes attached to them
# are not all visible in this view.
netsyms_bp = Blueprint('netsyms', __name__)
geocode_bp = Blueprint('geocode', __name__)
@ -123,4 +183,7 @@ def api_reverse():
from .geocode import _parse_photon_features
results = _parse_photon_features(features, source='photon_reverse')
# Enrich results with wiki data
results = [_enrich_reverse_result_with_wiki(r) for r in results]
return jsonify({'query': query_str, 'results': results, 'count': len(results)})

View file

@ -22,6 +22,41 @@ OVERPASS_URL = "https://overpass-api.de/api/interpreter"
# Client identification string; by its name presumably sent as the User-Agent
# on Overpass requests — confirm at the call site.
OVERPASS_UA = "Navi/1.0 (forge.echo6.co/matt/recon)"
# Accepted OSM type codes (presumably N=node, W=way, R=relation — confirm).
VALID_OSM_TYPES = {"N", "W", "R"}
# US states and Canadian provinces for wikipedia title parsing
US_STATES = {
'Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado',
'Connecticut', 'Delaware', 'Florida', 'Georgia', 'Hawaii', 'Idaho',
'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky', 'Louisiana',
'Maine', 'Maryland', 'Massachusetts', 'Michigan', 'Minnesota',
'Mississippi', 'Missouri', 'Montana', 'Nebraska', 'Nevada',
'New Hampshire', 'New Jersey', 'New Mexico', 'New York',
'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma', 'Oregon',
'Pennsylvania', 'Rhode Island', 'South Carolina', 'South Dakota',
'Tennessee', 'Texas', 'Utah', 'Vermont', 'Virginia', 'Washington',
'West Virginia', 'Wisconsin', 'Wyoming', 'District of Columbia'
}
CANADIAN_PROVINCES = {
'Alberta', 'British Columbia', 'Manitoba', 'New Brunswick',
'Newfoundland and Labrador', 'Northwest Territories', 'Nova Scotia',
'Nunavut', 'Ontario', 'Prince Edward Island', 'Quebec', 'Saskatchewan', 'Yukon'
}
def _parse_state_from_wikipedia(wikipedia_tag):
"""Parse state/province and country from wikipedia extratag like 'en:Joliet, Illinois'"""
if not wikipedia_tag or not wikipedia_tag.startswith('en:'):
return None, None
title = wikipedia_tag[3:]
for state in US_STATES:
if state in title:
return state, 'us'
for prov in CANADIAN_PROVINCES:
if prov in title:
return prov, 'ca'
return None, None
# Module-level database connection handle, returned by _get_db(); starts unset.
_db_conn = None
@ -290,6 +325,11 @@ def _enrich_wiki_links(result):
"""
Rewrite wiki-related extratags to local Kiwix URLs where available.
Falls back to public URLs. Only runs when has_wiki_rewriting is enabled.
Note: When has_kiwix_wiki is enabled, we skip rewriting 'wikipedia' since
the wiki_index enrichment provides a proper wiki_url field. This keeps
extratags.wikipedia in the original OSM format for frontend link builders.
Returns the (possibly enriched) result dict.
"""
try:
@ -298,6 +338,8 @@ def _enrich_wiki_links(result):
features = deploy_config.get('features', {})
if not features.get('has_wiki_rewriting', False):
return result
# When has_kiwix_wiki is enabled, skip wikipedia rewriting (wiki_url handles it)
has_kiwix_wiki = features.get('has_kiwix_wiki', False)
except Exception:
return result
@ -313,6 +355,9 @@ def _enrich_wiki_links(result):
rewrites = {}
for tag in _WIKI_TAGS:
# Skip wikipedia when has_kiwix_wiki is enabled (wiki_url provides the local link)
if tag == 'wikipedia' and has_kiwix_wiki:
continue
value = extratags.get(tag)
if not value:
continue
@ -328,6 +373,82 @@ def _enrich_wiki_links(result):
return result
# ── Wiki Index enrichment ───────────────────────────────────────────────
def _enrich_with_wiki_index(result):
"""
Add wiki summary, URLs, and population from wiki_index.db.
Only runs when has_kiwix_wiki is enabled. Direct match only.
Returns the (possibly enriched) result dict.
"""
try:
from .deployment_config import get_deployment_config
deploy_config = get_deployment_config()
features = deploy_config.get('features', {})
if not features.get('has_kiwix_wiki', False):
return result
except Exception:
return result
try:
from . import wiki_index
except ImportError:
logger.debug("wiki_index module not available")
return result
if not wiki_index.is_available():
return result
# Extract match criteria from result
name = result.get('name', '')
osm_class = result.get('class', '')
osm_type_tag = result.get('type', '')
address = result.get('address', {})
state = address.get('state', '')
country_code = address.get('country_code', '')
# If state/country missing, try to derive from wikipedia extratag
extratags = result.get('extratags', {})
if (not state or not country_code) and extratags.get('wikipedia'):
derived_state, derived_country = _parse_state_from_wikipedia(extratags['wikipedia'])
if not state and derived_state:
state = derived_state
if not country_code and derived_country:
country_code = derived_country
# Handle boundary/administrative - get actual place type from extratags
# (e.g. boundary:administrative with extratags.place='city' -> place:city)
if osm_class == 'boundary' and osm_type_tag == 'administrative':
place_tag = extratags.get('place') or extratags.get('linked_place')
if place_tag:
osm_class = 'place'
osm_type_tag = place_tag
if not name or not osm_class or not osm_type_tag:
return result
# Look up wiki data
wiki_data = wiki_index.lookup_wiki(name, osm_class, osm_type_tag, state, country_code)
if not wiki_data:
return result
# Add wiki fields to result (additive only)
if 'wiki_summary' in wiki_data:
result['wiki_summary'] = wiki_data['wiki_summary']
if 'wiki_url' in wiki_data:
result['wiki_url'] = wiki_data['wiki_url']
if 'wikivoyage_url' in wiki_data:
result['wikivoyage_url'] = wiki_data['wikivoyage_url']
if 'wiki_population' in wiki_data:
result['wiki_population'] = wiki_data['wiki_population']
result.setdefault('sources', {})['wiki_index'] = True
logger.debug(f"Wiki index enrichment for {name}")
return result
# ── Nominatim parsing ───────────────────────────────────────────────────
# Nominatim address array uses rank_address to indicate what each entry is.
@ -444,6 +565,8 @@ def _parse_nominatim(data):
'wheelchair': raw_extra.get('wheelchair'),
'fee': raw_extra.get('fee'),
'takeaway': raw_extra.get('takeaway'),
'place': raw_extra.get('place'),
'linked_place': raw_extra.get('linked_place'),
}
# Category: use extratags.place for boundaries (e.g. "city"), else class/type
@ -593,6 +716,7 @@ def get_place_detail(osm_type, osm_id):
# 1. Check cache
cached = cache_get(osm_type, osm_id)
if cached:
cached = _enrich_with_wiki_index(cached)
logger.debug(f"Cache hit: {osm_type}/{osm_id}")
return cached, 200
@ -625,6 +749,7 @@ def get_place_detail(osm_type, osm_id):
nominatim_result = _enrich_with_overture(nominatim_result, osm_type, osm_id)
nominatim_result = _enrich_with_google(nominatim_result, osm_type, osm_id)
nominatim_result = _enrich_wiki_links(nominatim_result)
nominatim_result = _enrich_with_wiki_index(nominatim_result)
cache_put(osm_type, osm_id, nominatim_result, 'nominatim_local')
return nominatim_result, 200
@ -658,6 +783,7 @@ def get_place_detail(osm_type, osm_id):
overpass_result = _enrich_with_overture(overpass_result, osm_type, osm_id)
overpass_result = _enrich_with_google(overpass_result, osm_type, osm_id)
overpass_result = _enrich_wiki_links(overpass_result)
overpass_result = _enrich_with_wiki_index(overpass_result)
cache_put(osm_type, osm_id, overpass_result, 'overpass')
return overpass_result, 200

154
lib/wiki_index.py Normal file
View file

@ -0,0 +1,154 @@
"""
Wiki location index lookup.
Provides wiki summaries, URLs, and population data from the wiki_index.db
for place detail enrichment. Read-only; opened lazily on first use and then reused.
DB path: <parent of this package>/data/wiki_index.db (e.g. /opt/recon/data/wiki_index.db)
"""
import os
import sqlite3
from .utils import setup_logging
# Module-level logger obtained from setup_logging under the name 'recon.wiki_index'.
logger = setup_logging('recon.wiki_index')
# Cached SQLite connection; stays None until _get_db() opens the index.
_db_conn = None
# Maps zim_books.book_type -> public_url. Filled by _get_db() and read by
# lookup_wiki() when building article URLs.
_zim_books = {}
def _get_db():
    """Return a module-level SQLite connection (lazy init, read-only).

    On first successful call the index is opened read-only and the
    ``zim_books`` table is loaded into ``_zim_books`` for URL construction.
    The connection is stored in ``_db_conn`` only after both steps succeed, so
    a failed initialisation never leaves a connection cached with an empty
    book table. Failed attempts are retried on the next call.

    Returns:
        The cached ``sqlite3.Connection`` (rows are ``sqlite3.Row``), or
        ``None`` when the DB file is missing or could not be opened/read.
    """
    global _db_conn, _zim_books
    if _db_conn is not None:
        return _db_conn

    # The DB lives in the 'data' directory next to this module's package.
    db_path = os.path.join(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
        'data', 'wiki_index.db'
    )
    if not os.path.exists(db_path):
        logger.warning(f"Wiki index DB not found at {db_path}")
        return None

    conn = None
    try:
        # Open read-only with URI
        conn = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True, check_same_thread=False)
        conn.row_factory = sqlite3.Row
        # Load zim_books for URL construction
        rows = conn.execute("SELECT book_type, public_url FROM zim_books").fetchall()
        books = {row['book_type']: row['public_url'] for row in rows}
    except Exception as e:
        logger.error(f"Failed to open wiki index DB: {e}")
        # Do not keep a half-initialised connection around.
        if conn is not None:
            conn.close()
        return None

    # Publish state only once everything has loaded. _zim_books is updated in
    # place so existing references to the dict stay valid.
    _zim_books.clear()
    _zim_books.update(books)
    _db_conn = conn
    logger.info(f"Wiki index DB ready at {db_path} ({len(_zim_books)} ZIM books)")
    return _db_conn
def _query_wiki_place(db, place_name, country_code, state=None):
    """Return the highest-importance wiki_places row that has a summary, or None.

    Matches on place_name and country_code, plus state when one is given. The
    SQL text is assembled from fixed fragments only; all values are bound as
    parameters.
    """
    conditions = ["place_name = ?"]
    params = [place_name]
    if state:
        conditions.append("state = ?")
        params.append(state)
    conditions.append("country_code = ?")
    params.append(country_code)
    conditions.append("summary IS NOT NULL")
    where_clause = " AND ".join(conditions)
    sql = f"""
        SELECT
            summary,
            wikipedia_title,
            wikivoyage_title,
            wikipedia_exists,
            wikivoyage_exists,
            wiki_population
        FROM wiki_places
        WHERE {where_clause}
        ORDER BY importance DESC
        LIMIT 1
    """
    return db.execute(sql, params).fetchone()


def _zim_article_url(book_type, title):
    """Build '<public_url>/A/<Title_With_Underscores>' for a ZIM book, or None.

    Returns None when the book type is not loaded, its public_url is empty, or
    the title is empty. A trailing slash on the base URL is dropped so the
    result never contains '//A/'.
    """
    base_url = _zim_books.get(book_type)
    if not base_url or not title:
        return None
    return f"{base_url.rstrip('/')}/A/{title.replace(' ', '_')}"


def lookup_wiki(place_name, osm_key, osm_value, state, country_code):
    """
    Look up wiki data for a place by name and country, with optional state.

    When a state is given, a name + state + country match is tried first. If
    that finds nothing (or no state was given), the lookup falls back to
    name + country only. Among several candidates the row with the highest
    ``importance`` wins.

    Args:
        place_name: Name of the place (e.g., "Twin Falls")
        osm_key: OSM key (unused, kept for API compatibility)
        osm_value: OSM value (unused, kept for API compatibility)
        state: State/province name (may be None or empty)
        country_code: ISO country code (e.g., "us", "ca")

    Returns:
        dict with any of wiki_summary, wiki_url, wikivoyage_url,
        wiki_population, or None if the DB is unavailable, a required input
        is empty, no match is found, or the query fails.
    """
    db = _get_db()
    if db is None:
        return None

    # Normalize inputs
    place_name = (place_name or '').strip()
    state = (state or '').strip()
    country_code = (country_code or '').strip().lower()
    if not place_name or not country_code:
        return None

    try:
        row = None
        # Try exact match with state first (if state provided)
        if state:
            row = _query_wiki_place(db, place_name, country_code, state)
        # Fall back to name + country only (for places without state in query)
        if row is None:
            row = _query_wiki_place(db, place_name, country_code)
        if row is None:
            return None

        result = {}

        # Summary
        if row['summary']:
            result['wiki_summary'] = row['summary']

        # Wikipedia URL
        if row['wikipedia_exists']:
            wiki_url = _zim_article_url('wikipedia', row['wikipedia_title'])
            if wiki_url:
                result['wiki_url'] = wiki_url

        # Wikivoyage URL
        if row['wikivoyage_exists']:
            voyage_url = _zim_article_url('wikivoyage', row['wikivoyage_title'])
            if voyage_url:
                result['wikivoyage_url'] = voyage_url

        # Population
        if row['wiki_population']:
            result['wiki_population'] = row['wiki_population']

        return result if result else None
    except Exception as e:
        # Best-effort enrichment: a lookup failure must not break the caller.
        logger.warning(f"Wiki lookup error for {place_name}: {e}")
        return None
def is_available():
    """Report whether _get_db() can provide a wiki index connection."""
    connection = _get_db()
    return connection is not None