mirror of
https://github.com/zvx-echo6/recon.git
synced 2026-05-20 06:34:40 +02:00
Compare commits
5 commits
master
...
feature/na
| Author | SHA1 | Date | |
|---|---|---|---|
| 248f4bded4 | |||
| 1b3ad1bf9e | |||
|
|
bb7d0169e0 | ||
| c92178d90f | |||
|
|
2d1dcbf70c |
4 changed files with 355 additions and 0 deletions
|
|
@ -47,6 +47,18 @@ def _get_db():
|
|||
)
|
||||
""")
|
||||
_db_conn.commit()
|
||||
# Schema migration: add Google columns to place_cache if missing
|
||||
for col, coldef in [
|
||||
('google_place_id', 'TEXT'),
|
||||
('google_data', 'TEXT'),
|
||||
('google_fetched_at', 'INTEGER'),
|
||||
]:
|
||||
try:
|
||||
_db_conn.execute(f'ALTER TABLE place_cache ADD COLUMN {col} {coldef}')
|
||||
logger.info(f'Added column {col} to place_cache')
|
||||
except sqlite3.OperationalError:
|
||||
pass # Column already exists
|
||||
_db_conn.commit()
|
||||
return _db_conn
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -15,6 +15,66 @@ from .utils import setup_logging
|
|||
|
||||
logger = setup_logging('recon.netsyms_api')
|
||||
|
||||
|
||||
def _enrich_reverse_result_with_wiki(result):
    """
    Add wiki data to a reverse geocode result if available.

    Only runs when the ``has_kiwix_wiki`` deployment feature is enabled and
    ``wiki_index.is_available()`` is true. Enrichment is best-effort and
    additive only: whenever a prerequisite is missing the result is returned
    untouched, and malformed Photon properties never raise.

    Args:
        result: Parsed reverse-geocode result dict. The Photon properties are
            read from ``result['raw']``.

    Returns:
        The same ``result`` dict, possibly with ``wiki_summary``,
        ``wiki_url``, ``wikivoyage_url`` and ``wiki_population`` added.
    """
    try:
        from .deployment_config import get_deployment_config
        features = get_deployment_config().get('features', {})
        if not features.get('has_kiwix_wiki', False):
            return result
    except Exception:
        # A broken or missing deployment config must not break geocoding.
        return result

    try:
        from . import wiki_index
    except ImportError:
        return result

    if not wiki_index.is_available():
        return result

    # Extract match criteria from Photon raw props. ``raw`` may be missing,
    # explicitly None, or not a mapping at all; treat all of those as empty
    # instead of raising AttributeError on ``.get``.
    raw = result.get('raw')
    if not isinstance(raw, dict):
        raw = {}

    place_name = raw.get('name') or ''
    osm_key = raw.get('osm_key') or ''
    osm_value = raw.get('osm_value') or ''
    state = raw.get('state') or ''

    # Prefer the explicit country code; fall back to the full country name,
    # which is only understood for the United States and Canada.
    country_code = str(raw.get('countrycode') or '').lower()
    if not country_code:
        country_lower = str(raw.get('country') or '').lower()
        if 'united states' in country_lower or country_lower == 'usa':
            country_code = 'us'
        elif 'canada' in country_lower:
            country_code = 'ca'

    if not all((place_name, osm_key, osm_value, country_code)):
        return result

    # Look up wiki data
    wiki_data = wiki_index.lookup_wiki(place_name, osm_key, osm_value, state, country_code)
    if wiki_data:
        # Add wiki fields to result (additive only)
        for field in ('wiki_summary', 'wiki_url', 'wikivoyage_url', 'wiki_population'):
            if field in wiki_data:
                result[field] = wiki_data[field]

    return result
|
||||
|
||||
|
||||
|
||||
netsyms_bp = Blueprint('netsyms', __name__)
|
||||
geocode_bp = Blueprint('geocode', __name__)
|
||||
|
||||
|
|
@ -123,4 +183,7 @@ def api_reverse():
|
|||
from .geocode import _parse_photon_features
|
||||
results = _parse_photon_features(features, source='photon_reverse')
|
||||
|
||||
# Enrich results with wiki data
|
||||
results = [_enrich_reverse_result_with_wiki(r) for r in results]
|
||||
|
||||
return jsonify({'query': query_str, 'results': results, 'count': len(results)})
|
||||
|
|
|
|||
|
|
@ -22,6 +22,41 @@ OVERPASS_URL = "https://overpass-api.de/api/interpreter"
|
|||
OVERPASS_UA = "Navi/1.0 (forge.echo6.co/matt/recon)"
|
||||
VALID_OSM_TYPES = {"N", "W", "R"}
|
||||
|
||||
# US states and Canadian provinces for wikipedia title parsing
US_STATES = {
    'Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado',
    'Connecticut', 'Delaware', 'Florida', 'Georgia', 'Hawaii', 'Idaho',
    'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky', 'Louisiana',
    'Maine', 'Maryland', 'Massachusetts', 'Michigan', 'Minnesota',
    'Mississippi', 'Missouri', 'Montana', 'Nebraska', 'Nevada',
    'New Hampshire', 'New Jersey', 'New Mexico', 'New York',
    'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma', 'Oregon',
    'Pennsylvania', 'Rhode Island', 'South Carolina', 'South Dakota',
    'Tennessee', 'Texas', 'Utah', 'Vermont', 'Virginia', 'Washington',
    'West Virginia', 'Wisconsin', 'Wyoming', 'District of Columbia'
}

CANADIAN_PROVINCES = {
    'Alberta', 'British Columbia', 'Manitoba', 'New Brunswick',
    'Newfoundland and Labrador', 'Northwest Territories', 'Nova Scotia',
    'Nunavut', 'Ontario', 'Prince Edward Island', 'Quebec', 'Saskatchewan', 'Yukon'
}


def _contains_whole_phrase(text, phrase):
    """Return True if *phrase* occurs in *text* as a whole word or phrase.

    A hit only counts when the characters directly before and after it are
    not alphanumeric, so 'Kansas' is not found inside 'Arkansas'.
    """
    start = text.find(phrase)
    while start != -1:
        end = start + len(phrase)
        starts_clean = start == 0 or not text[start - 1].isalnum()
        ends_clean = end == len(text) or not text[end].isalnum()
        if starts_clean and ends_clean:
            return True
        start = text.find(phrase, start + 1)
    return False


def _parse_state_from_wikipedia(wikipedia_tag):
    """Parse state/province and country from a wikipedia extratag.

    Example: ``'en:Joliet, Illinois'`` -> ``('Illinois', 'us')``.

    Matching is deterministic and done in two passes:

    1. Exact match of a comma-separated title component, rightmost first
       (the usual "Place, Region" article title form).
    2. Otherwise the longest region name that appears in the title as a
       whole phrase, US states before Canadian provinces.

    Plain substring matching over a set is avoided on purpose: it lets
    'Kansas' match 'Arkansas' and 'Virginia' match 'West Virginia', with the
    winner depending on set iteration order.

    Args:
        wikipedia_tag: Value of the OSM ``wikipedia`` tag. Only English
            (``en:``) tags are parsed.

    Returns:
        ``(region_name, country_code)`` with country_code ``'us'`` or
        ``'ca'``, or ``(None, None)`` when nothing matches.
    """
    if not isinstance(wikipedia_tag, str) or not wikipedia_tag.startswith('en:'):
        return None, None

    # Article titles may be written with underscores instead of spaces.
    title = wikipedia_tag[3:].replace('_', ' ')

    # Pass 1: "Place, Region" -- the rightmost component is the most specific
    # hint, e.g. 'Ontario, California' is in California.
    for part in reversed(title.split(',')):
        part = part.strip()
        if part in US_STATES:
            return part, 'us'
        if part in CANADIAN_PROVINCES:
            return part, 'ca'

    # Pass 2: region name embedded in the title ('New York City',
    # 'Springfield (Illinois)'). Longest name first so that 'West Virginia'
    # beats 'Virginia'; ties are broken alphabetically for determinism.
    for names, country in ((US_STATES, 'us'), (CANADIAN_PROVINCES, 'ca')):
        for name in sorted(names, key=lambda n: (-len(n), n)):
            if _contains_whole_phrase(title, name):
                return name, country

    return None, None
|
||||
|
||||
|
||||
_db_conn = None
|
||||
|
||||
|
||||
|
|
@ -290,6 +325,11 @@ def _enrich_wiki_links(result):
|
|||
"""
|
||||
Rewrite wiki-related extratags to local Kiwix URLs where available.
|
||||
Falls back to public URLs. Only runs when has_wiki_rewriting is enabled.
|
||||
|
||||
Note: When has_kiwix_wiki is enabled, we skip rewriting 'wikipedia' since
|
||||
the wiki_index enrichment provides a proper wiki_url field. This keeps
|
||||
extratags.wikipedia in the original OSM format for frontend link builders.
|
||||
|
||||
Returns the (possibly enriched) result dict.
|
||||
"""
|
||||
try:
|
||||
|
|
@ -298,6 +338,8 @@ def _enrich_wiki_links(result):
|
|||
features = deploy_config.get('features', {})
|
||||
if not features.get('has_wiki_rewriting', False):
|
||||
return result
|
||||
# When has_kiwix_wiki is enabled, skip wikipedia rewriting (wiki_url handles it)
|
||||
has_kiwix_wiki = features.get('has_kiwix_wiki', False)
|
||||
except Exception:
|
||||
return result
|
||||
|
||||
|
|
@ -313,6 +355,9 @@ def _enrich_wiki_links(result):
|
|||
|
||||
rewrites = {}
|
||||
for tag in _WIKI_TAGS:
|
||||
# Skip wikipedia when has_kiwix_wiki is enabled (wiki_url provides the local link)
|
||||
if tag == 'wikipedia' and has_kiwix_wiki:
|
||||
continue
|
||||
value = extratags.get(tag)
|
||||
if not value:
|
||||
continue
|
||||
|
|
@ -328,6 +373,82 @@ def _enrich_wiki_links(result):
|
|||
|
||||
return result
|
||||
|
||||
|
||||
|
||||
# ── Wiki Index enrichment ───────────────────────────────────────────────
|
||||
|
||||
def _enrich_with_wiki_index(result):
    """
    Add wiki summary, URLs, and population from wiki_index.db.

    Only runs when the ``has_kiwix_wiki`` deployment feature is enabled and
    ``wiki_index.is_available()`` is true. Enrichment is additive only; when
    any prerequisite is missing the result is returned untouched.

    Args:
        result: Place-detail result dict. Reads ``name``, ``class``,
            ``type``, ``address`` (``state``, ``country_code``) and
            ``extratags`` (``wikipedia``, ``place``, ``linked_place``).

    Returns:
        The same ``result`` dict, possibly with ``wiki_summary``,
        ``wiki_url``, ``wikivoyage_url``, ``wiki_population`` added and
        ``sources['wiki_index']`` set to True.
    """
    try:
        from .deployment_config import get_deployment_config
        features = get_deployment_config().get('features', {})
        if not features.get('has_kiwix_wiki', False):
            return result
    except Exception:
        # A broken or missing deployment config must not break place detail.
        return result

    try:
        from . import wiki_index
    except ImportError:
        logger.debug("wiki_index module not available")
        return result

    if not wiki_index.is_available():
        return result

    # Extract match criteria from result. Keys may be present with a None
    # value (or, for address/extratags, a non-dict), so normalise before
    # calling ``.get`` on them.
    name = result.get('name') or ''
    osm_class = result.get('class') or ''
    osm_type_tag = result.get('type') or ''

    address = result.get('address')
    if not isinstance(address, dict):
        address = {}
    extratags = result.get('extratags')
    if not isinstance(extratags, dict):
        extratags = {}

    state = address.get('state') or ''
    country_code = address.get('country_code') or ''

    # If state/country missing, try to derive from wikipedia extratag
    wikipedia_tag = extratags.get('wikipedia')
    if wikipedia_tag and not (state and country_code):
        derived_state, derived_country = _parse_state_from_wikipedia(wikipedia_tag)
        state = state or derived_state or ''
        country_code = country_code or derived_country or ''

    # Handle boundary/administrative - get actual place type from extratags
    # (e.g. boundary:administrative with extratags.place='city' -> place:city)
    if osm_class == 'boundary' and osm_type_tag == 'administrative':
        place_tag = extratags.get('place') or extratags.get('linked_place')
        if place_tag:
            osm_class = 'place'
            osm_type_tag = place_tag

    if not (name and osm_class and osm_type_tag):
        return result

    # Look up wiki data
    wiki_data = wiki_index.lookup_wiki(name, osm_class, osm_type_tag, state, country_code)
    if not wiki_data:
        return result

    # Add wiki fields to result (additive only)
    for field in ('wiki_summary', 'wiki_url', 'wikivoyage_url', 'wiki_population'):
        if field in wiki_data:
            result[field] = wiki_data[field]

    result.setdefault('sources', {})['wiki_index'] = True
    logger.debug(f"Wiki index enrichment for {name}")

    return result
|
||||
|
||||
# ── Nominatim parsing ───────────────────────────────────────────────────
|
||||
|
||||
# Nominatim address array uses rank_address to indicate what each entry is.
|
||||
|
|
@ -444,6 +565,8 @@ def _parse_nominatim(data):
|
|||
'wheelchair': raw_extra.get('wheelchair'),
|
||||
'fee': raw_extra.get('fee'),
|
||||
'takeaway': raw_extra.get('takeaway'),
|
||||
'place': raw_extra.get('place'),
|
||||
'linked_place': raw_extra.get('linked_place'),
|
||||
}
|
||||
|
||||
# Category: use extratags.place for boundaries (e.g. "city"), else class/type
|
||||
|
|
@ -593,6 +716,7 @@ def get_place_detail(osm_type, osm_id):
|
|||
# 1. Check cache
|
||||
cached = cache_get(osm_type, osm_id)
|
||||
if cached:
|
||||
cached = _enrich_with_wiki_index(cached)
|
||||
logger.debug(f"Cache hit: {osm_type}/{osm_id}")
|
||||
return cached, 200
|
||||
|
||||
|
|
@ -625,6 +749,7 @@ def get_place_detail(osm_type, osm_id):
|
|||
nominatim_result = _enrich_with_overture(nominatim_result, osm_type, osm_id)
|
||||
nominatim_result = _enrich_with_google(nominatim_result, osm_type, osm_id)
|
||||
nominatim_result = _enrich_wiki_links(nominatim_result)
|
||||
nominatim_result = _enrich_with_wiki_index(nominatim_result)
|
||||
cache_put(osm_type, osm_id, nominatim_result, 'nominatim_local')
|
||||
return nominatim_result, 200
|
||||
|
||||
|
|
@ -658,6 +783,7 @@ def get_place_detail(osm_type, osm_id):
|
|||
overpass_result = _enrich_with_overture(overpass_result, osm_type, osm_id)
|
||||
overpass_result = _enrich_with_google(overpass_result, osm_type, osm_id)
|
||||
overpass_result = _enrich_wiki_links(overpass_result)
|
||||
overpass_result = _enrich_with_wiki_index(overpass_result)
|
||||
cache_put(osm_type, osm_id, overpass_result, 'overpass')
|
||||
return overpass_result, 200
|
||||
|
||||
|
|
|
|||
154
lib/wiki_index.py
Normal file
154
lib/wiki_index.py
Normal file
|
|
@ -0,0 +1,154 @@
|
|||
"""
|
||||
Wiki location index lookup.
|
||||
|
||||
Provides wiki summaries, URLs, and population data from the wiki_index.db
|
||||
for place detail enrichment. Read-only, opened once at startup.
|
||||
|
||||
DB path: /opt/recon/data/wiki_index.db
|
||||
"""
|
||||
import os
|
||||
import sqlite3
|
||||
|
||||
from .utils import setup_logging
|
||||
|
||||
logger = setup_logging('recon.wiki_index')
|
||||
|
||||
_db_conn = None
|
||||
_zim_books = {}
|
||||
|
||||
|
||||
def _get_db():
    """Return a module-level SQLite connection (lazy init, read-only).

    On first successful use the wiki index database is opened read-only and
    the ``zim_books`` table is loaded into ``_zim_books`` (book_type ->
    public_url) for URL construction. The connection is published in
    ``_db_conn`` only after that load succeeded, so a failure never leaves a
    half-initialised connection behind for later calls.

    Returns:
        The shared ``sqlite3.Connection``, or None when the database file
        does not exist or could not be opened/read.
    """
    global _db_conn, _zim_books

    if _db_conn is not None:
        return _db_conn

    db_path = os.path.join(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
        'data', 'wiki_index.db'
    )

    if not os.path.exists(db_path):
        logger.warning(f"Wiki index DB not found at {db_path}")
        return None

    conn = None
    try:
        # Open read-only with URI. Path.as_uri() percent-encodes characters
        # such as '?', '#', '%' and spaces that would otherwise corrupt the
        # URI; db_path is absolute, which as_uri() requires.
        from pathlib import Path
        conn = sqlite3.connect(
            Path(db_path).as_uri() + "?mode=ro",
            uri=True,
            check_same_thread=False,
        )
        conn.row_factory = sqlite3.Row

        # Load zim_books for URL construction
        books = {
            row['book_type']: row['public_url']
            for row in conn.execute("SELECT book_type, public_url FROM zim_books")
        }
    except Exception as e:
        logger.error(f"Failed to open wiki index DB: {e}")
        if conn is not None:
            # Do not leak the handle, and do not publish it in _db_conn.
            conn.close()
        return None

    # Publish only after everything succeeded. _zim_books is updated in place
    # so existing references to the dict stay valid.
    _zim_books.clear()
    _zim_books.update(books)
    _db_conn = conn

    logger.info(f"Wiki index DB ready at {db_path} ({len(_zim_books)} ZIM books)")
    return _db_conn
|
||||
|
||||
|
||||
def lookup_wiki(place_name, osm_key, osm_value, state, country_code):
    """
    Look up wiki data for a place by name and country, with optional state.

    The best row (highest ``importance``, non-NULL summary) matching name,
    state and country is used when a state is given; otherwise, or when that
    finds nothing, the best row matching name and country only.

    Args:
        place_name: Name of the place (e.g., "Twin Falls")
        osm_key: OSM key (unused, kept for API compatibility)
        osm_value: OSM value (unused, kept for API compatibility)
        state: State/province name (may be None or empty)
        country_code: ISO country code (e.g., "us", "ca")

    Returns:
        dict with any of wiki_summary, wiki_url, wikivoyage_url,
        wiki_population, or None if no match was found, the index is
        unavailable, or the lookup failed.
    """
    db = _get_db()
    if db is None:
        return None

    # Normalize inputs
    place_name = (place_name or '').strip()
    state = (state or '').strip()
    country_code = (country_code or '').strip().lower()

    if not place_name or not country_code:
        return None

    # Both queries share everything except the optional state filter. Only
    # constant fragments are concatenated; all values are bound parameters.
    select_sql = (
        "SELECT summary, wikipedia_title, wikivoyage_title, "
        "wikipedia_exists, wikivoyage_exists, wiki_population "
        "FROM wiki_places "
        "WHERE place_name = ? AND country_code = ?"
    )
    tail_sql = " AND summary IS NOT NULL ORDER BY importance DESC LIMIT 1"

    def _article_url(book_type, exists, title):
        """Build the article URL for one ZIM book, or None if not possible."""
        base_url = _zim_books.get(book_type)
        # A book row with a NULL public_url must not yield "None/A/...".
        if base_url is None or not exists or not title:
            return None
        # Tolerate a trailing slash on the configured base URL.
        return f"{str(base_url).rstrip('/')}/A/{title.replace(' ', '_')}"

    try:
        row = None

        # Try exact match with state first (if state provided)
        if state:
            row = db.execute(
                select_sql + " AND state = ?" + tail_sql,
                (place_name, country_code, state),
            ).fetchone()

        # Fall back to name + country only.
        # NOTE(review): this also runs when a state WAS given but did not
        # match, so a same-named place in another state can be returned.
        # Kept as-is for compatibility -- confirm this is intended.
        if not row:
            row = db.execute(
                select_sql + tail_sql,
                (place_name, country_code),
            ).fetchone()

        if not row:
            return None

        result = {}

        # Summary
        if row['summary']:
            result['wiki_summary'] = row['summary']

        # Wikipedia URL
        wiki_url = _article_url('wikipedia', row['wikipedia_exists'], row['wikipedia_title'])
        if wiki_url is not None:
            result['wiki_url'] = wiki_url

        # Wikivoyage URL
        voyage_url = _article_url('wikivoyage', row['wikivoyage_exists'], row['wikivoyage_title'])
        if voyage_url is not None:
            result['wikivoyage_url'] = voyage_url

        # Population
        if row['wiki_population']:
            result['wiki_population'] = row['wiki_population']

        return result if result else None

    except Exception as e:
        # Enrichment is best-effort: log and report "no data".
        logger.warning(f"Wiki lookup error for {place_name}: {e}")
        return None
|
||||
|
||||
|
||||
def is_available():
    """Report whether a wiki index DB connection can be obtained."""
    connection = _get_db()
    return connection is not None
|
||||
Loading…
Add table
Add a link
Reference in a new issue