From f42b1fef3ba07bd5a7f3fa4b63adeb8c8ec38180 Mon Sep 17 00:00:00 2001 From: malice Date: Fri, 22 May 2026 13:23:08 -0600 Subject: [PATCH] recon: add /api/wiki-enrich endpoint (extraction #5 prep, additive) (#8) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit HTTP wrapper over the wiki_index lookup so the (future) navi-places service can fetch wiki enrichment over HTTP instead of reading recon's 2.1 GB data/wiki_index.db directly (Phase A option B — HTTP coupling). GET /api/wiki-enrich?wikidata= (primary key) GET /api/wiki-enrich?name=&country= (fallback key) -> 200 {wiki_summary?, wiki_population?, wiki_url?, wikivoyage_url?} -> 400 if no usable key; 404 on no match. Public (no auth, like /api/place/*). Route keys are wikidata_id / name+country — NOT osm_type/osm_id — because that is how wiki_index is actually queried (the in-process _enrich_with_wiki_index looks up by result['wikidata_id'] then name+country_code, never by OSM id; see extraction-5-wiki-enrich-investigation.md). An osm-keyed route would have forced a redundant in-recon place lookup. Changes (additive only): - lib/place_detail.py: new standalone lookup_wiki_index(wikidata_id, name, country_code) doing the same two SELECTs + field/URL mapping as the in-process path, returning a dict or None. Pure DB read, never raises. `_enrich_with_wiki_index` is LEFT UNTOUCHED — it can be DRY-refactored to delegate to this in a later PR; the in-process enrichment path is unchanged. - lib/wiki_enrich_api.py: new wiki_enrich_bp blueprint with the route. - lib/api.py: register the blueprint (one block). - lib/wiki_enrich_api_test.py: 4 tests (hit-by-wikidata + decoded fields, no-match -> 404, name+country fallback, no-key -> 400) over an in-memory fixture DB; plain-assert style + __main__ runner (recon venv has no pytest). Verified green against recon's venv (flask 3.1.2). 
def _wiki_row_to_fields(row):
    """Map one ``wiki_places`` row to the public wiki enrichment fields.

    ``row`` must support lookup by column name for ``summary``,
    ``wiki_population``, ``wikipedia_title`` and ``wikivoyage_title``.
    Only columns holding a truthy value produce an output key, so the result
    may be an empty dict.
    """
    fields = {}

    if row["summary"]:
        fields["wiki_summary"] = row["summary"]

    population = row["wiki_population"]
    if population:
        try:
            fields["wiki_population"] = int(population)
        except (ValueError, TypeError):
            # Not an integer literal: pass the stored value through unchanged
            # rather than dropping it.
            fields["wiki_population"] = population

    # NOTE(review): titles only get spaces turned into underscores; they are
    # not percent-encoded. Kept as-is so the URLs stay the same as before.
    if row["wikipedia_title"]:
        title = row["wikipedia_title"].replace(" ", "_")
        fields["wiki_url"] = f"https://en.wikipedia.org/wiki/{title}"
    if row["wikivoyage_title"]:
        title = row["wikivoyage_title"].replace(" ", "_")
        fields["wikivoyage_url"] = f"https://en.wikivoyage.org/wiki/{title}"

    return fields


def lookup_wiki_index(wikidata_id=None, name=None, country_code=None):
    """Standalone wiki_index lookup, extracted for the /api/wiki-enrich endpoint
    (extraction #5: navi-places HTTP-fetches wiki enrichment instead of reading
    the 2.1 GB wiki_index.db directly).

    The lookup is tried by ``wikidata_id`` first and, if that yields no row,
    by ``name`` + ``country_code``.

    Args:
        wikidata_id: A Wikidata id, or an entity URL starting with ``http``
            whose last path segment is the id. Surrounding whitespace and a
            trailing ``/`` are ignored.
        name: Compared for equality with ``wiki_places.place_name``.
        country_code: Lower-cased, then compared for equality with
            ``wiki_places.country_code``. Only used together with ``name``.

    Returns:
        A dict holding whichever of ``wiki_summary``, ``wiki_population``,
        ``wiki_url`` and ``wikivoyage_url`` the matched row provides, or None
        when no row matches, the matched row provides none of them, or
        ``_get_wiki_index_db()`` returns a falsy handle.

    Pure DB read with no feature-flag gating (callers decide whether to call).
    Best-effort: any exception, including one raised while obtaining the DB
    handle, is logged at debug level and reported as None.

    NOTE: additive only — `_enrich_with_wiki_index` is intentionally left
    untouched here; it can be DRY-refactored to delegate to this in a later PR.
    """
    try:
        # Obtained inside the try so a failure here cannot escape either.
        db = _get_wiki_index_db()
        if not db:
            return None

        cur = db.cursor()
        row = None

        wid = wikidata_id
        if isinstance(wid, str):
            wid = wid.strip()
            if wid.startswith("http"):
                # Drop any trailing slash first; otherwise the "last segment"
                # of ".../entity/Q42/" is the empty string.
                wid = wid.rstrip("/").rsplit("/", 1)[-1]

        # An id that normalised to "" is treated as absent instead of being
        # queried as wikidata_id = ''.
        if wid:
            cur.execute(
                "SELECT summary, wiki_population, wikipedia_title, wikivoyage_title FROM wiki_places WHERE wikidata_id = ?",
                (wid,)
            )
            row = cur.fetchone()

        if not row and name and country_code:
            cur.execute(
                "SELECT summary, wiki_population, wikipedia_title, wikivoyage_title FROM wiki_places WHERE place_name = ? AND country_code = ? LIMIT 1",
                (name, country_code.lower())
            )
            row = cur.fetchone()

        if not row:
            return None

        # A matched row with nothing usable is reported like a miss.
        return _wiki_row_to_fields(row) or None

    except Exception as e:
        # Lazy %-formatting: the message is only built if debug is enabled.
        logger.debug("wiki_index lookup error: %s", e)
        return None
@wiki_enrich_bp.route('/api/wiki-enrich')
def api_wiki_enrich():
    """Serve wiki enrichment for one place, keyed by query-string parameters.

    Accepted keys: ``wikidata`` on its own, or ``name`` together with
    ``country``. Responds 400 when neither key is usable, 404 when
    ``lookup_wiki_index`` returns None, and otherwise 200 with the dict it
    returned as JSON.
    """
    def _param(key):
        # Missing, empty and whitespace-only values all collapse to None.
        return (request.args.get(key) or '').strip() or None

    wikidata = _param('wikidata')
    name = _param('name')
    country = _param('country')

    has_fallback_key = bool(name and country)
    if wikidata is None and not has_fallback_key:
        return jsonify({'error': 'provide ?wikidata= or ?name=&country='}), 400

    match = lookup_wiki_index(wikidata_id=wikidata, name=name, country_code=country)
    if match is not None:
        return jsonify(match)
    return jsonify({'error': 'no wiki match'}), 404
def _client():
    """Return a test client backed by a one-row in-memory wiki_index fixture.

    Every call builds a new SQLite database, installs it as
    ``place_detail._wiki_index_conn`` and registers only ``wiki_enrich_bp`` on
    a new Flask app.
    """
    fixture = sqlite3.connect(":memory:", check_same_thread=False)
    fixture.row_factory = sqlite3.Row
    fixture.execute(
        "CREATE TABLE wiki_places (wikidata_id TEXT, place_name TEXT, country_code TEXT, "
        "summary TEXT, wiki_population TEXT, wikipedia_title TEXT, wikivoyage_title TEXT)"
    )
    filer_row = ("Q830149", "Filer", "us", "A city in Idaho.", "2508", "Filer, Idaho", "Filer")
    fixture.execute("INSERT INTO wiki_places VALUES (?,?,?,?,?,?,?)", filer_row)
    fixture.commit()

    # Swap the module-level connection for the fixture; the intent is that the
    # lookup then reads this database instead of opening the on-disk file.
    place_detail._wiki_index_conn = fixture

    minimal_app = Flask(__name__)
    minimal_app.register_blueprint(wiki_enrich_bp)
    return minimal_app.test_client()


def test_wiki_enrich_hit_by_wikidata():
    """A known wikidata id returns 200 with all four mapped fields."""
    response = _client().get("/api/wiki-enrich?wikidata=Q830149")
    assert response.status_code == 200, response.status_code

    payload = response.get_json()
    expected = {
        "wiki_summary": "A city in Idaho.",
        "wiki_population": 2508,  # stored as the text "2508", cast to int
        "wiki_url": "https://en.wikipedia.org/wiki/Filer,_Idaho",
        "wikivoyage_url": "https://en.wikivoyage.org/wiki/Filer",
    }
    for field, value in expected.items():
        assert payload[field] == value


def test_wiki_enrich_no_match_404():
    """A wikidata id absent from the fixture returns 404."""
    response = _client().get("/api/wiki-enrich?wikidata=Q9999999")
    assert response.status_code == 404, response.status_code


def test_wiki_enrich_name_country_fallback():
    """name + country finds the row; the upper-case country still matches."""
    response = _client().get("/api/wiki-enrich?name=Filer&country=US")
    assert response.status_code == 200, response.status_code
    payload = response.get_json()
    assert payload["wiki_summary"] == "A city in Idaho."
def test_wiki_enrich_no_key_400():
    """Requests without a usable lookup key are rejected with 400."""
    client = _client()

    no_params = client.get("/api/wiki-enrich")
    assert no_params.status_code == 400

    # name without country is not a usable key
    name_only = client.get("/api/wiki-enrich?name=Filer")
    assert name_only.status_code == 400


if __name__ == "__main__":
    # Minimal runner: call every module-level test_* callable in name order,
    # report each outcome, and exit non-zero if any of them raised.
    _failed = 0
    for _label, _candidate in sorted(globals().items()):
        if not (_label.startswith("test_") and callable(_candidate)):
            continue
        try:
            _candidate()
            print(f"PASS {_label}")
        except Exception as exc:  # noqa: BLE001
            _failed += 1
            print(f"FAIL {_label}: {exc!r}")
    print("OK" if _failed == 0 else f"{_failed} FAILED")
    raise SystemExit(1 if _failed else 0)