decouple: remove /api/wiki-rewrite (migrated to navi-places)

PR-B of decouple #4-REWRITE — the LAST recon→navi decoupling step. navi-places now owns the Kiwix link-rewrite logic in-process (navi-backend PR-A 7103c27, deployed + verified: Twin Falls live route returns wiki_rewrites local/public from navi's own wiki_cache.db; zero outbound calls to recon /api/wiki-rewrite). - DELETE lib/wiki_rewrite.py (the Kiwix rewrite logic — ported to navi-places). - DELETE lib/wiki_rewrite_api.py (the /api/wiki-rewrite blueprint). - DELETE lib/wiki_rewrite_api_test.py (tests the deleted endpoint). - api.py: drop the wiki_rewrite_bp import + register_blueprint + section comment. Verified zero recon consumers: nothing in recon imports wiki_rewrite — it was purely an HTTP endpoint for navi-places. After this, recon services make and receive zero navi-ecosystem runtime calls; recon is a fully separate product. Out-of-band (post-deploy): DROP TABLE wiki_cache from /opt/recon/data/place_cache.db (table only — place_cache + google_api_calls stay). Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-10 08:54:34 +02:00 · 2026-05-23 21:21:22 -06:00 · 2026-05-23 21:21:22 -06:00 · 6365fe6756
commit 6365fe6756
parent ac99723e51
4 changed files with 0 additions and 436 deletions
--- a/lib/api.py
+++ b/lib/api.py
@ -62,11 +62,6 @@ app.request_class = _LargeZimRequest
 from .netsyms_api import netsyms_bp
 app.register_blueprint(netsyms_bp)

-# ── Wiki-rewrite Blueprint (extraction #5 prep — HTTP wrapper over rewrite_wiki_link) ──
-from .wiki_rewrite_api import wiki_rewrite_bp
-app.register_blueprint(wiki_rewrite_bp)
-
-

 # ── Navigation Constants ──

--- a/lib/wiki_rewrite.py
+++ b/lib/wiki_rewrite.py
@ -1,324 +0,0 @@
-"""
-Wiki link rewriter — rewrites OSM wikipedia/wikidata/wikivoyage/appropedia
-links to local Kiwix URLs where the article exists in a loaded ZIM.
-
-Falls back silently to public URLs when article is unavailable locally.
-Caches positive results only in place_cache.db.
-
-Kiwix catalog is parsed from the OPDS Atom feed at startup and refreshed
-hourly to pick up newly loaded ZIMs without a restart.
-
-Operations note:
-  - After loading a new ZIM, either restart RECON (forces fresh catalog
-    fetch) or wait up to 1 hour for automatic refresh.
-  - To invalidate the wiki cache (e.g. after ZIM update):
-      sqlite3 /opt/recon/data/place_cache.db "DELETE FROM wiki_cache;"
-"""
-import os
-import re
-import sqlite3
-import time
-import xml.etree.ElementTree as ET
-from urllib.parse import unquote, quote
-
-import requests as http_requests
-
-from .utils import setup_logging
-
-logger = setup_logging('recon.wiki_rewrite')
-
-# ── Configuration ───────────────────────────────────────────────────────
-
-KIWIX_BASE = "http://localhost:8430"
-KIWIX_PUBLIC_BASE = "https://wiki.echo6.co"
-KIWIX_CATALOG_URL = f"{KIWIX_BASE}/catalog/v2/entries"
-HEAD_TIMEOUT = 1.5  # seconds
-CATALOG_REFRESH_INTERVAL = 3600  # 1 hour
-
-# OPDS Atom namespace
-_ATOM_NS = "http://www.w3.org/2005/Atom"
-
-# ── ZIM catalog map ─────────────────────────────────────────────────────
-
-_zim_map = {}        # source_type → content_path  e.g. 'wikipedia' → 'wikipedia_en_all_maxi_2026-02'
-_zim_map_ts = 0.0    # last refresh timestamp
-
-# Prefix-to-source-type mapping (order matters: longest prefix first)
-_ZIM_PREFIX_MAP = [
-    ('wikipedia_en_all', 'wikipedia'),
-    ('appropedia_en_all', 'appropedia'),
-    ('wikivoyage_en', 'wikivoyage'),
-    ('wikidata_en', 'wikidata'),
-]
-
-
-def _discover_zims():
-    """Parse Kiwix OPDS Atom catalog to map source types to content paths."""
-    global _zim_map, _zim_map_ts
-
-    try:
-        resp = http_requests.get(KIWIX_CATALOG_URL, timeout=5)
-        if resp.status_code != 200:
-            logger.warning(f"Kiwix catalog returned HTTP {resp.status_code}")
-            return
-
-        root = ET.fromstring(resp.content)
-        new_map = {}
-
-        for entry in root.findall(f"{{{_ATOM_NS}}}entry"):
-            name_el = entry.find(f"{{{_ATOM_NS}}}name")
-            if name_el is None:
-                continue
-            book_name = name_el.text or ""
-
-            # <link type="text/html" href="/content/..."/>
-            content_path = None
-            for link in entry.findall(f"{{{_ATOM_NS}}}link"):
-                if link.get("type") == "text/html":
-                    href = link.get("href", "")
-                    if href.startswith("/content/"):
-                        content_path = href[len("/content/"):]
-                    break
-
-            if not content_path:
-                continue
-
-            # Match book name against known prefixes
-            for prefix, source_type in _ZIM_PREFIX_MAP:
-                if book_name.startswith(prefix):
-                    new_map[source_type] = content_path
-                    break
-
-        _zim_map = new_map
-        _zim_map_ts = time.time()
-        logger.info(f"ZIM catalog refreshed: {new_map}")
-
-    except Exception as e:
-        logger.warning(f"Failed to discover ZIMs from Kiwix catalog: {e}")
-
-
-def _ensure_zim_map():
-    """Lazy-load and refresh ZIM map if stale."""
-    if not _zim_map or (time.time() - _zim_map_ts) > CATALOG_REFRESH_INTERVAL:
-        _discover_zims()
-
-
-# ── Database (wiki_cache in place_cache.db) ─────────────────────────────
-
-_db_conn = None
-
-
-def _get_db():
-    """Return a module-level SQLite connection to place_cache.db (lazy init)."""
-    global _db_conn
-    if _db_conn is not None:
-        return _db_conn
-
-    db_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'data')
-    os.makedirs(db_dir, exist_ok=True)
-    db_path = os.path.join(db_dir, 'place_cache.db')
-
-    _db_conn = sqlite3.connect(db_path, check_same_thread=False)
-    _db_conn.execute("PRAGMA journal_mode=WAL")
-    _db_conn.execute("PRAGMA synchronous=NORMAL")
-    _db_conn.execute("""
-        CREATE TABLE IF NOT EXISTS wiki_cache (
-            source_type TEXT NOT NULL,
-            article_id  TEXT NOT NULL,
-            kiwix_url   TEXT NOT NULL,
-            cached_at   INTEGER NOT NULL,
-            PRIMARY KEY (source_type, article_id)
-        )
-    """)
-    _db_conn.commit()
-    logger.info(f"Wiki cache table ready in {db_path}")
-    return _db_conn
-
-
-# ── URL classification ──────────────────────────────────────────────────
-
-# Patterns for OSM wikipedia/wikidata tag values
-_WIKI_TAG_RE = re.compile(r'^(?:en:)?(.+)$')  # "en:Title" or just "Title"
-_WIKI_URL_RE = re.compile(r'https?://en\.wikipedia\.org/wiki/(.+)')
-_WIKIDATA_TAG_RE = re.compile(r'^(Q\d+)$')
-_WIKIDATA_URL_RE = re.compile(r'https?://(?:www\.)?wikidata\.org/wiki/(Q\d+)')
-_WIKIVOYAGE_URL_RE = re.compile(r'https?://en\.wikivoyage\.org/wiki/(.+)')
-_APPROPEDIA_URL_RE = re.compile(r'https?://(?:www\.)?appropedia\.org/(?:wiki/)?(.+)')
-
-
-def _normalize_article_id(article_id):
-    """Normalize article ID to MediaWiki/Kiwix convention: spaces → underscores."""
-    return article_id.replace(' ', '_')
-
-
-def classify_wiki_link(tag_name, value):
-    """
-    Classify an OSM extratag value into (source_type, article_id) or None.
-
-    tag_name: the extratags key ('wikipedia', 'wikidata', etc.)
-    value: the raw tag value from OSM
-
-    Article IDs are normalized to MediaWiki convention (spaces → underscores).
-    """
-    if not value or not isinstance(value, str):
-        return None
-
-    value = value.strip()
-
-    if tag_name == 'wikidata':
-        m = _WIKIDATA_TAG_RE.match(value)
-        if m:
-            return ('wikidata', m.group(1))
-        m = _WIKIDATA_URL_RE.match(value)
-        if m:
-            return ('wikidata', m.group(1))
-        return None
-
-    if tag_name == 'wikipedia':
-        # URL form: https://en.wikipedia.org/wiki/Title
-        m = _WIKI_URL_RE.match(value)
-        if m:
-            return ('wikipedia', _normalize_article_id(unquote(m.group(1))))
-        # Tag form: "en:Title" or "Title"
-        m = _WIKI_TAG_RE.match(value)
-        if m:
-            return ('wikipedia', _normalize_article_id(m.group(1)))
-        return None
-
-    if tag_name == 'wikivoyage':
-        m = _WIKIVOYAGE_URL_RE.match(value)
-        if m:
-            return ('wikivoyage', _normalize_article_id(unquote(m.group(1))))
-        # Plain tag: "en:Title" or "Title"
-        m = _WIKI_TAG_RE.match(value)
-        if m:
-            return ('wikivoyage', _normalize_article_id(m.group(1)))
-        return None
-
-    if tag_name == 'appropedia':
-        m = _APPROPEDIA_URL_RE.match(value)
-        if m:
-            return ('appropedia', _normalize_article_id(unquote(m.group(1))))
-        return ('appropedia', _normalize_article_id(value))
-
-    return None
-
-
-# ── URL builders ────────────────────────────────────────────────────────
-
-def build_kiwix_url(source_type, article_id):
-    """Build a public Kiwix URL. Returns None if source_type not in ZIM map."""
-    _ensure_zim_map()
-    content_path = _zim_map.get(source_type)
-    if not content_path:
-        return None
-    return f"{KIWIX_PUBLIC_BASE}/content/{content_path}/{quote(article_id, safe='/:@!$&\'()*+,;=')}"
-
-
-_PUBLIC_URL_TEMPLATES = {
-    'wikipedia':  "https://en.wikipedia.org/wiki/{id}",
-    'wikidata':   "https://www.wikidata.org/wiki/{id}",
-    'wikivoyage': "https://en.wikivoyage.org/wiki/{id}",
-    'appropedia': "https://www.appropedia.org/wiki/{id}",
-}
-
-
-def build_public_url(source_type, article_id):
-    """Build the canonical public URL for a wiki article."""
-    tmpl = _PUBLIC_URL_TEMPLATES.get(source_type)
-    if not tmpl:
-        return None
-    return tmpl.format(id=quote(article_id, safe='/:@!$&\'()*+,;='))
-
-
-# ── Kiwix availability check ───────────────────────────────────────────
-
-def check_kiwix_has_article(source_type, article_id):
-    """
-    Check if an article exists in local Kiwix.
-
-    Returns (bool, url):
-      - (True, kiwix_public_url) if article exists locally
-      - (False, None) if not found or Kiwix unavailable
-
-    Only positive results are cached.
-    """
-    # Check cache first
-    db = _get_db()
-    row = db.execute(
-        "SELECT kiwix_url FROM wiki_cache WHERE source_type=? AND article_id=?",
-        (source_type, article_id)
-    ).fetchone()
-    if row:
-        return (True, row[0])
-
-    # Build local HEAD URL
-    _ensure_zim_map()
-    content_path = _zim_map.get(source_type)
-    if not content_path:
-        return (False, None)
-
-    head_url = f"{KIWIX_BASE}/content/{content_path}/{quote(article_id, safe='/:@!$&\'()*+,;=')}"
-
-    try:
-        resp = http_requests.head(head_url, timeout=HEAD_TIMEOUT, allow_redirects=True)
-        if resp.status_code == 200:
-            kiwix_url = build_kiwix_url(source_type, article_id)
-            # Cache positive result
-            now = int(time.time())
-            db.execute("""
-                INSERT OR REPLACE INTO wiki_cache (source_type, article_id, kiwix_url, cached_at)
-                VALUES (?, ?, ?, ?)
-            """, (source_type, article_id, kiwix_url, now))
-            db.commit()
-            return (True, kiwix_url)
-        else:
-            return (False, None)
-    except Exception as e:
-        logger.debug(f"Kiwix HEAD failed for {source_type}/{article_id}: {e}")
-        return (False, None)
-
-
-# ── Primary entry point ────────────────────────────────────────────────
-
-def rewrite_wiki_link(tag_name, value):
-    """
-    Rewrite an OSM wiki tag value to a local Kiwix URL if available.
-
-    Returns (url, 'local'|'public') or (None, None) if unrecognized.
-    """
-    classified = classify_wiki_link(tag_name, value)
-    if not classified:
-        return (value, 'original')
-
-    source_type, article_id = classified
-
-    # Try local Kiwix
-    found, kiwix_url = check_kiwix_has_article(source_type, article_id)
-    if found and kiwix_url:
-        return (kiwix_url, 'local')
-
-    # Fall back to public URL
-    public_url = build_public_url(source_type, article_id)
-    if public_url:
-        return (public_url, 'public')
-
-    return (value, 'original')
-
-
-# ── Discovery stubs (disabled, for future activation) ───────────────────
-
-def discover_wikivoyage_article(name, category, lat, lon):
-    """
-    Discover a related Wikivoyage article for a place.
-    Enabled by has_wiki_discovery. Currently returns None.
-    """
-    return None
-
-
-def discover_appropedia_article(name, category):
-    """
-    Discover a related Appropedia article for a place.
-    Enabled by has_wiki_discovery. Currently returns None.
-    """
-    return None
--- a/lib/wiki_rewrite_api.py
+++ b/lib/wiki_rewrite_api.py
@ -1,34 +0,0 @@
-"""Wiki-rewrite API — read-only HTTP wrapper over wiki_rewrite.rewrite_wiki_link.
-
-Extraction #5 prep: lets the (future) navi-places service rewrite OSM wiki tags
-to local Kiwix URLs over HTTP instead of importing recon's wiki_rewrite module
-(which talks to Kiwix and the wiki_cache table in /opt/recon/data/place_cache.db).
-Additive only — does not change place_detail's in-process `_enrich_wiki_links`.
-
-  GET /api/wiki-rewrite?tag=<wikipedia|wikidata|wikivoyage|appropedia>&value=<raw>
-
-Public (no auth), matching /api/place/* and /api/wiki-enrich. 400 on missing
-value or unknown tag. No 404 — an unclassifiable value returns the original
-value with status "original" (mirrors rewrite_wiki_link).
-"""
-from flask import Blueprint, request, jsonify
-
-from .wiki_rewrite import rewrite_wiki_link
-
-wiki_rewrite_bp = Blueprint('wiki_rewrite', __name__)
-
-_KNOWN_TAGS = {'wikipedia', 'wikidata', 'wikivoyage', 'appropedia'}
-
-
-@wiki_rewrite_bp.route('/api/wiki-rewrite')
-def api_wiki_rewrite():
-    tag = (request.args.get('tag') or '').strip().lower()
-    value = (request.args.get('value') or '').strip()
-
-    if not value:
-        return jsonify({'error': 'value is required'}), 400
-    if tag not in _KNOWN_TAGS:
-        return jsonify({'error': f"tag must be one of {sorted(_KNOWN_TAGS)}"}), 400
-
-    url, status = rewrite_wiki_link(tag, value)
-    return jsonify({'url': url, 'status': status})
--- a/lib/wiki_rewrite_api_test.py
+++ b/lib/wiki_rewrite_api_test.py
@ -1,73 +0,0 @@
-"""Tests for the /api/wiki-rewrite endpoint (extraction #5 prep).
-
-Plain-assert style (recon's venv has no pytest). Builds a minimal Flask app
-with only wiki_rewrite_bp registered. Mocks `wiki_rewrite.check_kiwix_has_article`
-to control the local-Kiwix-hit vs. fallback paths without touching Kiwix or the
-wiki_cache DB. classify_wiki_link (pure regex) runs for real. Run with pytest,
-or directly:  python -m lib.wiki_rewrite_api_test
-"""
-from flask import Flask
-
-from lib import wiki_rewrite
-from lib.wiki_rewrite_api import wiki_rewrite_bp
-
-
-def _client(kiwix_hit):
-    """kiwix_hit: (found_bool, url) returned by a stubbed check_kiwix_has_article."""
-    wiki_rewrite.check_kiwix_has_article = lambda source_type, article_id: kiwix_hit
-    app = Flask(__name__)
-    app.register_blueprint(wiki_rewrite_bp)
-    return app.test_client()
-
-
-def test_local_kiwix_hit():
-    url = "https://wiki.echo6.co/content/wikipedia/Filer,_Idaho"
-    c = _client((True, url))
-    resp = c.get("/api/wiki-rewrite?tag=wikipedia&value=Filer, Idaho")
-    assert resp.status_code == 200, resp.status_code
-    d = resp.get_json()
-    assert d["status"] == "local"
-    assert d["url"] == url
-
-
-def test_public_fallback_when_not_in_kiwix():
-    c = _client((False, None))  # not in Kiwix -> canonical public URL
-    resp = c.get("/api/wiki-rewrite?tag=wikipedia&value=Filer")
-    assert resp.status_code == 200, resp.status_code
-    d = resp.get_json()
-    assert d["status"] == "public"
-    assert d["url"] == "https://en.wikipedia.org/wiki/Filer"
-
-
-def test_unclassifiable_returns_original():
-    # 'wikidata' requires a Q-id; a non-matching value -> classify None -> original.
-    c = _client((False, None))
-    resp = c.get("/api/wiki-rewrite?tag=wikidata&value=not-a-qid")
-    assert resp.status_code == 200, resp.status_code
-    d = resp.get_json()
-    assert d["status"] == "original"
-    assert d["url"] == "not-a-qid"
-
-
-def test_missing_value_400():
-    c = _client((False, None))
-    assert c.get("/api/wiki-rewrite?tag=wikipedia").status_code == 400
-
-
-def test_unknown_tag_400():
-    c = _client((False, None))
-    assert c.get("/api/wiki-rewrite?tag=facebook&value=x").status_code == 400
-
-
-if __name__ == "__main__":
-    failures = 0
-    for _name, _fn in sorted(globals().items()):
-        if _name.startswith("test_") and callable(_fn):
-            try:
-                _fn()
-                print(f"PASS {_name}")
-            except Exception as exc:  # noqa: BLE001
-                failures += 1
-                print(f"FAIL {_name}: {exc!r}")
-    print("OK" if failures == 0 else f"{failures} FAILED")
-    raise SystemExit(1 if failures else 0)