implement three-tier cascade: Qdrant → Kiwix → SearXNG

- Add Kiwix integration with HTML parser for offline Wikipedia search - Add SearXNG integration for web search fallback - Cascade triggered when FlashRank top-1 score < 0.5 threshold - Context tagging: [DOMAIN_KNOWLEDGE], [OFFLINE_WIKI], [WEB_SEARCH] - Cascade decision logging to /opt/recon/logs/cascade.jsonl - Graceful degradation: skip unavailable tiers - Version bumped to 5.0.0 Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-05-20 06:34:40 +02:00 · 2026-05-07 22:33:14 +00:00 · 2026-05-07 22:33:14 +00:00 · 81611110eb
commit 81611110eb
parent 5e5399de5c
1 changed files with 1653 additions and 1119 deletions
--- a/tools/recon_rag_tool.py
+++ b/tools/recon_rag_tool.py
@ -1,8 +1,8 @@
 """
 title: RECON Knowledge Base
 author: Echo6
-version: 4.3.0
-description: RAG filter that searches the RECON knowledge base and injects reference material into Aurora's context. Emits citations with PDF download links. Supports intent-based metadata filtering, FlashRank neural reranking with MMR diversity, Ollama-powered query expansion, transcript source boosting, semantic query routing with inline navigation, and address book place resolution.
+version: 5.0.0
+description: RAG filter with three-tier cascade: Qdrant (domain knowledge) → Kiwix (offline wiki) → SearXNG (web search). Supports intent-based metadata filtering, FlashRank neural reranking with MMR diversity, Ollama-powered query expansion, transcript source boosting, semantic query routing with inline navigation, and address book place resolution.
 """

 import logging
@ -10,8 +10,13 @@ import json
 import math
 import re
 import threading
+import html
+from datetime import datetime
+from html.parser import HTMLParser
+from pathlib import Path
 from typing import Optional, Callable, Awaitable
 from concurrent.futures import ThreadPoolExecutor, as_completed
+from urllib.parse import quote, unquote

 import requests
 from pydantic import BaseModel, Field
@ -22,6 +27,26 @@ log = logging.getLogger(__name__)
 # even if OWI instantiates separate Filter objects per call.
 _SOURCE_STORE: dict[str, list] = {}

+# ── CASCADE CONFIGURATION (v5.0.0) ───────────────────────────────────────────
+# FlashRank score threshold for Tier 1 (Qdrant). Below this, fall through to Tier 2.
+# Based on calibration: RECON queries cluster at 0.95-1.0, misses below 0.3.
+# 0.5 is conservative - will let more through to Kiwix than strictly necessary.
+CASCADE_CONFIDENCE_THRESHOLD = 0.5
+
+# Kiwix-serve configuration
+KIWIX_BASE_URL = "http://localhost:8430"
+KIWIX_SEARCH_TIMEOUT = 5  # seconds
+KIWIX_ARTICLE_TIMEOUT = 5  # seconds
+KIWIX_MAX_RESULTS = 3
+
+# SearXNG configuration
+SEARXNG_URL = "http://192.168.1.102:8080"
+SEARXNG_TIMEOUT = 5  # seconds
+SEARXNG_MAX_RESULTS = 5
+
+# Cascade logging
+CASCADE_LOG_PATH = Path("/opt/recon/logs/cascade.jsonl")
+
 # ── Semantic Query Router (v4.3.0) ───────────────────────────────────────────
 ROUTE_EXAMPLES = {
    "nav_route": [
@ -359,6 +384,247 @@ def _address_book_lookup(query: str, address_book_url: str) -> dict | None:

 # ── End router/nav code ──────────────────────────────────────────────────────

+# ── Kiwix Search Helpers (v5.0.0) ────────────────────────────────────────────
+
+class _KiwixResultParser(HTMLParser):
+    """Parse Kiwix search results HTML to extract articles."""
+    def __init__(self):
+        super().__init__()
+        self.results = []
+        self._in_results = False
+        self._in_li = False
+        self._in_cite = False
+        self._in_info = False
+        self._current = {}
+        self._capture_text = False
+
+    def handle_starttag(self, tag, attrs):
+        attrs_dict = dict(attrs)
+        if tag == "div" and "results" in attrs_dict.get("class", ""):
+            self._in_results = True
+        elif self._in_results and tag == "li":
+            self._in_li = True
+            self._current = {"title": "", "url": "", "snippet": "", "word_count": ""}
+        elif self._in_li and tag == "a" and not self._current.get("url"):
+            self._current["url"] = attrs_dict.get("href", "")
+            self._capture_text = True
+        elif self._in_li and tag == "cite":
+            self._in_cite = True
+            self._capture_text = True
+        elif self._in_li and tag == "div" and "informations" in attrs_dict.get("class", ""):
+            self._in_info = True
+            self._capture_text = True
+
+    def handle_endtag(self, tag):
+        if tag == "div" and self._in_results and not self._in_li:
+            self._in_results = False
+        elif tag == "li" and self._in_li:
+            if self._current.get("url"):
+                self.results.append(self._current)
+            self._current = {}
+            self._in_li = False
+        elif tag == "a" and self._capture_text and not self._in_cite:
+            self._capture_text = False
+        elif tag == "cite":
+            self._in_cite = False
+            self._capture_text = False
+        elif tag == "div" and self._in_info:
+            self._in_info = False
+            self._capture_text = False
+
+    def handle_data(self, data):
+        if self._capture_text and self._in_li:
+            text = data.strip()
+            if self._in_cite:
+                self._current["snippet"] += text + " "
+            elif self._in_info:
+                self._current["word_count"] = text
+            elif not self._current.get("title"):
+                self._current["title"] = text
+
+
+def _strip_html_tags(html_content: str) -> str:
+    """Simple HTML to plain text conversion using stdlib."""
+    # Remove script and style elements
+    text = re.sub(r'<script[^>]*>.*?</script>', '', html_content, flags=re.DOTALL | re.IGNORECASE)
+    text = re.sub(r'<style[^>]*>.*?</style>', '', text, flags=re.DOTALL | re.IGNORECASE)
+    # Remove tags
+    text = re.sub(r'<[^>]+>', ' ', text)
+    # Decode entities
+    text = html.unescape(text)
+    # Normalize whitespace
+    text = re.sub(r'\s+', ' ', text).strip()
+    return text
+
+
+def _fetch_kiwix_books() -> list[str]:
+    """Fetch list of available books from kiwix-serve catalog."""
+    try:
+        resp = requests.get(
+            f"{KIWIX_BASE_URL}/catalog/v2/entries",
+            timeout=KIWIX_SEARCH_TIMEOUT,
+        )
+        resp.raise_for_status()
+        # Extract book names from href attributes
+        books = re.findall(r'href="/content/([^"]+)"', resp.text)
+        return list(set(books))  # dedupe
+    except Exception as e:
+        log.warning(f"Failed to fetch Kiwix book list: {e}")
+        return []
+
+
+def _search_kiwix_book(book: str, query: str, limit: int = 5) -> list[dict]:
+    """Search a single Kiwix book and return results."""
+    try:
+        resp = requests.get(
+            f"{KIWIX_BASE_URL}/search",
+            params={"content": book, "pattern": query, "limit": limit},
+            timeout=KIWIX_SEARCH_TIMEOUT,
+        )
+        if resp.status_code != 200:
+            return []
+
+        parser = _KiwixResultParser()
+        parser.feed(resp.text)
+
+        # Add book name to results
+        for r in parser.results:
+            r["book"] = book
+
+        return parser.results
+    except Exception as e:
+        log.warning(f"Kiwix search failed for {book}: {e}")
+        return []
+
+
+def _fetch_kiwix_article(url_path: str) -> str:
+    """Fetch and extract text content from a Kiwix article."""
+    try:
+        resp = requests.get(
+            f"{KIWIX_BASE_URL}{url_path}",
+            timeout=KIWIX_ARTICLE_TIMEOUT,
+        )
+        resp.raise_for_status()
+
+        # Extract main content - try to find article body
+        content = resp.text
+
+        # Try to extract just the main content area
+        main_match = re.search(r'<main[^>]*>(.*?)</main>', content, re.DOTALL | re.IGNORECASE)
+        if main_match:
+            content = main_match.group(1)
+        else:
+            # Try article tag
+            article_match = re.search(r'<article[^>]*>(.*?)</article>', content, re.DOTALL | re.IGNORECASE)
+            if article_match:
+                content = article_match.group(1)
+            else:
+                # Try body content div
+                body_match = re.search(r'<div[^>]*class="[^"]*content[^"]*"[^>]*>(.*?)</div>', content, re.DOTALL | re.IGNORECASE)
+                if body_match:
+                    content = body_match.group(1)
+
+        return _strip_html_tags(content)[:4000]  # Limit to 4000 chars
+    except Exception as e:
+        log.warning(f"Failed to fetch Kiwix article {url_path}: {e}")
+        return ""
+
+
+def _search_kiwix(query: str, books: list[str]) -> list[dict]:
+    """Search Kiwix across specified books and return merged results."""
+    all_results = []
+
+    # Prioritize English Wikipedia and other English content
+    priority_books = []
+    other_books = []
+    for book in books:
+        if "wikipedia_en" in book or "_en_" in book or "_eng_" in book:
+            priority_books.append(book)
+        elif not any(lang in book for lang in ["_af_", "_de_", "_fr_", "_es_"]):
+            other_books.append(book)
+
+    # Search priority books first
+    for book in priority_books[:3]:  # Limit to top 3 priority books
+        results = _search_kiwix_book(book, query, limit=5)
+        all_results.extend(results)
+
+    # If not enough results, try other books
+    if len(all_results) < KIWIX_MAX_RESULTS:
+        for book in other_books[:2]:
+            results = _search_kiwix_book(book, query, limit=3)
+            all_results.extend(results)
+
+    return all_results[:KIWIX_MAX_RESULTS * 2]  # Return up to 6 for further filtering
+
+
+# ── SearXNG Search Helpers (v5.0.0) ──────────────────────────────────────────
+
+def _search_searxng(query: str) -> list[dict]:
+    """Search SearXNG and return results. Returns empty list on failure."""
+    try:
+        resp = requests.get(
+            f"{SEARXNG_URL}/search",
+            params={"q": query, "format": "json"},
+            timeout=SEARXNG_TIMEOUT,
+        )
+        if resp.status_code != 200:
+            log.warning(f"SearXNG returned status {resp.status_code}")
+            return []
+
+        data = resp.json()
+        results = data.get("results", [])
+
+        # Format results
+        formatted = []
+        for r in results[:SEARXNG_MAX_RESULTS]:
+            formatted.append({
+                "title": r.get("title", ""),
+                "url": r.get("url", ""),
+                "snippet": r.get("content", ""),
+                "engines": r.get("engines", []),
+                "score": r.get("score", 0),
+            })
+
+        return formatted
+    except requests.Timeout:
+        log.warning("SearXNG request timed out (offline or slow)")
+        return []
+    except requests.ConnectionError:
+        log.warning("SearXNG connection failed (offline)")
+        return []
+    except Exception as e:
+        log.warning(f"SearXNG search failed: {e}")
+        return []
+
+
+# ── Cascade Logging (v5.0.0) ─────────────────────────────────────────────────
+
+def _log_cascade_decision(
+    query: str,
+    router_intent: str,
+    top_1_score: float,
+    tier_used: int,
+    num_results: int,
+):
+    """Log cascade decision to JSONL file for threshold tuning."""
+    try:
+        CASCADE_LOG_PATH.parent.mkdir(parents=True, exist_ok=True)
+        entry = {
+            "timestamp": datetime.utcnow().isoformat() + "Z",
+            "query": query,
+            "router_intent": router_intent,
+            "top_1_score": round(top_1_score, 4),
+            "tier_used": tier_used,
+            "num_results": num_results,
+        }
+        with open(CASCADE_LOG_PATH, "a") as f:
+            f.write(json.dumps(entry) + "\n")
+    except Exception as e:
+        log.warning(f"Failed to log cascade decision: {e}")
+
+
+# ── End cascade helpers ──────────────────────────────────────────────────────
+
 # Subdomains excluded from Medical results when tactical context detected
 _OBSTETRIC_SUBDOMAINS = [
    "Obstetrics", "Midwifery", "Pregnancy", "Pregnancy Care",
@ -493,11 +759,27 @@ class Filter:
            default="http://100.64.0.24:8420",
            description="RECON address book API base URL",
        )
+        cascade_enabled: bool = Field(
+            default=True,
+            description="Enable three-tier cascade (Qdrant → Kiwix → SearXNG)",
+        )
+        cascade_threshold: float = Field(
+            default=0.5,
+            description="FlashRank score threshold for cascade fallthrough",
+        )

    def __init__(self):
        self.valves = self.Valves()
        self._expansion_cache: dict[str, list[str]] = {}
        self._ranker = None
+        self._kiwix_books: list[str] | None = None
+
+    def _get_kiwix_books(self) -> list[str]:
+        """Get cached list of Kiwix books, fetching on first use."""
+        if self._kiwix_books is None:
+            self._kiwix_books = _fetch_kiwix_books()
+            log.info(f"Loaded {len(self._kiwix_books)} Kiwix books")
+        return self._kiwix_books

    def _embed_query(self, text: str) -> list:
        """Embed a query string using TEI."""
@ -544,7 +826,7 @@ class Filter:
        for item in ranked[:self.valves.rerank_top_n]:
            idx = item["id"]
            result_copy = dict(results[idx])
-            result_copy["score"] = item["score"]
+            result_copy["score"] = float(item["score"])
            reranked.append(result_copy)

        return reranked
@ -756,7 +1038,7 @@ class Filter:
                    log.warning(f"Expanded search for {term!r} failed: {e}")
        return results

-    def _format_context(self, results: list) -> str:
+    def _format_context(self, results: list, tier_tag: str = "DOMAIN_KNOWLEDGE") -> str:
        """Format search results into a context block for the system prompt."""
        if not results:
            return ""
@ -814,7 +1096,69 @@ class Filter:
            else:
                dl_str = ""

-            block = f"[{i}] {citation} (relevance: {score:.2f})\n{summary}{facts_str}{domain_str}{dl_str}"
+            block = f"[{tier_tag}:{i}] {citation} (relevance: {score:.2f})\n{summary}{facts_str}{domain_str}{dl_str}"
+            blocks.append(block)
+
+        return "\n\n".join(blocks)
+
+    def _format_kiwix_context(self, results: list[dict]) -> str:
+        """Format Kiwix search results into a context block."""
+        if not results:
+            return ""
+
+        blocks = []
+        for i, r in enumerate(results, 1):
+            title = r.get("title", "Unknown")
+            snippet = r.get("snippet", "").strip()
+            book = r.get("book", "")
+            url_path = r.get("url", "")
+
+            # Build wiki URL
+            if url_path:
+                # Extract article path from /content/book/path
+                path_match = re.search(r'/content/[^/]+/(.+)$', url_path)
+                if path_match:
+                    article_path = path_match.group(1)
+                    wiki_url = f"https://wiki.echo6.co/viewer#{book}/{article_path}"
+                else:
+                    wiki_url = f"https://wiki.echo6.co/viewer#{book}"
+            else:
+                wiki_url = ""
+
+            # Fetch article content if available
+            content = ""
+            if url_path:
+                content = _fetch_kiwix_article(url_path)
+                if content:
+                    content = content[:1500]  # Limit per article
+
+            if not content:
+                content = snippet
+
+            block = f"[OFFLINE_WIKI:{i}] {title}\n{content}"
+            if wiki_url:
+                block += f"\nSource: {wiki_url}"
+            blocks.append(block)
+
+        return "\n\n".join(blocks)
+
+    def _format_searxng_context(self, results: list[dict]) -> str:
+        """Format SearXNG search results into a context block."""
+        if not results:
+            return ""
+
+        blocks = []
+        for i, r in enumerate(results, 1):
+            title = r.get("title", "Unknown")
+            snippet = r.get("snippet", "")
+            url = r.get("url", "")
+            engines = r.get("engines", [])
+
+            engine_str = f" (via {', '.join(engines[:2])})" if engines else ""
+
+            block = f"[WEB_SEARCH:{i}] {title}{engine_str}\n{snippet}"
+            if url:
+                block += f"\nSource: {url}"
            blocks.append(block)

        return "\n\n".join(blocks)
@ -838,11 +1182,14 @@ class Filter:
        if not query or len(query.strip()) < 3:
            return body

+        router_intent = "rag_search"
+
        # ── ROUTER GATE (v4.3.0) ─────────────────────────────────────────
        if self.valves.router_enabled:
            route, confidence = _classify_query(
                query, self.valves.tei_url, self.valves.router_threshold
            )
+            router_intent = route
            log.info(f"Router: {query!r} → {route} ({confidence:.3f})")

            if route == "direct_answer":
@ -903,6 +1250,11 @@ class Filter:
                }
            )

+        tier_used = 1
+        top_1_score = 0.0
+        final_context = ""
+        final_results = []
+
        try:
            vector = self._embed_query(query)

@ -975,33 +1327,112 @@ class Filter:
                results = _rerank_by_keyword_overlap(query, results)
                results = results[:self.valves.top_k]

-            # Store results for outlet citations (module-level, keyed by chat_id)
-            chat_id = body.get("chat_id", body.get("metadata", {}).get("chat_id", ""))
-            if chat_id:
-                _SOURCE_STORE[chat_id] = results
+            # Get top-1 score for cascade decision
+            top_1_score = results[0]["score"] if results else 0.0

-            # Build context block
-            context = self._format_context(results)
+            # ── CASCADE DECISION POINT (v5.0.0) ──────────────────────────────
+            if self.valves.cascade_enabled and top_1_score < self.valves.cascade_threshold:
+                # Tier 1 score too low, try Tier 2 (Kiwix)
+                log.info(f"Cascade: Tier 1 score {top_1_score:.3f} < {self.valves.cascade_threshold}, trying Kiwix")

-            if context:
-                rag_prompt = (
-                    "You have access to the RECON knowledge base — a curated library of military field manuals, "
-                    "survival guides, preparedness literature, and video transcripts. Answer the user's question using "
-                    "the reference material below. Reference sources using [1], [2], [3] etc. matching the "
-                    "numbered sources provided. Use these numbers inline in your response.\n\n"
-                    "If the reference material doesn't adequately answer the question, say so explicitly rather "
-                    "than filling gaps with general knowledge.\n\n"
-                    "---REFERENCE MATERIAL---\n\n"
-                    f"{context}\n\n"
-                    "---END REFERENCE MATERIAL---"
-                )
+                if __event_emitter__:
+                    await __event_emitter__(
+                        {"type": "status", "data": {"description": "Searching offline encyclopedia...", "done": False}}
+                    )
+
+                kiwix_results = _search_kiwix(query, self._get_kiwix_books())
+
+                if kiwix_results:
+                    tier_used = 2
+                    final_context = self._format_kiwix_context(kiwix_results[:KIWIX_MAX_RESULTS])
+                    log.info(f"Cascade: Tier 2 (Kiwix) returned {len(kiwix_results)} results")
+                else:
+                    # Tier 2 failed, try Tier 3 (SearXNG)
+                    log.info("Cascade: Tier 2 empty, trying SearXNG")
+
+                    if __event_emitter__:
+                        await __event_emitter__(
+                            {"type": "status", "data": {"description": "Searching the web...", "done": False}}
+                        )
+
+                    searxng_results = _search_searxng(query)
+
+                    if searxng_results:
+                        tier_used = 3
+                        final_context = self._format_searxng_context(searxng_results)
+                        log.info(f"Cascade: Tier 3 (SearXNG) returned {len(searxng_results)} results")
+                    else:
+                        # All tiers exhausted, fall back to whatever Tier 1 had
+                        log.info("Cascade: All tiers exhausted, using Tier 1 results")
+                        tier_used = 1
+                        final_context = self._format_context(results, "DOMAIN_KNOWLEDGE")
+                        final_results = results
+            else:
+                # Tier 1 score good enough, use Qdrant results
+                tier_used = 1
+                final_context = self._format_context(results, "DOMAIN_KNOWLEDGE")
+                final_results = results
+
+            # Store results for outlet citations (only for Tier 1)
+            if tier_used == 1:
+                chat_id = body.get("chat_id", body.get("metadata", {}).get("chat_id", ""))
+                if chat_id:
+                    _SOURCE_STORE[chat_id] = final_results
+
+            # Log cascade decision
+            _log_cascade_decision(
+                query=query,
+                router_intent=router_intent,
+                top_1_score=top_1_score,
+                tier_used=tier_used,
+                num_results=len(results) if tier_used == 1 else (len(kiwix_results) if tier_used == 2 else len(searxng_results) if tier_used == 3 else 0),
+            )
+
+            # Build the RAG prompt with tier-appropriate instructions
+            if final_context:
+                if tier_used == 1:
+                    rag_prompt = (
+                        "You have access to the RECON knowledge base — a curated library of military field manuals, "
+                        "survival guides, preparedness literature, and video transcripts. Answer the user's question using "
+                        "the reference material below. Reference sources using [DOMAIN_KNOWLEDGE:1], [DOMAIN_KNOWLEDGE:2], etc.\n\n"
+                        "If the reference material doesn't adequately answer the question, say so explicitly rather "
+                        "than filling gaps with general knowledge.\n\n"
+                        "---REFERENCE MATERIAL---\n\n"
+                        f"{final_context}\n\n"
+                        "---END REFERENCE MATERIAL---"
+                    )
+                elif tier_used == 2:
+                    rag_prompt = (
+                        "The RECON domain knowledge base did not have high-confidence results for this query. "
+                        "The following information comes from offline Wikipedia/encyclopedia sources (Kiwix). "
+                        "Reference sources using [OFFLINE_WIKI:1], [OFFLINE_WIKI:2], etc.\n\n"
+                        "Note: This is general encyclopedia content, not domain-specific preparedness material.\n\n"
+                        "---OFFLINE WIKI CONTENT---\n\n"
+                        f"{final_context}\n\n"
+                        "---END OFFLINE WIKI CONTENT---"
+                    )
+                else:  # tier_used == 3
+                    rag_prompt = (
+                        "Neither the RECON knowledge base nor offline encyclopedias had relevant content. "
+                        "The following information comes from a live web search. Reference sources using [WEB_SEARCH:1], etc.\n\n"
+                        "Note: Web search results may be less reliable than curated sources. Verify important information.\n\n"
+                        "---WEB SEARCH RESULTS---\n\n"
+                        f"{final_context}\n\n"
+                        "---END WEB SEARCH RESULTS---"
+                    )
            else:
                rag_prompt = (
                    "You have access to the RECON knowledge base, but no relevant reference material was "
-                    "found for this query. Answer from your general knowledge and clearly flag that your "
-                    "response is NOT backed by the RECON reference library."
+                    "found for this query in any tier (domain knowledge, offline wiki, or web search). "
+                    "Answer from your general knowledge and clearly flag that your response is NOT backed by references."
                )

+            # Add source priority instruction
+            rag_prompt += (
+                "\n\nSource priority: When sources overlap, prefer DOMAIN_KNOWLEDGE over OFFLINE_WIKI over WEB_SEARCH. "
+                "Always cite which tier your information came from."
+            )
+
            # Inject into system message
            system_msg = next(
                (m for m in messages if m.get("role") == "system"), None
@ -1013,8 +1444,10 @@ class Filter:
                    0, {"role": "system", "content": rag_prompt}
                )

+            # Emit final status
            if __event_emitter__:
-                status_msg = f"Found {len(results)} reference{'s' if len(results) != 1 else ''}" if results else "No matching references found"
+                tier_names = {1: "RECON", 2: "Kiwix", 3: "Web"}
+                status_msg = f"Found results from {tier_names.get(tier_used, 'unknown')} (Tier {tier_used})"
                await __event_emitter__(
                    {
                        "type": "status",
@ -1117,3 +1550,104 @@ class Filter:
                log.warning(f"Failed to emit citation (id={pid}): {e}")

        return body
+
+
+# ── TEST BLOCK ───────────────────────────────────────────────────────────────
+if __name__ == "__main__":
+    import asyncio
+
+    # Test queries for each tier
+    TEST_QUERIES = [
+        ("tourniquet application steps", "Should hit Tier 1 (RECON)"),
+        ("population of Ukraine", "Should hit Tier 2 (Kiwix)"),
+        ("history of the Winter War between Finland and Russia", "Should hit Tier 2 (Kiwix)"),
+        ("latest iPhone reviews 2026", "Should hit Tier 3 (SearXNG)"),
+        ("compass declination adjustment", "Should hit Tier 1 (RECON)"),
+        ("what is the Coriolis effect", "Could go either way"),
+    ]
+
+    async def run_tests():
+        f = Filter()
+        results = []
+
+        print("=" * 70)
+        print("CASCADE TEST RESULTS")
+        print("=" * 70)
+
+        for query, expected in TEST_QUERIES:
+            print(f"\n{'─' * 70}")
+            print(f"Query: {query}")
+            print(f"Expected: {expected}")
+            print("─" * 70)
+
+            # Simulate a request body
+            body = {
+                "messages": [
+                    {"role": "user", "content": query}
+                ],
+                "chat_id": f"test_{hash(query)}",
+            }
+
+            try:
+                # Run through inlet
+                result_body = await f.inlet(body)
+
+                # Extract what was injected
+                system_msg = next(
+                    (m for m in result_body.get("messages", []) if m.get("role") == "system"),
+                    None
+                )
+
+                if system_msg:
+                    content = system_msg.get("content", "")
+
+                    # Determine tier used
+                    if "[DOMAIN_KNOWLEDGE:" in content:
+                        tier = 1
+                    elif "[OFFLINE_WIKI:" in content:
+                        tier = 2
+                    elif "[WEB_SEARCH:" in content:
+                        tier = 3
+                    else:
+                        tier = 0
+
+                    print(f"Tier Used: {tier}")
+
+                    # Get first 200 chars of context
+                    context_start = content.find("---")
+                    if context_start > 0:
+                        context_preview = content[context_start:context_start+300]
+                        print(f"Context Preview: {context_preview[:200]}...")
+
+                    results.append({
+                        "query": query,
+                        "expected": expected,
+                        "tier": tier,
+                    })
+                else:
+                    print("No system message injected")
+                    results.append({
+                        "query": query,
+                        "expected": expected,
+                        "tier": None,
+                    })
+
+            except Exception as e:
+                print(f"ERROR: {e}")
+                results.append({
+                    "query": query,
+                    "expected": expected,
+                    "tier": None,
+                    "error": str(e),
+                })
+
+        print("\n" + "=" * 70)
+        print("SUMMARY")
+        print("=" * 70)
+        for r in results:
+            tier_str = f"Tier {r['tier']}" if r.get('tier') else "ERROR"
+            print(f"  {r['query'][:40]:<40} → {tier_str}")
+
+        return results
+
+    asyncio.run(run_tests())