Add HTML escaping to status page and prompt injection guard to router

- 5a: Import html.escape and apply it to all values rendered into the HTML template in _serve_status_page() — uptime, counts, status text, node counts, errors. Prevents XSS via crafted node names or errors.
- 5b: Add basic prompt injection detection to _clean_query(), controlled by the configurable safety.prompt_injection_guard option (default: on). Detects patterns like "ignore all previous", "you are now", "system prompt:", etc. Truncates the query before the injection phrase and logs a warning. Not foolproof, but better than nothing.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-06-11 01:14:45 +02:00 · 2026-02-23 20:17:46 +00:00 · 2026-02-23 20:17:46 +00:00 · 32cd2b3427
commit 32cd2b3427
parent a71f92a77a
3 changed files with 59 additions and 10 deletions
--- a/meshai/router.py
+++ b/meshai/router.py
@ -33,6 +33,17 @@ class RouteResult:
    query: Optional[str] = None  # For LLM, the cleaned query


+# Patterns that suggest prompt injection attempts
+_INJECTION_PATTERNS = [
+    re.compile(r"ignore\s+(all\s+)?previous", re.IGNORECASE),
+    re.compile(r"ignore\s+your\s+instructions", re.IGNORECASE),
+    re.compile(r"disregard\s+(all\s+)?previous", re.IGNORECASE),
+    re.compile(r"you\s+are\s+now\b", re.IGNORECASE),
+    re.compile(r"new\s+instructions?\s*:", re.IGNORECASE),
+    re.compile(r"system\s*prompt\s*:", re.IGNORECASE),
+]
+
+
 class MessageRouter:
    """Routes incoming messages to appropriate handlers."""

@ -186,12 +197,28 @@ class MessageRouter:
            logger.debug(f"Persisted summary for {user_id}")

    def _clean_query(self, text: str) -> str:
-        """Remove @mention from query text."""
+        """Remove @mention and check for prompt injection."""
        # Remove @botname mention
        cleaned = self._mention_pattern.sub("", text)
        # Clean up extra whitespace
        cleaned = " ".join(cleaned.split())
-        return cleaned.strip()
+        cleaned = cleaned.strip()
+
+        # Check for prompt injection if guard is enabled
+        if self.config.safety.prompt_injection_guard:
+            for pattern in _INJECTION_PATTERNS:
+                if pattern.search(cleaned):
+                    logger.warning(
+                        f"Possible prompt injection detected: {cleaned[:80]}..."
+                    )
+                    # Truncate to just the part before the injection pattern
+                    match = pattern.search(cleaned)
+                    cleaned = cleaned[:match.start()].strip()
+                    if not cleaned:
+                        cleaned = "Hello"
+                    break
+
+        return cleaned

    def _make_command_context(self, message: MeshMessage) -> CommandContext:
        """Create command context from message."""