feat: Hybrid RAG knowledge base, sentence-aware chunking, MeshMonitor HTTP sync

Knowledge Base:
- Hybrid FTS5 + vector search using sqlite-vec and bge-small-en-v1.5
- Reciprocal Rank Fusion for result merging
- Domain-aware query construction handles typos
- Configurable weights for keyword vs semantic matching

Message Chunking:
- Sentence-aware splitting respects message boundaries
- Continuation prompts for long responses
- Natural follow-up detection (yes, ok, continue, more, etc.)
- Per-user continuation state management

MeshMonitor Integration:
- HTTP API trigger sync (replaces file-based triggers.json)
- Dynamic refresh interval
- Trigger injection into LLM prompt

Other:
- Updated system prompt for better response length control
- Simplified responder to handle message lists
- Updated README with new features and architecture diagram
- Cleaned up config.example.yaml

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-06-11 09:24:44 +02:00 · 2026-05-04 07:44:12 +00:00 · 2026-05-04 07:44:12 +00:00 · 0e36869a5f
commit 0e36869a5f
parent e65a558c6f
14 changed files with 986 additions and 464 deletions
--- a/meshai/router.py
+++ b/meshai/router.py
@ -13,6 +13,7 @@ from .config import Config
 from .connector import MeshConnector, MeshMessage
 from .context import MeshContext
 from .history import ConversationHistory
+from .chunker import chunk_response, ContinuationState

 logger = logging.getLogger(__name__)

@ -65,6 +66,7 @@ class MessageRouter:
        llm_backend: LLMBackend,
        context: MeshContext = None,
        meshmonitor_sync=None,
+        knowledge=None,
    ):
        self.config = config
        self.connector = connector
@ -73,6 +75,8 @@ class MessageRouter:
        self.llm = llm_backend
        self.context = context
        self.meshmonitor_sync = meshmonitor_sync
+        self.knowledge = knowledge
+        self.continuations = ContinuationState(max_continuations=3)


    def should_respond(self, message: MeshMessage) -> bool:
@ -111,6 +115,30 @@ class MessageRouter:

        return True

+    def check_continuation(self, message: MeshMessage) -> list[str] | None:
+        """Return the next stored messages if the sender is asking to continue.
+
+        Returns:
+            List of messages to send, or None if not a continuation
+        """
+        user_id = message.sender_id  # continuation state is looked up per sender
+        text = message.text.strip()
+
+        logger.info(f"check_continuation: user={user_id}, text='{text[:30]}', has_pending={self.continuations.has_pending(user_id)}")  # logs only the first 30 chars of the text
+
+        if self.continuations.has_pending(user_id):
+            if self.continuations.is_continuation_request(text):
+                result = self.continuations.get_continuation(user_id)
+                if result:
+                    messages, _ = result  # second element of the pair is not used here
+                    return messages
+                # Falsy result (max continuations reached, per ContinuationState - confirm); fall through to return None
+            else:
+                # User asked something new, clear pending continuation
+                self.continuations.clear(user_id)
+
+        return None  # nothing pending, limit reached, or the text was not a continuation request
+
    async def route(self, message: MeshMessage) -> RouteResult:
        """Route a message and generate response.

@ -208,6 +236,23 @@ class MessageRouter:
                    "\n\n[No recent mesh traffic observed yet.]"
                )

+
+
+        # 5. Knowledge base retrieval
+        if self.knowledge and query:
+            results = self.knowledge.search(query)
+            if results:
+                chunks = "\n\n".join(
+                    f"[{r['title']}]: {r['excerpt']}" for r in results
+                )
+                system_prompt += (
+                    "\n\nREFERENCE KNOWLEDGE - Answer using this information:\n"
+                    + chunks
+                )
+
+        # DEBUG: Log system prompt status. NOTE(review): these diagnostics are emitted at WARNING level; use logger.debug before release.
+        logger.warning(f"SYSTEM PROMPT LENGTH: {len(system_prompt)} chars")
+        logger.warning(f"HAS REFERENCE KNOWLEDGE: {'REFERENCE KNOWLEDGE' in system_prompt}")
        try:
            response = await self.llm.generate(
                messages=history,
@ -227,7 +272,21 @@ class MessageRouter:
        # Persist summary if one was created/updated
        await self._persist_summary(message.sender_id)

-        return response
+        # Chunk the response with sentence awareness
+        messages, remaining = chunk_response(
+            response,
+            max_chars=self.config.response.max_length,
+            max_messages=self.config.response.max_messages,
+        )
+
+        # Store remaining content for continuation
+        if remaining:
+            logger.info(f"Storing continuation for {message.sender_id}: {len(remaining)} chars remaining")
+            self.continuations.store(message.sender_id, remaining)
+        else:
+            logger.info(f"No remaining content for {message.sender_id}")
+
+        return messages

    async def _persist_summary(self, user_id: str) -> None:
        """Persist any cached summary to the database.