From 79ff756a388d724b0bb94f3a121d135e4c6cf401 Mon Sep 17 00:00:00 2001
From: K7ZVX <matt@echo6.co>
Date: Tue, 5 May 2026 22:06:50 +0000
Subject: [PATCH] fix: Remove hard-coded token limits on LLM responses

- Add max_response_tokens setting (default 8192) to LLMConfig
- Pass that config value as max_tokens in router.py instead of the hardcoded 500
- Raise the max_tokens default in base.py from 300 to 8192
- Lets LLM generate full responses; chunker handles size limits

Fixes truncated responses like "Here are three nodes in the freq"

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 meshai/backends/base.py | 2 +-
 meshai/config.py        | 1 +
 meshai/router.py        | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/meshai/backends/base.py b/meshai/backends/base.py
index 6843e00..d50bb3f 100644
--- a/meshai/backends/base.py
+++ b/meshai/backends/base.py
@@ -12,7 +12,7 @@ class LLMBackend(ABC):
         self,
         messages: list[dict],
         system_prompt: str,
-        max_tokens: int = 300,
+        max_tokens: int = 8192,
         user_id: Optional[str] = None,
     ) -> str:
         """Generate a response from the LLM.
diff --git a/meshai/config.py b/meshai/config.py
index ce6c9af..d464386 100644
--- a/meshai/config.py
+++ b/meshai/config.py
@@ -94,6 +94,7 @@ class LLMConfig:
     base_url: str = "https://api.openai.com/v1"
     model: str = "gpt-4o-mini"
     timeout: int = 30
+    max_response_tokens: int = 8192  # Let LLM generate full responses; chunker handles size
 
     system_prompt: str = (
         "RESPONSE RULES:\n"
diff --git a/meshai/router.py b/meshai/router.py
index 4c19495..4cb0f9e 100644
--- a/meshai/router.py
+++ b/meshai/router.py
@@ -643,7 +643,7 @@ class MessageRouter:
             response = await self.llm.generate(
                 messages=history,
                 system_prompt=system_prompt,
-                max_tokens=500,
+                max_tokens=self.config.llm.max_response_tokens,
             )
         except asyncio.TimeoutError:
             logger.error("LLM request timed out")