fix: Remove hard-coded token limits on LLM responses

- Add max_response_tokens config (8192) to LLMConfig
- Use config value in router.py instead of hardcoded 500
- Update base.py default from 300 to 8192
- Lets LLM generate full responses; chunker handles size limits

Fixes truncated responses like "Here are three nodes in the freq"

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-05-21 23:24:44 +02:00 · 2026-05-05 22:06:50 +00:00 · 2026-05-05 22:06:50 +00:00 · 79ff756a38
commit 79ff756a38
parent 70c0ab3047
3 changed files with 3 additions and 2 deletions
--- a/meshai/backends/base.py
+++ b/meshai/backends/base.py
@@ -12,7 +12,7 @@ class LLMBackend(ABC):
        self,
        messages: list[dict],
        system_prompt: str,
-        max_tokens: int = 300,
+        max_tokens: int = 8192,
        user_id: Optional[str] = None,
    ) -> str:
        """Generate a response from the LLM.
--- a/meshai/config.py
+++ b/meshai/config.py
@@ -94,6 +94,7 @@ class LLMConfig:
    base_url: str = "https://api.openai.com/v1"
    model: str = "gpt-4o-mini"
    timeout: int = 30
+    max_response_tokens: int = 8192  # Let LLM generate full responses; chunker handles size
    system_prompt: str = (
        "RESPONSE RULES:\n"
--- a/meshai/router.py
+++ b/meshai/router.py
@@ -643,7 +643,7 @@ class MessageRouter:
            response = await self.llm.generate(
                messages=history,
                system_prompt=system_prompt,
-                max_tokens=500,
+                max_tokens=self.config.llm.max_response_tokens,
            )
        except asyncio.TimeoutError:
            logger.error("LLM request timed out")