fix: Remove hard-coded token limits on LLM responses

- Add max_response_tokens config (8192) to LLMConfig
- Use config value in router.py instead of hardcoded 500
- Update base.py default from 300 to 8192
- Lets the LLM generate full responses; the chunker handles size limits

Fixes truncated responses such as "Here are three nodes in the freq" (cut off mid-word)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
K7ZVX 2026-05-05 22:06:50 +00:00
commit 79ff756a38
3 changed files with 3 additions and 2 deletions

View file

@@ -12,7 +12,7 @@ class LLMBackend(ABC):
self,
messages: list[dict],
system_prompt: str,
-max_tokens: int = 300,
+max_tokens: int = 8192,
user_id: Optional[str] = None,
) -> str:
"""Generate a response from the LLM.

View file

@@ -94,6 +94,7 @@ class LLMConfig:
base_url: str = "https://api.openai.com/v1"
model: str = "gpt-4o-mini"
timeout: int = 30
+max_response_tokens: int = 8192 # Let LLM generate full responses; chunker handles size
system_prompt: str = (
"RESPONSE RULES:\n"

View file

@@ -643,7 +643,7 @@ class MessageRouter:
response = await self.llm.generate(
messages=history,
system_prompt=system_prompt,
-max_tokens=500,
+max_tokens=self.config.llm.max_response_tokens,
)
except asyncio.TimeoutError:
logger.error("LLM request timed out")