fix: Short sentence instruction + chunker splits instead of truncating

- Added CRITICAL instruction to keep sentences under 150 chars
- Chunker now splits long sentences at word boundaries instead of truncating
- No words lost when splitting

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
K7ZVX 2026-05-05 07:22:52 +00:00
commit 8d1a48ea08
2 changed files with 189 additions and 182 deletions

View file

@ -1,182 +1,188 @@
"""Sentence-aware message chunker for Meshtastic's character limits. """Sentence-aware message chunker for Meshtastic's character limits.
Splits LLM responses into messages that: Splits LLM responses into messages that:
- Never exceed max_chars per message (default 200) - Never exceed max_chars per message (default 200)
- Never split a sentence across messages - Never split a sentence across messages
- Send at most max_messages per response (default 3) - Send at most max_messages per response (default 3)
- If more content remains, replace the last sentence with a continuation prompt - If more content remains, replace the last sentence with a continuation prompt
- Support up to max_continuations follow-ups (default 3) - Support up to max_continuations follow-ups (default 3)
""" """
import logging import logging
import re import re
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Phrases that trigger continuation of a previous response # Phrases that trigger continuation of a previous response
CONTINUE_PHRASES = { CONTINUE_PHRASES = {
"yes", "yeah", "yep", "yea", "sure", "ok", "okay", "go on", "yes", "yeah", "yep", "yea", "sure", "ok", "okay", "go on",
"keep going", "continue", "more", "go ahead", "tell me more", "keep going", "continue", "more", "go ahead", "tell me more",
"yes please", "y", "yes please", "y",
} }
CONTINUATION_PROMPT = "Want me to keep going?" CONTINUATION_PROMPT = "Want me to keep going?"
# Lower-cased abbreviations (without the trailing period) after which a
# period does NOT end the sentence.  Deliberately small: a false "merge"
# only makes a sentence longer, while a false split breaks a name in two.
_ABBREVIATIONS = frozenset({
    "mr", "mrs", "ms", "dr", "prof", "vs", "e.g", "i.e",
})


def _ends_with_abbreviation(fragment: str) -> bool:
    """Return True if *fragment* ends in a known abbreviation such as 'Dr.'."""
    if not fragment.endswith("."):
        return False
    last_word = fragment.rsplit(None, 1)[-1]
    # Drop the trailing period and any opening bracket/quote before lookup.
    return last_word[:-1].lstrip("([\"'").lower() in _ABBREVIATIONS


def split_sentences(text: str) -> list[str]:
    """Split text into sentences, preserving abbreviations and decimals.

    The text is split wherever '.', '!' or '?' is followed by whitespace.
    Decimals such as ``4.8`` survive because no whitespace follows the dot.
    Fragments that end in a known abbreviation (``Dr.``, ``e.g.`` ...) are
    glued back onto the fragment that follows, joined by a single space.

    Args:
        text: Arbitrary text, possibly empty.

    Returns:
        The non-empty, whitespace-stripped sentences in their original order.
        Empty or whitespace-only input yields an empty list.
    """
    fragments = re.split(r'(?<=[.!?])\s+', text.strip())

    sentences: list[str] = []
    glue_next = False  # True when the previous fragment ended in an abbreviation
    for fragment in fragments:
        fragment = fragment.strip()
        if not fragment:
            continue
        if glue_next:
            sentences[-1] = f"{sentences[-1]} {fragment}"
        else:
            sentences.append(fragment)
        glue_next = _ends_with_abbreviation(fragment)
    return sentences
def chunk_response(
    text: str,
    max_chars: int = 200,
    max_messages: int = 3,
) -> tuple[list[str], str]:
    """Split a response into sentence-aligned messages.

    Sentences are packed greedily into messages of at most ``max_chars``
    characters, joined by single spaces.  A single sentence longer than
    ``max_chars`` is split at the last space that keeps the head within the
    limit (or hard-cut at ``max_chars`` when it contains no usable space);
    the tail is re-queued as the next sentence, so no words are dropped.

    Args:
        text: Full LLM response text
        max_chars: Maximum characters per message
        max_messages: Maximum messages to send before prompting

    Returns:
        Tuple of (messages_to_send, remaining_text)
        If remaining_text is non-empty, the last message includes
        a continuation prompt.  When no leading run of sentences from the
        last message fits together with the prompt, the last message is
        replaced by the prompt alone and its content is moved to the front
        of remaining_text.  If ``max_messages`` is less than 1, no messages
        are produced and the whole text is returned as remaining_text.
    """
    sentences = split_sentences(text)
    if not sentences:
        return [text[:max_chars]], ""

    messages: list[str] = []
    current_msg: list[str] = []
    current_len = 0
    sentence_idx = 0

    while sentence_idx < len(sentences) and len(messages) < max_messages:
        sentence = sentences[sentence_idx]

        # Would this sentence fit in the current message?
        added_len = len(sentence) + (1 if current_msg else 0)  # +1 for space

        if current_len + added_len <= max_chars:
            current_msg.append(sentence)
            current_len += added_len
            sentence_idx += 1
        elif current_msg:
            # Flush the buffered message; sentence_idx is not advanced, so
            # this sentence is retried against an empty message next pass.
            messages.append(" ".join(current_msg))
            current_msg = []
            current_len = 0
        else:
            # Single sentence exceeds max_chars — split at last word boundary.
            # Search one character past the limit so that a space sitting
            # exactly at index max_chars counts as a valid boundary.
            break_point = sentence[:max_chars + 1].rfind(" ")
            if break_point <= 0:
                # No space to break on: hard cut at the limit.
                break_point = max_chars
            messages.append(sentence[:break_point].rstrip())
            leftover = sentence[break_point:].lstrip()
            if leftover:
                # Re-queue the tail so it is chunked like any other sentence.
                sentences.insert(sentence_idx + 1, leftover)
            sentence_idx += 1

    # Flush any remaining buffered message.  The buffer can only be
    # non-empty here if the loop ended by running out of sentences, in
    # which case there is still room for one more message.
    if current_msg:
        messages.append(" ".join(current_msg))

    # Everything not yet placed in a message is carried over.
    remaining = " ".join(sentences[sentence_idx:])

    # If there's remaining content, make room for a continuation prompt at
    # the end of the last message.  (No messages exist when max_messages < 1.)
    if remaining and messages:
        prompt = CONTINUATION_PROMPT
        last_msg = messages[-1]

        if len(last_msg) + 1 + len(prompt) <= max_chars:
            messages[-1] = f"{last_msg} {prompt}"
        else:
            # Drop sentences from the end of the last message until the
            # prompt fits; dropped sentences go back to the front of
            # remaining so they are sent on the next continuation.
            last_sentences = split_sentences(last_msg)
            while last_sentences:
                candidate = " ".join(last_sentences) + " " + prompt
                if len(candidate) <= max_chars:
                    messages[-1] = candidate
                    break
                remaining = last_sentences.pop() + " " + remaining
            else:
                # Couldn't fit — just use the prompt as the last message
                messages[-1] = prompt

    return messages, remaining
self._state: dict[str, dict] = {} class ContinuationState:
"""Tracks continuation state per user."""
def has_pending(self, user_id: str) -> bool:
"""Check if user has pending continuation content.""" def __init__(self, max_continuations: int = 3):
return user_id in self._state and bool(self._state[user_id]["remaining"]) self.max_continuations = max_continuations
# user_id -> {"remaining": str, "count": int}
def is_continuation_request(self, text: str) -> bool: self._state: dict[str, dict] = {}
"""Check if the message is a request to continue."""
return text.strip().lower().rstrip("!.,?") in CONTINUE_PHRASES def has_pending(self, user_id: str) -> bool:
"""Check if user has pending continuation content."""
def store(self, user_id: str, remaining: str) -> None: return user_id in self._state and bool(self._state[user_id]["remaining"])
"""Store remaining content for a user."""
if remaining: def is_continuation_request(self, text: str) -> bool:
existing = self._state.get(user_id, {"count": 0}) """Check if the message is a request to continue."""
self._state[user_id] = { return text.strip().lower().rstrip("!.,?") in CONTINUE_PHRASES
"remaining": remaining,
"count": existing.get("count", 0), def store(self, user_id: str, remaining: str) -> None:
} """Store remaining content for a user."""
elif user_id in self._state: if remaining:
del self._state[user_id] existing = self._state.get(user_id, {"count": 0})
self._state[user_id] = {
def get_continuation(self, user_id: str) -> tuple[list[str], str] | None: "remaining": remaining,
"""Get the next batch of messages for a continuation request. "count": existing.get("count", 0),
}
Returns None if no pending content or max continuations reached. elif user_id in self._state:
""" del self._state[user_id]
if user_id not in self._state:
return None def get_continuation(self, user_id: str) -> tuple[list[str], str] | None:
"""Get the next batch of messages for a continuation request.
state = self._state[user_id]
if state["count"] >= self.max_continuations: Returns None if no pending content or max continuations reached.
del self._state[user_id] """
return None if user_id not in self._state:
return None
remaining = state["remaining"]
if not remaining: state = self._state[user_id]
del self._state[user_id] if state["count"] >= self.max_continuations:
return None del self._state[user_id]
return None
messages, new_remaining = chunk_response(remaining)
state["count"] += 1 remaining = state["remaining"]
state["remaining"] = new_remaining if not remaining:
del self._state[user_id]
if not new_remaining: return None
del self._state[user_id]
messages, new_remaining = chunk_response(remaining)
return messages, new_remaining state["count"] += 1
state["remaining"] = new_remaining
def clear(self, user_id: str) -> None:
"""Clear continuation state for a user.""" if not new_remaining:
self._state.pop(user_id, None) del self._state[user_id]
return messages, new_remaining
def clear(self, user_id: str) -> None:
"""Clear continuation state for a user."""
self._state.pop(user_id, None)

View file

@ -102,6 +102,7 @@ RESPONSE STYLE:
- Include scores, percentages, node counts, battery levels, gateway counts - Include scores, percentages, node counts, battery levels, gateway counts
- You CAN use 3-5 messages if needed LoRa chunking handles splitting - You CAN use 3-5 messages if needed LoRa chunking handles splitting
- No markdown formatting plain text only - No markdown formatting plain text only
- CRITICAL: Keep every sentence under 150 characters. Break long thoughts into multiple short sentences. The message system handles multiple sentences perfectly but will truncate a single long sentence.
ANSWERING COVERAGE QUESTIONS: ANSWERING COVERAGE QUESTIONS:
- Reference geographic areas by local name from the region config - Reference geographic areas by local name from the region config