fix: Command output goes through chunker, byte-safe for LoRa

- Commands now chunk their output the same way as LLM responses
- split_sentences splits on newlines first, so multi-line !health output is chunked line by line
- chunk_response counts UTF-8 bytes instead of characters
- Emojis and other multi-byte UTF-8 characters are counted correctly against the 228-byte LoRa limit
- !health 274 bytes now splits into 2 messages (195 + 74 bytes)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-05-21 15:14:45 +02:00 · 2026-05-05 23:07:25 +00:00 · 2026-05-05 23:07:25 +00:00 · 5be1d20b24
commit 5be1d20b24
parent 1662d80f02
2 changed files with 70 additions and 23 deletions
--- a/meshai/chunker.py
+++ b/meshai/chunker.py
@ -24,12 +24,25 @@ CONTINUATION_PROMPT = "Want me to keep going?"
 def split_sentences(text: str) -> list[str]:
-    """Split text into sentences, preserving abbreviations and decimals."""
+    """Split text into sentences on periods, newlines, or question marks."""
-    # Split on . ! ? followed by space or end of string
+    # First split on newlines (each line is a chunk candidate)
-    # But not on decimals (4.8) or common abbreviations (e.g. Dr. Mr. etc.)
+    lines = text.strip().split('\n')
-    sentences = re.split(r'(?<=[.!?])\s+', text.strip())
+
-    # Filter empty strings
+    sentences = []
-    return [s.strip() for s in sentences if s.strip()]
+    for line in lines:
        line = line.strip()
        if not line:
            continue
        # Then split on sentence boundaries within each line
        parts = re.split(r'(?<=[.!?])\s+', line)
        sentences.extend(p.strip() for p in parts if p.strip())
    return sentences
 def _byte_len(s: str) -> int:
    """Get UTF-8 byte length of a string."""
    return len(s.encode('utf-8'))
 def chunk_response(
@ -41,7 +54,7 @@ def chunk_response(
    Args:
        text: Full LLM response text
-        max_chars: Maximum characters per message
+        max_chars: Maximum BYTES per message (LoRa limit, not characters)
        max_messages: Maximum messages to send before prompting
    Returns:
@ -51,22 +64,27 @@ def chunk_response(
    """
    sentences = split_sentences(text)
    if not sentences:
-        return [text[:max_chars]], ""
+        truncated = text[:max_chars]
        while _byte_len(truncated) > max_chars and truncated:
            truncated = truncated[:-1]
        return [truncated], ""
    messages = []
    current_msg = []
-    current_len = 0
+    current_bytes = 0
    sentence_idx = 0
    while sentence_idx < len(sentences) and len(messages) < max_messages:
        sentence = sentences[sentence_idx]
        sentence_bytes = _byte_len(sentence)
        # Would this sentence fit in the current message?
-        added_len = len(sentence) + (1 if current_msg else 0)  # +1 for space
+        # +1 byte for space between sentences
        added_bytes = sentence_bytes + (1 if current_msg else 0)
-        if current_len + added_len <= max_chars:
+        if current_bytes + added_bytes <= max_chars:
            current_msg.append(sentence)
-            current_len += added_len
+            current_bytes += added_bytes
            sentence_idx += 1
        else:
            # Sentence doesn't fit
@ -74,17 +92,36 @@ def chunk_response(
                # Flush current message, start new one with this sentence
                messages.append(" ".join(current_msg))
                current_msg = []
-                current_len = 0
+                current_bytes = 0
                # Don't increment sentence_idx — retry this sentence in next message
            else:
                # Single sentence exceeds max_chars — split at last word boundary
-                break_point = sentence[:max_chars].rfind(' ')
+                # Find break point that fits in byte budget
-                if break_point <= 0:
+                words = sentence.split(' ')
-                    break_point = max_chars
+                fit_words = []
-                messages.append(sentence[:break_point].rstrip())
+                fit_bytes = 0
-                leftover = sentence[break_point:].lstrip()
+                for word in words:
-                if leftover:
+                    word_bytes = _byte_len(word) + (1 if fit_words else 0)
-                    sentences.insert(sentence_idx + 1, leftover)
+                    if fit_bytes + word_bytes <= max_chars:
                        fit_words.append(word)
                        fit_bytes += word_bytes
                    else:
                        break
                if fit_words:
                    messages.append(" ".join(fit_words))
                    leftover = " ".join(words[len(fit_words):])
                    if leftover:
                        sentences.insert(sentence_idx + 1, leftover)
                else:
                    # Even first word doesn't fit — truncate it
                    truncated = sentence
                    while _byte_len(truncated) > max_chars and truncated:
                        truncated = truncated[:-1]
                    messages.append(truncated)
                    leftover = sentence[len(truncated):].lstrip()
                    if leftover:
                        sentences.insert(sentence_idx + 1, leftover)
                sentence_idx += 1
    # Flush any remaining buffered message
@ -108,7 +145,7 @@ def chunk_response(
        last_msg = messages[-1] if messages else ""
        # Check if we can append the prompt to the last message
-        if len(last_msg) + 1 + len(prompt) <= max_chars:
+        if _byte_len(last_msg) + 1 + _byte_len(prompt) <= max_chars:
            messages[-1] = last_msg + " " + prompt
        else:
            # Need to shorten the last message to fit the prompt
@ -116,7 +153,7 @@ def chunk_response(
            last_sentences = split_sentences(last_msg)
            while last_sentences:
                test = " ".join(last_sentences) + " " + prompt
-                if len(test) <= max_chars:
+                if _byte_len(test) <= max_chars:
                    # Put removed sentences back into remaining
                    messages[-1] = test
                    break
--- a/meshai/main.py
+++ b/meshai/main.py
@ -344,7 +344,17 @@ class MeshAI:
            # Determine response
            if result.route_type == RouteType.COMMAND:
-                messages = result.response  # Commands return single string
+                # Chunk command output same as LLM responses
                from .chunker import chunk_response
                raw = result.response if isinstance(result.response, str) else str(result.response)
                messages, remaining = chunk_response(
                    raw,
                    max_chars=self.config.response.max_length,
                    max_messages=self.config.response.max_messages,
                )
                # Store remaining for continuation
                if remaining:
                    self.router.continuations.store(message.sender_id, remaining)
            elif result.route_type == RouteType.LLM:
                messages = await self.router.generate_llm_response(message, result.query)
            else: