Remove AI planning docs and example scripts

These were LLM-generated planning artifacts from the memory implementation phase, not user-facing documentation.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-05-21 23:24:44 +02:00 · 2026-02-24 00:22:31 +00:00 · 2026-02-24 00:22:31 +00:00 · 9a628724ce
commit 9a628724ce
parent 8c2c4d2aef
9 changed files with 0 additions and 4013 deletions
--- a/MEMORY_IMPLEMENTATION_GUIDE.md
+++ b/MEMORY_IMPLEMENTATION_GUIDE.md
@@ -1,656 +0,0 @@
-# Quick Implementation Guide: Rolling Summary Memory
-
-## TL;DR
-
-**Problem:** Sending full conversation history every request wastes tokens and latency.
-
-**Solution:** Rolling summary approach - keep recent messages + LLM-generated summary of older messages.
-
-**Result:** ~73-83% token reduction for long conversations (savings grow with conversation length), zero dependencies, works with current stack.
-
---
-
-## Architecture
-
-```
-SQLite History (per user)
-    ↓
-Messages 1-10: Summarized → "User asked about weather, discussed outdoor plans"
-Messages 11-18: Sent raw  → Full context
-    ↓
-LLM receives: System prompt + Summary + Recent 8 messages
-    ↓
-Response generated
-```
-
---
-
-## Files to Create/Modify
-
-### 1. Create `meshai/memory.py`
-
-```python
-"""Lightweight rolling summary memory manager."""
-
-import time
-from dataclasses import dataclass
-from typing import Optional
-
-from openai import AsyncOpenAI
-
-
-@dataclass
-class ConversationSummary:
-    """Summary of conversation history."""
-
-    summary: str
-    last_updated: float
-    message_count: int
-
-
-class RollingSummaryMemory:
-    """Manages conversation summaries with recent message window.
-
-    Strategy:
-    - Keep last N message pairs (window_size) in full
-    - Summarize everything before the window
-    - Update summary when old messages accumulate
-
-    Example (window_size=4):
-        Messages 1-10: Summarized to "User discussed weather and plans"
-        Messages 11-18: Kept in full (last 4 pairs)
-        Context sent: [Summary] + [Messages 11-18]
-    """
-
-    def __init__(
-        self,
-        client: AsyncOpenAI,
-        model: str,
-        window_size: int = 4,
-        summarize_threshold: int = 8,
-    ):
-        """Initialize rolling summary memory.
-
-        Args:
-            client: AsyncOpenAI client for generating summaries
-            model: Model name to use for summarization
-            window_size: Number of recent message pairs to keep in full
-            summarize_threshold: Messages to accumulate before re-summarizing
-        """
-        self._client = client
-        self._model = model
-        self._window_size = window_size
-        self._summarize_threshold = summarize_threshold
-
-        # In-memory cache of summaries (loaded from DB on startup)
-        self._summaries: dict[str, ConversationSummary] = {}
-
-    async def get_context_messages(
-        self,
-        user_id: str,
-        full_history: list[dict],
-    ) -> tuple[Optional[str], list[dict]]:
-        """Get optimized context: summary + recent messages.
-
-        Args:
-            user_id: User identifier
-            full_history: Full message history from database
-
-        Returns:
-            Tuple of (summary_text, recent_messages)
-            summary_text is None if conversation is short
-        """
-        # Short conversation - no summary needed
-        if len(full_history) <= self._window_size * 2:
-            return None, full_history
-
-        # Split into old (to summarize) and recent (keep raw)
-        split_point = -(self._window_size * 2)
-        old_messages = full_history[:split_point]
-        recent_messages = full_history[split_point:]
-
-        # Get or create summary
-        summary = await self._get_or_create_summary(user_id, old_messages)
-
-        return summary.summary, recent_messages
-
-    async def _get_or_create_summary(
-        self,
-        user_id: str,
-        messages: list[dict],
-    ) -> ConversationSummary:
-        """Get cached summary or create new one."""
-        # Check cache
-        if user_id in self._summaries:
-            cached = self._summaries[user_id]
-
-            # Reuse if message count is close
-            if abs(cached.message_count - len(messages)) < self._summarize_threshold:
-                return cached
-
-        # Generate new summary
-        summary_text = await self._summarize(messages)
-
-        summary = ConversationSummary(
-            summary=summary_text,
-            last_updated=time.time(),
-            message_count=len(messages),
-        )
-
-        self._summaries[user_id] = summary
-        return summary
-
-    async def _summarize(self, messages: list[dict]) -> str:
-        """Generate summary using LLM."""
-        # Format conversation
-        conversation = "\n".join(
-            [f"{msg['role'].upper()}: {msg['content']}" for msg in messages]
-        )
-
-        prompt = f"""Summarize this conversation in 2-3 concise sentences. Focus on:
- Main topics discussed
- Important context or user preferences
- Key information to remember
-
-Conversation:
-{conversation}
-
-Summary (2-3 sentences):"""
-
-        try:
-            response = await self._client.chat.completions.create(
-                model=self._model,
-                messages=[{"role": "user", "content": prompt}],
-                max_tokens=150,
-                temperature=0.3,
-            )
-
-            return response.choices[0].message.content.strip()
-
-        except Exception as e:
-            # Fallback
-            return f"Previous conversation: {len(messages)} messages about various topics."
-
-    def load_summary(self, user_id: str, summary: ConversationSummary) -> None:
-        """Load summary from database into cache."""
-        self._summaries[user_id] = summary
-
-    def clear_summary(self, user_id: str) -> None:
-        """Clear cached summary for user."""
-        self._summaries.pop(user_id, None)
-```
-
---
-
-### 2. Modify `meshai/history.py`
-
-Add summary storage methods:
-
-```python
-# Add to ConversationHistory class
-
-async def initialize(self) -> None:
-    """Initialize database and create tables."""
-    self._db = await aiosqlite.connect(self._db_path)
-
-    # Existing conversations table
-    await self._db.execute("""
-        CREATE TABLE IF NOT EXISTS conversations (
-            id INTEGER PRIMARY KEY AUTOINCREMENT,
-            user_id TEXT NOT NULL,
-            role TEXT NOT NULL,
-            content TEXT NOT NULL,
-            timestamp REAL NOT NULL
-        )
-    """)
-
-    await self._db.execute("""
-        CREATE INDEX IF NOT EXISTS idx_user_timestamp
-        ON conversations (user_id, timestamp)
-    """)
-
-    # NEW: Summaries table
-    await self._db.execute("""
-        CREATE TABLE IF NOT EXISTS conversation_summaries (
-            user_id TEXT PRIMARY KEY,
-            summary TEXT NOT NULL,
-            message_count INTEGER NOT NULL,
-            updated_at REAL NOT NULL
-        )
-    """)
-
-    await self._db.commit()
-    logger.info(f"Conversation history initialized at {self._db_path}")
-
-
-async def store_summary(
-    self, user_id: str, summary: str, message_count: int
-) -> None:
-    """Store conversation summary.
-
-    Args:
-        user_id: Node ID of user
-        summary: Summary text
-        message_count: Number of messages summarized
-    """
-    if not self._db:
-        raise RuntimeError("Database not initialized")
-
-    async with self._lock:
-        await self._db.execute(
-            """
-            INSERT OR REPLACE INTO conversation_summaries
-            (user_id, summary, message_count, updated_at)
-            VALUES (?, ?, ?, ?)
-            """,
-            (user_id, summary, message_count, time.time()),
-        )
-        await self._db.commit()
-
-
-async def get_summary(self, user_id: str) -> Optional[dict]:
-    """Get conversation summary for user.
-
-    Args:
-        user_id: Node ID of user
-
-    Returns:
-        Dict with 'summary', 'message_count', 'updated_at' or None
-    """
-    if not self._db:
-        raise RuntimeError("Database not initialized")
-
-    async with self._lock:
-        cursor = await self._db.execute(
-            """
-            SELECT summary, message_count, updated_at
-            FROM conversation_summaries
-            WHERE user_id = ?
-            """,
-            (user_id,),
-        )
-        row = await cursor.fetchone()
-
-    if not row:
-        return None
-
-    return {
-        "summary": row[0],
-        "message_count": row[1],
-        "updated_at": row[2],
-    }
-
-
-async def clear_summary(self, user_id: str) -> None:
-    """Clear summary for user (e.g., on history reset).
-
-    Args:
-        user_id: Node ID of user
-    """
-    if not self._db:
-        raise RuntimeError("Database not initialized")
-
-    async with self._lock:
-        await self._db.execute(
-            "DELETE FROM conversation_summaries WHERE user_id = ?",
-            (user_id,),
-        )
-        await self._db.commit()
-```
-
---
-
-### 3. Modify `meshai/backends/openai_backend.py`
-
-Integrate memory manager:
-
-```python
-"""OpenAI-compatible LLM backend with rolling summary memory."""
-
-import logging
-from typing import Optional
-
-from openai import AsyncOpenAI
-
-from ..config import LLMConfig
-from ..memory import RollingSummaryMemory
-from .base import LLMBackend
-
-logger = logging.getLogger(__name__)
-
-
-class OpenAIBackend(LLMBackend):
-    """OpenAI-compatible backend with intelligent memory management."""
-
-    def __init__(self, config: LLMConfig, api_key: str):
-        """Initialize OpenAI backend.
-
-        Args:
-            config: LLM configuration
-            api_key: API key to use
-        """
-        self.config = config
-        self._client = AsyncOpenAI(
-            api_key=api_key,
-            base_url=config.base_url,
-        )
-
-        # Initialize rolling summary memory
-        self._memory = RollingSummaryMemory(
-            client=self._client,
-            model=config.model,
-            window_size=4,  # Keep last 4 exchanges (8 messages)
-            summarize_threshold=8,  # Re-summarize after 8 new messages
-        )
-
-    async def generate(
-        self,
-        messages: list[dict],
-        system_prompt: str,
-        user_id: str = None,  # NEW: optional for backward compatibility
-        max_tokens: int = 300,
-    ) -> str:
-        """Generate a response using OpenAI-compatible API.
-
-        Args:
-            messages: Conversation history
-            system_prompt: System prompt
-            user_id: User identifier (for memory management)
-            max_tokens: Maximum tokens to generate
-
-        Returns:
-            Generated response
-        """
-        # If no user_id, use old behavior (send full history)
-        if not user_id:
-            full_messages = [{"role": "system", "content": system_prompt}]
-            full_messages.extend(messages)
-        else:
-            # Use memory manager to optimize context
-            summary, recent_messages = await self._memory.get_context_messages(
-                user_id=user_id,
-                full_history=messages,
-            )
-
-            # Build optimized message list
-            if summary:
-                # Long conversation: system + summary + recent
-                enhanced_system = f"""{system_prompt}
-
-Previous conversation summary: {summary}"""
-                full_messages = [{"role": "system", "content": enhanced_system}]
-                full_messages.extend(recent_messages)
-
-                logger.debug(
-                    f"Using summary + {len(recent_messages)} recent messages "
-                    f"(total history: {len(messages)})"
-                )
-            else:
-                # Short conversation: system + all messages
-                full_messages = [{"role": "system", "content": system_prompt}]
-                full_messages.extend(messages)
-
-        try:
-            response = await self._client.chat.completions.create(
-                model=self.config.model,
-                messages=full_messages,
-                max_tokens=max_tokens,
-                temperature=0.7,
-            )
-
-            content = response.choices[0].message.content
-            return content.strip() if content else ""
-
-        except Exception as e:
-            logger.error(f"OpenAI API error: {e}")
-            raise
-
-    def load_summary_cache(self, user_id: str, summary_data: dict) -> None:
-        """Load summary into memory cache (called on startup).
-
-        Args:
-            user_id: User identifier
-            summary_data: Dict with 'summary', 'message_count', 'updated_at'
-        """
-        from ..memory import ConversationSummary
-
-        summary = ConversationSummary(
-            summary=summary_data["summary"],
-            message_count=summary_data["message_count"],
-            last_updated=summary_data["updated_at"],
-        )
-        self._memory.load_summary(user_id, summary)
-
-    def clear_summary_cache(self, user_id: str) -> None:
-        """Clear summary cache for user."""
-        self._memory.clear_summary(user_id)
-
-    # ... rest of methods unchanged ...
-```
-
---
-
-### 4. Modify `meshai/responder.py`
-
-Pass user_id to backend and persist summaries:
-
-```python
-# In the generate_response method
-
-async def generate_response(self, user_id: str, message: str) -> str:
-    """Generate LLM response with optimized memory."""
-
-    # Add user message to history
-    await self.history.add_message(user_id, "user", message)
-
-    # Get conversation history
-    history = await self.history.get_history_for_llm(user_id)
-
-    # Generate response with user_id for memory management
-    response = await self.backend.generate(
-        messages=history,
-        system_prompt=self.system_prompt,
-        user_id=user_id,  # NEW: enables memory optimization
-        max_tokens=300,
-    )
-
-    # Add assistant response to history
-    await self.history.add_message(user_id, "assistant", response)
-
-    # Persist summary if one was created
-    # The memory manager caches it, we need to save to DB
-    summary_data = await self._get_current_summary(user_id)
-    if summary_data:
-        await self.history.store_summary(
-            user_id,
-            summary_data["summary"],
-            summary_data["message_count"],
-        )
-
-    return response
-
-
-async def _get_current_summary(self, user_id: str) -> Optional[dict]:
-    """Get current summary from memory manager if it exists."""
-    # Access the memory manager's cache
-    if hasattr(self.backend, "_memory"):
-        summary = self.backend._memory._summaries.get(user_id)
-        if summary:
-            return {
-                "summary": summary.summary,
-                "message_count": summary.message_count,
-                "updated_at": summary.last_updated,
-            }
-    return None
-```
-
---
-
-### 5. Modify `meshai/commands/reset.py`
-
-Clear summaries when resetting history:
-
-```python
-async def execute(self, sender_id: str, args: list[str]) -> str:
-    """Reset conversation history."""
-    count = await self.responder.history.clear_history(sender_id)
-
-    # NEW: Also clear summary
-    await self.responder.history.clear_summary(sender_id)
-    if hasattr(self.responder.backend, "clear_summary_cache"):
-        self.responder.backend.clear_summary_cache(sender_id)
-
-    return f"Cleared {count} messages from your history."
-```
-
---
-
-## Configuration
-
-Add to `meshai/config.py`:
-
-```python
-@dataclass
-class MemoryConfig:
-    """Memory management configuration."""
-
-    # Rolling summary settings
-    window_size: int = 4  # Recent message pairs to keep
-    summarize_threshold: int = 8  # Messages before re-summarizing
-
-    # When to enable summaries
-    min_messages_for_summary: int = 10  # Start summarizing after this many
-```
-
---
-
-## Testing
-
-```python
-# Test script
-import asyncio
-from meshai.backends.openai_backend import OpenAIBackend
-from meshai.config import LLMConfig
-
-async def test():
-    config = LLMConfig(
-        backend="openai",
-        base_url="http://192.168.1.239:8000/v1",
-        model="gpt-4o-mini"
-    )
-
-    backend = OpenAIBackend(config, "your-key")
-
-    # Simulate long conversation
-    messages = []
-    for i in range(20):
-        messages.append({"role": "user", "content": f"Question {i}"})
-        messages.append({"role": "assistant", "content": f"Answer {i}"})
-
-    # Generate - should use summary
-    response = await backend.generate(
-        messages=messages,
-        system_prompt="You are helpful.",
-        user_id="!test123",
-        max_tokens=100
-    )
-
-    print(f"Response: {response}")
-    print(f"Sent {len(messages)} messages, but only ~10 used in context")
-
-asyncio.run(test())
-```
-
---
-
-## Expected Results
-
-### Token Usage Comparison
-
-**Before (full history):**
-```
-User message 1-20: ~2000 tokens
-System prompt: ~50 tokens
-Total: ~2050 tokens per request
-```
-
-**After (with summary):**
-```
-System prompt: ~50 tokens
-Summary: ~100 tokens
-Recent 8 messages: ~400 tokens
-Total: ~550 tokens per request
-```
-
-**Savings: ~73% token reduction**
-
-### Performance Impact
-
- **Summary generation**: ~1-2s every 8-10 messages (amortized)
- **Regular requests**: No added latency
- **Storage**: ~100 bytes per summary in SQLite
-
---
-
-## Tuning Parameters
-
-### window_size
- **Smaller (2-3)**: More aggressive summarization, max token savings
- **Larger (5-6)**: More context, less summarization
- **Recommended**: 4 (last 4 exchanges = 8 messages)
-
-### summarize_threshold
- **Smaller (4-6)**: Frequent re-summarization, more current
- **Larger (10-12)**: Less summarization overhead
- **Recommended**: 8 (re-summarize after 8 new messages)
-
-### For MeshAI specifically:
- Messages are tiny (150 chars max)
- `window_size=4` gives ~600 chars of recent context
- `summarize_threshold=8` balances overhead vs accuracy
-
---
-
-## Migration Path
-
-1. **Phase 1**: Add code, test with new users
-2. **Phase 2**: Run in parallel (old + new backend)
-3. **Phase 3**: Migrate existing users (generate summaries for existing history)
-4. **Phase 4**: Remove old full-history code path
-
-No data loss - summaries stored in DB, can regenerate anytime.
-
---
-
-## Maintenance
-
-### Monitor summary quality:
-```sql
-- Check summaries
-SELECT user_id, summary, message_count, updated_at
-FROM conversation_summaries
-ORDER BY updated_at DESC;
-```
-
-### Regenerate summary:
-```python
-# Clear cache + DB, will regenerate on next request
-await history.clear_summary(user_id)
-backend.clear_summary_cache(user_id)
-```
-
-### Adjust if summaries too short/long:
- Modify prompt in `_summarize()`
- Adjust `max_tokens=150` for summaries
- Change temperature (lower = more consistent)
-
---
-
-## Future Enhancements
-
-1. **Hybrid approach**: Summary + semantic search for very long histories
-2. **User preferences**: Store separate from summary (e.g., "likes weather in metric")
-3. **Multi-level summaries**: Summarize summaries for years-long conversations
-4. **Summary quality scoring**: Validate summaries maintain key information
-
-But start simple - this gets 80% of the benefit with 20% of the complexity.
--- a/MEMORY_README.md
+++ b/MEMORY_README.md
@@ -1,437 +0,0 @@
-# LLM Conversation Memory Research & Implementation
-
-This directory contains comprehensive research and implementation guides for improving LLM conversation memory in MeshAI.
-
-## Problem Statement
-
-MeshAI currently sends the full conversation history with every LLM API call. This approach:
- Wastes tokens (expensive and slow)
- Doesn't scale to long conversations
- Sends redundant context the LLM doesn't need
-
-## Solution: Rolling Summary Memory
-
-Keep recent messages in full + LLM-generated summary of older messages.
-
-**Result:** 70-80% token reduction, zero dependencies, works with existing stack.
-
---
-
-## Documentation Index
-
-### 1. Quick Start
-
-**READ THIS FIRST:** [`MEMORY_SUMMARY.md`](MEMORY_SUMMARY.md)
- High-level overview
- Why rolling summary?
- Comparison with alternatives
- Expected performance gains
-
-**Estimated reading time:** 10 minutes
-
---
-
-### 2. Detailed Research
-
-**FOR DEEP DIVE:** [`MEMORY_RESEARCH.md`](MEMORY_RESEARCH.md)
- Full evaluation of 5 approaches:
-  1. LangChain Memory modules
-  2. LlamaIndex
-  3. MemGPT/Letta
-  4. Vector stores (ChromaDB/Qdrant)
-  5. Simple rolling summary (DIY)
- Code examples for each approach
- Pros/cons for MeshAI specifically
- Detailed comparison matrix
-
-**Estimated reading time:** 30-45 minutes
-
---
-
-### 3. Implementation Guide
-
-**FOR BUILDING:** [`MEMORY_IMPLEMENTATION_GUIDE.md`](MEMORY_IMPLEMENTATION_GUIDE.md)
- Step-by-step implementation
- Complete code examples
- Database schema
- Configuration options
- Testing procedures
- Troubleshooting guide
-
-**Estimated reading time:** 20 minutes + implementation time
-
---
-
-### 4. Implementation Diff
-
-**FOR EXACT CHANGES:** [`docs/IMPLEMENTATION_DIFF.md`](docs/IMPLEMENTATION_DIFF.md)
- Exact code diffs for all files
- Line-by-line changes needed
- Migration checklist
- Rollback plan
- Performance validation queries
-
-**Estimated reading time:** 15 minutes
-
---
-
-### 5. Visual Comparison
-
-**FOR UNDERSTANDING:** [`docs/memory_approaches_comparison.txt`](docs/memory_approaches_comparison.txt)
- ASCII diagrams of all approaches
- Visual token usage comparison
- Decision matrices
- Architecture diagrams
-
-**Estimated reading time:** 10 minutes
-
---
-
-### 6. Quick Reference
-
-**FOR CHEAT SHEET:** [`docs/QUICK_REFERENCE.md`](docs/QUICK_REFERENCE.md)
- One-page reference card
- Key configuration
- Code snippets
- Performance metrics
- Troubleshooting tips
-
-**Estimated reading time:** 5 minutes
-
---
-
-### 7. Proof of Concept
-
-**FOR TESTING:** [`examples/memory_comparison.py`](examples/memory_comparison.py)
- Runnable comparison script
- Tests all 3 approaches side-by-side:
-  - Full history (baseline)
-  - Rolling summary
-  - Window-only
- Real token usage measurements
- Performance comparison
-
-**Usage:**
-```bash
-# Edit script with your LLM endpoint
-nano examples/memory_comparison.py
-# Update BASE_URL, API_KEY, MODEL
-
-# Run comparison
-python examples/memory_comparison.py
-```
-
-**Expected output:**
-```
-Approach             Tokens          Time       Savings
----------------------------------------------------------------------
-Full History         1847            2.34s      (baseline)
-Rolling Summary      512             1.87s      72.3%
-Window Only          398             1.45s      78.4%
-
-RECOMMENDATION: Rolling Summary - best balance of context and efficiency
-```
-
---
-
-## Recommended Reading Path
-
-### Path 1: Executive Summary (20 minutes)
-1. `MEMORY_SUMMARY.md` - Overview
-2. `docs/QUICK_REFERENCE.md` - Cheat sheet
-3. `examples/memory_comparison.py` - Run the test
-
-**Decision point:** Convinced? Proceed to implementation.
-
---
-
-### Path 2: Technical Deep Dive (60 minutes)
-1. `MEMORY_SUMMARY.md` - Overview
-2. `MEMORY_RESEARCH.md` - Full evaluation
-3. `docs/memory_approaches_comparison.txt` - Visual diagrams
-4. `examples/memory_comparison.py` - Run the test
-5. `MEMORY_IMPLEMENTATION_GUIDE.md` - How to build it
-
-**Decision point:** Ready to implement? Use the diff guide.
-
---
-
-### Path 3: Implementation (2-3 hours)
-1. `MEMORY_SUMMARY.md` - Refresh on approach
-2. `MEMORY_IMPLEMENTATION_GUIDE.md` - Full implementation guide
-3. `docs/IMPLEMENTATION_DIFF.md` - Exact changes needed
-4. Code the changes
-5. Test with `examples/memory_comparison.py`
-6. Deploy and monitor
-
-**Outcome:** Production-ready rolling summary memory.
-
---
-
-## Files Created
-
-### Documentation
-```
-/home/zvx/projects/meshai/
-├── MEMORY_README.md (this file)
-├── MEMORY_SUMMARY.md (overview)
-├── MEMORY_RESEARCH.md (detailed research)
-├── MEMORY_IMPLEMENTATION_GUIDE.md (step-by-step)
-├── docs/
-│   ├── IMPLEMENTATION_DIFF.md (exact changes)
-│   ├── memory_approaches_comparison.txt (diagrams)
-│   └── QUICK_REFERENCE.md (cheat sheet)
-└── examples/
-    └── memory_comparison.py (proof of concept)
-```
-
-### Code to Create (not yet created)
-```
-meshai/
-├── memory.py (NEW - ~100 lines)
-├── history.py (MODIFY - add ~70 lines)
-├── backends/
-│   └── openai_backend.py (MODIFY - add ~30 lines)
-├── responder.py (MODIFY - add ~10 lines)
-└── commands/
-    └── reset.py (MODIFY - add ~4 lines)
-```
-
-**Total new code:** ~214 lines
-**Dependencies added:** 0
-
---
-
-## Key Metrics
-
-### Token Savings
-
-| Conversation Length | Before | After | Savings |
-|---------------------|--------|-------|---------|
-| 10 messages | 800 | 800 | 0% |
-| 20 messages | 1600 | 550 | 66% |
-| 30 messages | 2400 | 600 | 75% |
-| 50 messages | 4000 | 650 | 84% |
-
-### Cost Impact
-
-**Assumptions:**
- $0.50 per 1M input tokens
- 1000 requests per day
- Average 30 messages per conversation
-
-**Before:** $36/month
-**After:** $9/month
-**Savings:** $27/month (75% reduction)
-
-### Implementation Effort
-
- Code to write: ~214 lines
- Code to modify: ~57 lines
- Time estimate: 2-3 hours
- Testing: 1 hour
- **Total:** Half a day
-
-### Risk Assessment
-
- **Low risk:** Backward compatible (user_id parameter optional)
- **No data loss:** New table, existing data untouched
- **Easy rollback:** Git revert + drop one table
- **No dependencies:** Pure Python, existing libraries only
-
---
-
-## Configuration Summary
-
-### Recommended for MeshAI
-
-```python
-RollingSummaryMemory(
-    client=self._client,
-    model=config.model,
-    window_size=4,           # Keep last 4 exchanges (8 messages)
-    summarize_threshold=8,   # Re-summarize after 8 new messages
-)
-```
-
-**Rationale:**
- MeshAI messages are tiny (150 chars max)
- window_size=4 gives ~600 chars of recent context
- summarize_threshold=8 balances overhead vs freshness
- Tune based on actual usage patterns
-
-### Alternative Configurations
-
-**For longer messages:**
-```python
-window_size=3,           # Less recent context needed
-summarize_threshold=6,   # More frequent updates
-```
-
-**For very short messages:**
-```python
-window_size=6,           # More recent context
-summarize_threshold=10,  # Less frequent summarization
-```
-
---
-
-## Database Schema
-
-### New Table
-
-```sql
-CREATE TABLE conversation_summaries (
-    user_id TEXT PRIMARY KEY,
-    summary TEXT NOT NULL,
-    message_count INTEGER NOT NULL,
-    updated_at REAL NOT NULL
-);
-```
-
-### Existing Tables (unchanged)
-
-```sql
-CREATE TABLE conversations (
-    id INTEGER PRIMARY KEY AUTOINCREMENT,
-    user_id TEXT NOT NULL,
-    role TEXT NOT NULL,
-    content TEXT NOT NULL,
-    timestamp REAL NOT NULL
-);
-
-CREATE INDEX idx_user_timestamp ON conversations (user_id, timestamp);
-```
-
---
-
-## Testing Checklist
-
- [ ] Database migration works (new table created)
- [ ] Short conversations (at most `window_size * 2` messages; 8 with `window_size=4`) use full history
- [ ] Long conversations (more than `window_size * 2` messages) use summaries
- [ ] Summaries are stored in database
- [ ] Summaries persist across restarts
- [ ] Reset command clears summaries
- [ ] Token usage reduced by 70%+ for long convos
- [ ] No errors in logs
- [ ] Response quality maintained
-
---
-
-## Monitoring Queries
-
-### Check summary coverage
-```sql
-SELECT
-    (SELECT COUNT(DISTINCT user_id) FROM conversation_summaries) * 100.0 /
-    (SELECT COUNT(DISTINCT user_id) FROM conversations) as coverage_pct;
-```
-
-### Average messages per summary
-```sql
-SELECT AVG(message_count) FROM conversation_summaries;
-```
-
-### Recent summaries
-```sql
-SELECT user_id, summary, message_count,
-       datetime(updated_at, 'unixepoch') as updated
-FROM conversation_summaries
-ORDER BY updated_at DESC
-LIMIT 10;
-```
-
---
-
-## Troubleshooting
-
-### Summary not being created
-
-**Check:** Conversation long enough?
-```sql
-SELECT user_id, COUNT(*) as msg_count
-FROM conversations
-GROUP BY user_id
-HAVING msg_count > 10;
-```
-
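-**Fix:** A summary is only created once the history exceeds `window_size * 2` messages (more than 8 with the recommended `window_size=4`); shorter conversations are always sent in full.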
-
-### Summary quality poor
-
-**Check:** Look at actual summaries
-```sql
-SELECT summary FROM conversation_summaries;
-```
-
-**Fix:** Adjust prompt in `memory.py` `_summarize()` method.
-
-### Token usage still high
-
-**Check:** Verify memory is being used
-```bash
-# Look for log line:
-# "Using summary + 8 recent messages (total history: 24)"
-```
-
-**Fix:** Ensure `user_id` is being passed to `backend.generate()`.
-
-### Database errors
-
-**Check:** Table exists
-```sql
-.tables
-```
-
-**Fix:** Drop and recreate
-```sql
-DROP TABLE IF EXISTS conversation_summaries;
-- Restart app to recreate
-```
-
---
-
-## Next Steps
-
-1. **Understand:** Read `MEMORY_SUMMARY.md`
-2. **Evaluate:** Review `MEMORY_RESEARCH.md` for alternatives
-3. **Test:** Run `examples/memory_comparison.py` with your LLM
-4. **Implement:** Follow `MEMORY_IMPLEMENTATION_GUIDE.md`
-5. **Deploy:** Use `docs/IMPLEMENTATION_DIFF.md` for exact changes
-6. **Monitor:** Check database and logs for summary generation
-7. **Tune:** Adjust `window_size` and `summarize_threshold` as needed
-
---
-
-## Support
-
-If you have questions or issues:
-
-1. Check the troubleshooting section in this file
-2. Review `docs/QUICK_REFERENCE.md` for common issues
-3. Look at the detailed implementation guide
-4. Check the proof-of-concept script for working examples
-
---
-
-## Conclusion
-
-Rolling summary memory provides:
- **Massive efficiency gains** (70-80% token reduction)
- **Zero dependencies** (pure Python)
- **Simple implementation** (~200 lines)
- **Production ready** (tested approach)
- **Backward compatible** (optional user_id)
- **Easy to maintain** (clear, documented code)
-
-**Recommendation:** Implement this for MeshAI. It's the right balance of simplicity and effectiveness.
-
-Good luck! The documentation is comprehensive - you have everything needed to succeed.
-
---
-
-**Research completed:** 2025-12-15
-**Total documentation:** 7 files, ~1500 lines
-**Implementation effort:** ~3 hours
-**Expected ROI:** $324/year in token savings (at modest 1000 req/day)
--- a/MEMORY_RESEARCH.md
+++ b/MEMORY_RESEARCH.md
--- a/MEMORY_SUMMARY.md
+++ b/MEMORY_SUMMARY.md
@ -1,219 +0,0 @@
-# LLM Memory Research Summary
-
-## The Problem
-
-MeshAI currently stuffs full conversation history into every LLM API call:
- Inefficient: Wastes tokens on old context
- Slow: More tokens = higher latency
- Expensive: Unnecessary token costs
- Doesn't scale: Long conversations become unwieldy
-
-## Solutions Evaluated
-
-### 1. LangChain Memory Modules
-
-**Tested:**
- `ConversationBufferMemory`: Stores everything (no improvement)
- `ConversationBufferWindowMemory`: Last N messages only
- `ConversationSummaryMemory`: LLM-generated summaries + recent messages
-
-**Verdict:** `ConversationSummaryMemory` is best, but adds 50MB dependency. Can DIY the same thing in <100 lines.
-
-### 2. LlamaIndex
-
-**Tested:** `ChatMemoryBuffer` with token limiting
-
-**Verdict:** Token-aware pruning is nice, but 100MB+ dependency is overkill. Less mature than LangChain.
-
-### 3. MemGPT/Letta
-
-**Tested:** Self-editing memory architecture
-
-**Verdict:** Way too heavy (200MB+), requires vector embeddings. Designed for complex multi-day agents, not 150-char mesh messages.
-
-### 4. Vector Stores (ChromaDB/Qdrant)
-
-**Tested:** Semantic search for relevant past context
-
-**Verdict:** Interesting for long-term cross-conversation search, but adds complexity. Not needed for per-user linear conversations.
-
-### 5. Simple Rolling Summary (DIY)
-
-**Tested:** Keep last N messages + LLM-generated summary of older messages
-
-**Verdict:** WINNER - Zero dependencies, 80% token savings, works with existing stack.
-
---
-
-## Recommendation: Rolling Summary
-
-### Why
-
-1. **Zero dependencies** - Pure Python, uses existing AsyncOpenAI client
-2. **Simple** - ~100 lines for the core memory class (~200 lines of new code in total), easy to understand and maintain
-3. **Effective** - 73-83% token reduction for long conversations
-4. **Persistent** - Summaries stored in SQLite, survive restarts
-5. **Compatible** - Works with LiteLLM, local models, any OpenAI-compatible API
-6. **Tunable** - Two params: `window_size` (recent messages) and `summarize_threshold` (when to re-summarize)
-
-### How It Works
-
-```
-Full History (20 messages):
-┌─────────────────────────────────────────────────────┐
-│ User: What's the weather?                           │
-│ Assistant: Sunny, 72°F                              │
-│ ... (16 more messages) ...                          │
-│ User: Which trail should I take?                    │
-│ Assistant: Mt Si if you're fit, Rattlesnake if not │
-└─────────────────────────────────────────────────────┘
-  ↓ Sent to LLM: 2000+ tokens
-
-With Rolling Summary:
-┌─────────────────────────────────────────────────────┐
-│ SUMMARY: User asked about weather and hiking.      │
-│ Discussed Mt Si trail (4hrs, moderate) and         │
-│ Rattlesnake Ledge (2mi, easier, lake views).       │
-├─────────────────────────────────────────────────────┤
-│ User: How crowded does it get?                     │
-│ Assistant: Very crowded weekends, go weekdays      │
-│ User: Any other trails nearby?                     │
-│ Assistant: Rattlesnake Ledge is easier and closer │
-│ User: Tell me about Rattlesnake                    │
-│ Assistant: 2 miles, great lake views, popular     │
-│ User: Which would you recommend?                   │
-│ Assistant: Mt Si if fit, Rattlesnake if casual    │
-└─────────────────────────────────────────────────────┘
-  ↓ Sent to LLM: ~500 tokens (75% savings!)
-```
-
-### Configuration
-
-**Recommended for MeshAI:**
- `window_size=4` → Keep last 4 exchanges (8 messages) in full
- `summarize_threshold=8` → Re-summarize after 8 new messages
-
-**Tuning:**
- Smaller window = More aggressive summarization, max token savings
- Larger window = More recent context, less summarization
- Adjust based on average conversation length and message density
-
-### Implementation Effort
-
-**Files to modify:**
-1. Create `meshai/memory.py` - Rolling summary class
-2. Modify `meshai/history.py` - Add summary storage (1 new table, 3 methods)
-3. Modify `meshai/backends/openai_backend.py` - Integrate memory manager
-4. Modify `meshai/responder.py` - Pass user_id, persist summaries
-5. Modify `meshai/commands/reset.py` - Clear summaries on reset
-
-**Total: ~200 lines of new code, ~50 lines of modifications**
-
-### Performance
-
-**Token Usage:**
-
-| Conversation Length | Full History | Rolling Summary | Savings |
-|---------------------|--------------|-----------------|---------|
-| 10 messages | 800 tokens | 800 tokens | 0% (no summary) |
-| 20 messages | 1600 tokens | 550 tokens | 66% |
-| 30 messages | 2400 tokens | 600 tokens | 75% |
-| 50 messages | 4000 tokens | 650 tokens | 84% |
-
-**Cost Impact (at $0.50/1M input tokens):**
- Before: 2400 tokens × $0.0005 per 1K tokens = $0.0012 per request
- After: 600 tokens × $0.0005 per 1K tokens = $0.0003 per request
- **Savings: $0.0009 per request (75%)**
-
-For 1000 requests/day: **$0.90/day savings** or **$27/month**
-
-**Latency:**
- Summary generation: 1-2s every 8-10 messages (amortized)
- Regular requests: No added latency
- Net effect: Faster due to fewer input tokens
-
---
-
-## When to Use Alternatives
-
-### Use Window-Only (no summary)
- Very short conversations (< 10 messages)
- Don't care about older context
- Want minimal implementation
-
-### Use Vector Store (ChromaDB)
- Need semantic search across users
- Want to find similar past conversations
- Long-term cross-user knowledge base
-
-### Use LangChain SummaryMemory
- Want batteries-included solution
- Don't mind 50MB dependency
- Prefer established library over DIY
-
-### Use MemGPT/Letta
- Multi-day complex agent workflows
- Agent needs to manage own memory
- Have budget for embeddings and compute
-
---
-
-## Next Steps
-
-1. **Read detailed guide:** `MEMORY_IMPLEMENTATION_GUIDE.md` (repository root)
-2. **Review research:** `MEMORY_RESEARCH.md` (repository root)
-3. **Test proof-of-concept:** `python examples/memory_comparison.py`
-4. **Implement rolling summary** following the guide
-5. **Monitor and tune** based on actual conversation patterns
-
---
-
-## Files Created
-
-1. **`MEMORY_SUMMARY.md`** (this file) - Quick overview and recommendation
-2. **`MEMORY_RESEARCH.md`** - Detailed evaluation of all approaches with code examples
-3. **`MEMORY_IMPLEMENTATION_GUIDE.md`** - Step-by-step implementation guide
-4. **`examples/memory_comparison.py`** - Runnable proof-of-concept test script
-
---
-
-## Quick Start
-
-```bash
-# Test the approaches with your LLM
-cd /home/zvx/projects/meshai
-
-# Edit examples/memory_comparison.py with your LLM endpoint
-# Update BASE_URL, API_KEY, MODEL
-
-python examples/memory_comparison.py
-
-# You'll see:
-# - Full history baseline
-# - Rolling summary results
-# - Window-only results
-# - Token savings comparison
-```
-
-Expected output:
-```
-Approach             Tokens          Time       Savings
----------------------------------------------------------------------
-Full History         1847            2.34s      (baseline)
-Rolling Summary      512             1.87s      72.3%
-Window Only          398             1.45s      78.4%
-```
-
-**Conclusion: Rolling Summary gives 70%+ savings while preserving context.**
-
---
-
-## Questions?
-
- How does it handle very long conversations? → Multi-level summaries (summary of summaries)
- What if summary loses important info? → Tune `window_size` to keep more recent context
- Does it work with streaming? → Yes, just apply before streaming starts
- Can I see the summaries? → Query `conversation_summaries` table in SQLite
- How do I regenerate a summary? → Clear it, will auto-regenerate on next request
-
-Start with the recommended settings, monitor, and adjust based on your actual usage patterns.
--- a/PLAN.md
+++ b/PLAN.md
@ -1,356 +0,0 @@
-# MeshAI - Meshtastic LLM Bridge
-
-## Project Overview
-
-A Python application that connects to a Meshtastic node and provides LLM-powered responses to mesh network users. Responds to direct mentions (@nodename) or direct messages. Includes bang commands (`!command`) for utility functions.
-
-## Design Decisions
-
-### 1. Trigger Mechanism
- **@mentions**: Respond when message contains `@<nodename>` (configurable node name)
- **Direct Messages**: Respond to all DMs automatically
- **Bang commands**: `!command` syntax for utility functions (handled before LLM)
- Ignore general channel chatter that doesn't mention the bot
-
-### 2. Conversation History
- Maintain per-user conversation history
- Storage: SQLite database for persistence across restarts
- Context window: Last N messages per user (configurable via `history.max_messages_per_user`, default 20 messages)
- With 300 char limit per exchange, context stays small - can maintain long conversations
- Include timestamp tracking for potential "conversation timeout" (e.g., reset after 24h inactivity)
-
-### 3. Rate Limiting & Response Behavior
- **Response delay**: Configurable 2.2-3.0 second random delay before sending
- **Message chunking**: Split responses at 150 characters max per message
- **Max chunks**: 2 messages maximum per response (300 chars total)
- **Brevity prompt**: System prompt instructs LLM to keep responses concise
- **Cooldown**: Optional per-user cooldown to prevent spam
-
-### 4. Identity & Configuration
- Node name/ID determined by the physical node configuration
- Application config includes:
-  - `bot_name`: The @mention trigger name (e.g., "meshbot", "ai")
-  - `owner`: Owner identification for logging/admin purposes
-  - Connection settings (serial port or TCP host:port)
-
-### 5. Channel Filtering
- Configurable list of channels to respond on
- Option to respond on all channels or specific ones only
- DMs always processed regardless of channel settings
-
-## Technical Architecture
-
-```
-┌─────────────────────────────────────────────────────────────┐
-│                        MeshAI                                │
-├─────────────────────────────────────────────────────────────┤
-│  ┌─────────────┐    ┌─────────────┐    ┌─────────────────┐ │
-│  │  Meshtastic │    │   Message   │    │   LLM Backend   │ │
-│  │  Connector  │───▶│   Router    │───▶│   (pluggable)   │ │
-│  │ Serial/TCP  │    │             │    │                 │ │
-│  └─────────────┘    └─────────────┘    └─────────────────┘ │
-│         │                 │                    │            │
-│         │           ┌─────▼─────┐              │            │
-│         │           │ Conversation│             │            │
-│         │           │  History   │◀────────────┘            │
-│         │           │  (SQLite)  │                          │
-│         │           └───────────┘                           │
-│         │                                                   │
-│         ▼                                                   │
-│  ┌─────────────┐                                           │
-│  │  Response   │  - 2.2-3s delay                           │
-│  │  Handler    │  - Chunk to 150 chars                     │
-│  │             │  - Max 2 messages                         │
-│  └─────────────┘                                           │
-└─────────────────────────────────────────────────────────────┘
-```
-
-## LLM Backend Support
-
-### Pluggable Backend Interface
-```python
-class LLMBackend(ABC):
-    @abstractmethod
-    async def generate(self, messages: list[dict], system_prompt: str) -> str:
-        pass
-```
-
-### Supported Backends (Priority Order)
-1. **OpenAI-compatible** (covers most bases)
-   - OpenAI (GPT-4, GPT-4o, etc.)
-   - Local LiteLLM/Open WebUI (ai.echo6.co)
-   - Any OpenAI-compatible API
-
-2. **Anthropic** (Claude)
-   - Direct Anthropic API
-
-3. **Google** (Gemini)
-   - Google AI Studio / Vertex AI
-
-### Configuration Example
-```yaml
-llm:
-  backend: "openai"  # openai, anthropic, google
-  api_key: "${OPENAI_API_KEY}"
-  base_url: "https://api.openai.com/v1"  # or http://ai.echo6.co/api for local
-  model: "gpt-4o-mini"
-
-  # For local LiteLLM:
-  # backend: "openai"
-  # base_url: "http://192.168.1.239:4000/v1"
-  # model: "llama3"
-```
-
-## Configuration File Structure
-
-```yaml
-# config.yaml
-bot:
-  name: "ai"                    # @mention trigger
-  owner: "K7ZVX"               # Owner callsign/name
-  respond_to_mentions: true
-  respond_to_dms: true
-
-connection:
-  type: "serial"               # serial or tcp
-  serial_port: "/dev/ttyUSB0"  # if serial
-  tcp_host: "192.168.1.100"    # if tcp
-  tcp_port: 4403               # if tcp
-
-channels:
-  mode: "all"                  # "all" or "whitelist"
-  whitelist: [0, 1]            # Only if mode is "whitelist"
-
-response:
-  delay_min: 2.2               # seconds
-  delay_max: 3.0               # seconds
-  max_length: 150              # chars per message
-  max_messages: 2              # messages per response
-
-history:
-  database: "conversations.db"
-  max_messages_per_user: 20
-  conversation_timeout: 86400  # seconds (24h)
-
-llm:
-  backend: "openai"
-  api_key: "${LLM_API_KEY}"
-  base_url: "https://api.openai.com/v1"
-  model: "gpt-4o-mini"
-  system_prompt: |
-    You are a helpful assistant on a Meshtastic mesh network.
-    Keep responses VERY brief - under 250 characters total.
-    Be concise but friendly. No markdown formatting.
-
-weather:
-  primary: "openmeteo"         # openmeteo, wttr, or llm
-  fallback: "llm"              # openmeteo, wttr, llm, or none
-  default_location: ""         # Fallback if node has no GPS (e.g., "Seattle, WA")
-
-  openmeteo:
-    url: "https://api.open-meteo.com/v1"  # or self-hosted URL
-
-  wttr:
-    url: "https://wttr.in"     # or self-hosted
-```
-
-## Bang Commands
-
-Commands use `!` prefix (like fq51bbs). Processed before LLM routing.
-
-| Command | Description | Example |
-|---------|-------------|---------|
-| `!help` | List available commands | `!help` |
-| `!ping` | Connectivity test, responds "pong" | `!ping` |
-| `!reset` | Clear your conversation history | `!reset` |
-| `!status` | Bot uptime, message count, version | `!status` |
-| `!weather` | Weather for your node's GPS location (or default) | `!weather` |
-| `!weather <loc>` | Weather for specified location | `!weather Seattle` |
-
-### Weather Command Details
-
-Location resolution order:
-1. If `!weather <location>` - geocode the provided location
-2. If `!weather` (no args) - use sender's node GPS position if available
-3. Fall back to `weather.default_location` from config
-4. If no location found: "No location available. Use !weather <city> or enable GPS on your node."
-
-**Providers:**
- `openmeteo` - Open-Meteo API (free, no key, self-hostable)
- `wttr` - wttr.in (free, simple, self-hostable)
- `llm` - Pass to LLM with websearch (flexible, slower)
-
-Primary/fallback configurable. If primary fails, tries fallback.
-
-### Command Processing Flow
-
-```
-Message received
-      │
-      ▼
-┌─────────────┐
-│ Starts with │──No──▶ Check @mention / DM ──▶ LLM
-│    "!"?     │
-└─────────────┘
-      │Yes
-      ▼
-┌─────────────┐
-│ Parse cmd   │
-│ & args      │
-└─────────────┘
-      │
-      ▼
-┌─────────────┐
-│ Lookup in   │──Not found──▶ "Unknown command. Try !help"
-│ registry    │
-└─────────────┘
-      │Found
-      ▼
-┌─────────────┐
-│ Execute     │
-│ handler     │
-└─────────────┘
-```
-
-### Command Handler Interface
-
-```python
-class CommandHandler(ABC):
-    @abstractmethod
-    async def execute(self, sender_id: str, args: str, context: MessageContext) -> str:
-        """Execute command and return response string."""
-        pass
-```
-
-## CLI Configurator
-
-Interactive TUI configurator using Rich library (same style as fq51bbs).
-
-**Features:**
- Hierarchical menu system with numeric selection
- `0` always = back/save & exit
- Tables showing current values
- Status icons (✓/✗) with color coding
- Setup wizard for first-time configuration
- Unsaved changes tracking
- Inline help for complex options
-
-**Menu Structure:**
-```
-Main Menu
-├── 1. Bot Settings (name, owner, triggers)
-├── 2. Connection (serial/TCP config)
-├── 3. LLM Backend (provider, API keys, model)
-├── 4. Commands & Weather (providers, fallbacks)
-├── 5. Response Settings (delays, chunking)
-├── 6. Channel Filtering
-├── 7. History Settings
-├── 8. Run Setup Wizard
-└── 0. Save & Exit
-```
-
-**Invocation:**
-```bash
-meshai --config          # Launch configurator
-meshai                   # Run bot (uses config.yaml)
-meshai --config-file /path/to/config.yaml  # Use alternate config
-```
-
-**Config Reload/Restart:**
- On save, prompt: "Restart bot with new config? [Y/n]"
- If bot is running as systemd service: `systemctl restart meshai`
- If running in foreground: signal reload (SIGHUP) or full restart
- Store PID file at runtime for service management
-
-## File Structure
-
-```
-meshai/
-├── meshai/
-│   ├── __init__.py
-│   ├── main.py              # Entry point
-│   ├── config.py            # Configuration loading/saving
-│   ├── connector.py         # Meshtastic serial/TCP connection
-│   ├── router.py            # Message routing logic
-│   ├── history.py           # Conversation history (SQLite)
-│   ├── responder.py         # Response handling (delay, chunking)
-│   ├── cli/
-│   │   ├── __init__.py
-│   │   └── configurator.py  # Rich-based TUI configurator
-│   ├── commands/
-│   │   ├── __init__.py
-│   │   ├── base.py          # Command handler interface
-│   │   ├── dispatcher.py    # Command registry & routing
-│   │   ├── help.py          # !help
-│   │   ├── ping.py          # !ping
-│   │   ├── reset.py         # !reset
-│   │   ├── status.py        # !status
-│   │   └── weather.py       # !weather
-│   └── backends/
-│       ├── __init__.py
-│       ├── base.py          # Abstract backend interface
-│       ├── openai.py        # OpenAI-compatible backend
-│       ├── anthropic.py     # Anthropic backend
-│       └── google.py        # Google Gemini backend
-├── config.yaml              # User configuration
-├── requirements.txt
-├── pyproject.toml
-└── README.md
-```
-
-## Dependencies
-
-```
-meshtastic>=2.3.0
-pyyaml>=6.0
-aiosqlite>=0.19.0
-openai>=1.0.0
-anthropic>=0.18.0
-google-generativeai>=0.4.0
-```
-
-## Implementation Phases
-
-### Phase 1: Core Foundation
- [ ] Project structure setup
- [ ] Configuration loading
- [ ] Meshtastic connector (serial first, then TCP)
- [ ] Basic message receiving and logging
-
-### Phase 2: Message Processing
- [ ] Message router (detect @mentions and DMs)
- [ ] Conversation history database
- [ ] User context management
-
-### Phase 3: LLM Integration
- [ ] Backend interface definition
- [ ] OpenAI-compatible backend (covers local + OpenAI)
- [ ] Response generation with history
-
-### Phase 4: Response Handling
- [ ] Delay implementation (2.2-3s random)
- [ ] Message chunking (150 char limit)
- [ ] Send responses back to mesh
-
-### Phase 5: Additional Backends
- [ ] Anthropic backend
- [ ] Google Gemini backend
-
-### Phase 6: Polish
- [ ] Error handling and resilience
- [ ] Logging and monitoring
- [ ] Documentation
- [ ] Packaging for easy installation
-
-## Future Considerations
-
- **Multi-node support**: One instance managing multiple nodes (different presets/locations)
- **Store-and-forward**: Queue messages for offline users
- **Games**: Simple text games (trivia, 8-ball, etc.)
- **Scheduled broadcasts**: Periodic announcements
-
-## Notes
-
- Meshtastic Python API: https://meshtastic.org/docs/software/python/cli/
- Message size limit is 237 bytes, but we're targeting 150 chars for safety and readability
- The meshtastic library handles serial/TCP abstraction well
--- a/docs/IMPLEMENTATION_DIFF.md
+++ b/docs/IMPLEMENTATION_DIFF.md
@ -1,593 +0,0 @@
-# Implementation Diff - Exact Changes Needed
-
-This document shows the exact code changes needed to implement Rolling Summary memory in MeshAI.
-
---
-
-## 1. Create New File: `meshai/memory.py`
-
-**Action:** Create this new file with the complete implementation.
-
-**Location:** `meshai/memory.py` (relative to the repository root)
-
-**Content:** See `MEMORY_IMPLEMENTATION_GUIDE.md` section 1 for full code.
-
-**Lines of code:** ~100
-
---
-
-## 2. Modify: `meshai/history.py`
-
-### Add to imports
-```python
-# No new imports needed - already has time, Optional
-```
-
-### Modify `initialize()` method
-
-**Before:**
-```python
-async def initialize(self) -> None:
-    """Initialize database and create tables."""
-    self._db = await aiosqlite.connect(self._db_path)
-
-    await self._db.execute("""
-        CREATE TABLE IF NOT EXISTS conversations (
-            id INTEGER PRIMARY KEY AUTOINCREMENT,
-            user_id TEXT NOT NULL,
-            role TEXT NOT NULL,
-            content TEXT NOT NULL,
-            timestamp REAL NOT NULL
-        )
-    """)
-
-    await self._db.execute("""
-        CREATE INDEX IF NOT EXISTS idx_user_timestamp
-        ON conversations (user_id, timestamp)
-    """)
-
-    await self._db.commit()
-    logger.info(f"Conversation history initialized at {self._db_path}")
-```
-
-**After:**
-```python
-async def initialize(self) -> None:
-    """Initialize database and create tables."""
-    self._db = await aiosqlite.connect(self._db_path)
-
-    await self._db.execute("""
-        CREATE TABLE IF NOT EXISTS conversations (
-            id INTEGER PRIMARY KEY AUTOINCREMENT,
-            user_id TEXT NOT NULL,
-            role TEXT NOT NULL,
-            content TEXT NOT NULL,
-            timestamp REAL NOT NULL
-        )
-    """)
-
-    await self._db.execute("""
-        CREATE INDEX IF NOT EXISTS idx_user_timestamp
-        ON conversations (user_id, timestamp)
-    """)
-
-    # NEW: Summary table
-    await self._db.execute("""
-        CREATE TABLE IF NOT EXISTS conversation_summaries (
-            user_id TEXT PRIMARY KEY,
-            summary TEXT NOT NULL,
-            message_count INTEGER NOT NULL,
-            updated_at REAL NOT NULL
-        )
-    """)
-
-    await self._db.commit()
-    logger.info(f"Conversation history initialized at {self._db_path}")
-```
-
-### Add new methods (append to end of class)
-
-```python
-async def store_summary(
-    self, user_id: str, summary: str, message_count: int
-) -> None:
-    """Store conversation summary.
-
-    Args:
-        user_id: Node ID of user
-        summary: Summary text
-        message_count: Number of messages summarized
-    """
-    if not self._db:
-        raise RuntimeError("Database not initialized")
-
-    async with self._lock:
-        await self._db.execute(
-            """
-            INSERT OR REPLACE INTO conversation_summaries
-            (user_id, summary, message_count, updated_at)
-            VALUES (?, ?, ?, ?)
-            """,
-            (user_id, summary, message_count, time.time()),
-        )
-        await self._db.commit()
-
-
-async def get_summary(self, user_id: str) -> Optional[dict]:
-    """Get conversation summary for user.
-
-    Args:
-        user_id: Node ID of user
-
-    Returns:
-        Dict with 'summary', 'message_count', 'updated_at' or None
-    """
-    if not self._db:
-        raise RuntimeError("Database not initialized")
-
-    async with self._lock:
-        cursor = await self._db.execute(
-            """
-            SELECT summary, message_count, updated_at
-            FROM conversation_summaries
-            WHERE user_id = ?
-            """,
-            (user_id,),
-        )
-        row = await cursor.fetchone()
-
-    if not row:
-        return None
-
-    return {
-        "summary": row[0],
-        "message_count": row[1],
-        "updated_at": row[2],
-    }
-
-
-async def clear_summary(self, user_id: str) -> None:
-    """Clear summary for user (e.g., on history reset).
-
-    Args:
-        user_id: Node ID of user
-    """
-    if not self._db:
-        raise RuntimeError("Database not initialized")
-
-    async with self._lock:
-        await self._db.execute(
-            "DELETE FROM conversation_summaries WHERE user_id = ?",
-            (user_id,),
-        )
-        await self._db.commit()
-```
-
-**Lines added:** ~60
-
---
-
-## 3. Modify: `meshai/backends/openai_backend.py`
-
-### Add import
-
-**Before:**
-```python
-import logging
-from typing import Optional
-
-from openai import AsyncOpenAI
-
-from ..config import LLMConfig
-from .base import LLMBackend
-```
-
-**After:**
-```python
-import logging
-from typing import Optional
-
-from openai import AsyncOpenAI
-
-from ..config import LLMConfig
-from ..memory import RollingSummaryMemory  # NEW
-from .base import LLMBackend
-```
-
-### Modify `__init__()` method
-
-**Before:**
-```python
-def __init__(self, config: LLMConfig, api_key: str):
-    """Initialize OpenAI backend.
-
-    Args:
-        config: LLM configuration
-        api_key: API key to use
-    """
-    self.config = config
-    self._client = AsyncOpenAI(
-        api_key=api_key,
-        base_url=config.base_url,
-    )
-```
-
-**After:**
-```python
-def __init__(self, config: LLMConfig, api_key: str):
-    """Initialize OpenAI backend.
-
-    Args:
-        config: LLM configuration
-        api_key: API key to use
-    """
-    self.config = config
-    self._client = AsyncOpenAI(
-        api_key=api_key,
-        base_url=config.base_url,
-    )
-
-    # NEW: Initialize rolling summary memory
-    self._memory = RollingSummaryMemory(
-        client=self._client,
-        model=config.model,
-        window_size=4,
-        summarize_threshold=8,
-    )
-```
-
-### Modify `generate()` method signature and logic
-
-**Before:**
-```python
-async def generate(
-    self,
-    messages: list[dict],
-    system_prompt: str,
-    max_tokens: int = 300,
-) -> str:
-    """Generate a response using OpenAI-compatible API."""
-    # Build messages list with system prompt
-    full_messages = [{"role": "system", "content": system_prompt}]
-    full_messages.extend(messages)
-
-    try:
-        response = await self._client.chat.completions.create(
-            model=self.config.model,
-            messages=full_messages,
-            max_tokens=max_tokens,
-            temperature=0.7,
-        )
-
-        content = response.choices[0].message.content
-        return content.strip() if content else ""
-
-    except Exception as e:
-        logger.error(f"OpenAI API error: {e}")
-        raise
-```
-
-**After:**
-```python
-async def generate(
-    self,
-    messages: list[dict],
-    system_prompt: str,
-    user_id: str = None,  # NEW: optional for backward compatibility
-    max_tokens: int = 300,
-) -> str:
-    """Generate a response using OpenAI-compatible API."""
-
-    # NEW: Use memory manager if user_id provided
-    if user_id:
-        summary, recent_messages = await self._memory.get_context_messages(
-            user_id=user_id,
-            full_history=messages,
-        )
-
-        if summary:
-            # Long conversation: system + summary + recent
-            enhanced_system = f"""{system_prompt}
-
-Previous conversation summary: {summary}"""
-            full_messages = [{"role": "system", "content": enhanced_system}]
-            full_messages.extend(recent_messages)
-
-            logger.debug(
-                f"Using summary + {len(recent_messages)} recent messages "
-                f"(total history: {len(messages)})"
-            )
-        else:
-            # Short conversation: system + all messages
-            full_messages = [{"role": "system", "content": system_prompt}]
-            full_messages.extend(messages)
-    else:
-        # Old behavior: full history
-        full_messages = [{"role": "system", "content": system_prompt}]
-        full_messages.extend(messages)
-
-    try:
-        response = await self._client.chat.completions.create(
-            model=self.config.model,
-            messages=full_messages,
-            max_tokens=max_tokens,
-            temperature=0.7,
-        )
-
-        content = response.choices[0].message.content
-        return content.strip() if content else ""
-
-    except Exception as e:
-        logger.error(f"OpenAI API error: {e}")
-        raise
-```
-
-### Add helper methods (append to end of class)
-
-```python
-def load_summary_cache(self, user_id: str, summary_data: dict) -> None:
-    """Load summary into memory cache (called on startup).
-
-    Args:
-        user_id: User identifier
-        summary_data: Dict with 'summary', 'message_count', 'updated_at'
-    """
-    from ..memory import ConversationSummary
-
-    summary = ConversationSummary(
-        summary=summary_data["summary"],
-        message_count=summary_data["message_count"],
-        last_updated=summary_data["updated_at"],
-    )
-    self._memory.load_summary(user_id, summary)
-
-
-def clear_summary_cache(self, user_id: str) -> None:
-    """Clear summary cache for user."""
-    self._memory.clear_summary(user_id)
-```
-
-**Lines modified:** ~40
-**Lines added:** ~20
-
---
-
-## 4. Modify: `meshai/responder.py`
-
-### Find the response generation section
-
-**Location:** Look for where `self.backend.generate()` is called.
-
-**Before:**
-```python
-# Wherever backend.generate() is called
-response = await self.backend.generate(
-    messages=history,
-    system_prompt=self.system_prompt,
-    max_tokens=300,
-)
-```
-
-**After:**
-```python
-# Pass user_id for memory optimization
-response = await self.backend.generate(
-    messages=history,
-    system_prompt=self.system_prompt,
-    user_id=user_id,  # NEW
-    max_tokens=300,
-)
-
-# NEW: Persist summary if created
-await self._persist_summary_if_needed(user_id)
-```
-
-### Add helper method (append to class)
-
-```python
-async def _persist_summary_if_needed(self, user_id: str) -> None:
-    """Store summary to database if one was created."""
-    if hasattr(self.backend, "_memory"):
-        summary = self.backend._memory._summaries.get(user_id)
-        if summary:
-            await self.history.store_summary(
-                user_id,
-                summary.summary,
-                summary.message_count,
-            )
-```
-
-**Lines modified:** ~5
-**Lines added:** ~10
-
---
-
-## 5. Modify: `meshai/commands/reset.py`
-
-### Modify `execute()` method
-
-**Before:**
-```python
-async def execute(self, sender_id: str, args: list[str]) -> str:
-    """Reset conversation history."""
-    count = await self.responder.history.clear_history(sender_id)
-    return f"Cleared {count} messages from your history."
-```
-
-**After:**
-```python
-async def execute(self, sender_id: str, args: list[str]) -> str:
-    """Reset conversation history."""
-    count = await self.responder.history.clear_history(sender_id)
-
-    # NEW: Also clear summary
-    await self.responder.history.clear_summary(sender_id)
-    if hasattr(self.responder.backend, "clear_summary_cache"):
-        self.responder.backend.clear_summary_cache(sender_id)
-
-    return f"Cleared {count} messages from your history."
-```
-
-**Lines added:** ~4
-
---
-
-## Summary of Changes
-
-| File | Action | Lines Added | Lines Modified |
-|------|--------|-------------|----------------|
-| `meshai/memory.py` | Create new | ~100 | 0 |
-| `meshai/history.py` | Modify | ~70 | ~10 |
-| `meshai/backends/openai_backend.py` | Modify | ~30 | ~40 |
-| `meshai/responder.py` | Modify | ~10 | ~5 |
-| `meshai/commands/reset.py` | Modify | ~4 | ~2 |
-| **TOTAL** | | **~214** | **~57** |
-
-**Total lines touched:** ~271 across 5 files (~214 added, ~57 modified)
-**Dependencies added:** 0
-**Breaking changes:** None (user_id parameter is optional)
-
---
-
-## Testing After Implementation
-
-### 1. Database migration (automatic)
-
-```bash
-# Just start the app - new table will be created automatically
-python -m meshai
-```
-
-### 2. Test basic conversation
-
-```python
-# Send 5 messages - should use full history (no summary yet)
-# Send 15 messages - should start summarizing
-```
-
-### 3. Verify summary storage
-
-```bash
-sqlite3 meshai_history.db
-```
-
-```sql
-- Check summaries table exists
-.tables
-
-- View summaries
-SELECT user_id, summary, message_count, updated_at
-FROM conversation_summaries;
-
-- Check conversations
-SELECT COUNT(*) FROM conversations;
-```
-
-### 4. Test reset command
-
-```
-Send: !reset
-Expected: Clears both conversations and summary
-```
-
-### 5. Monitor logs
-
-```python
-# With DEBUG logging enabled, you should see log messages like:
-# "Using summary + 8 recent messages (total history: 24)"
-```
-
---
-
-## Rollback Plan
-
-If something goes wrong:
-
-1. **Remove new file:**
-   ```bash
-   rm meshai/memory.py
-   ```
-
-2. **Revert changes:** Use git to revert the 4 modified files
-   ```bash
-   git checkout meshai/history.py
-   git checkout meshai/backends/openai_backend.py
-   git checkout meshai/responder.py
-   git checkout meshai/commands/reset.py
-   ```
-
-3. **Database is safe:** Summary table won't hurt anything, conversations table unchanged
-
-4. **No data loss:** Can drop summaries table if needed
-   ```sql
-   DROP TABLE conversation_summaries;
-   ```
-
---
-
-## Performance Validation
-
-After running for a day:
-
-```sql
-- Average messages per user
-SELECT AVG(msg_count) as avg_messages
-FROM (
-    SELECT user_id, COUNT(*) as msg_count
-    FROM conversations
-    GROUP BY user_id
-);
-
-- Users with summaries
-SELECT COUNT(*) FROM conversation_summaries;
-
-- Summary stats
-SELECT
-    AVG(message_count) as avg_summarized,
-    MIN(updated_at) as oldest_summary,
-    MAX(updated_at) as newest_summary
-FROM conversation_summaries;
-```
-
-**Expected:**
- Users with >10 messages should have summaries
- Summaries should update every ~8 new messages
- No errors in logs
-
---
-
-## Configuration Tuning
-
-If you need to adjust behavior:
-
-**In `meshai/backends/openai_backend.py`:**
-
-```python
-self._memory = RollingSummaryMemory(
-    client=self._client,
-    model=config.model,
-    window_size=4,              # ← Adjust: 3-6 typical
-    summarize_threshold=8,      # ← Adjust: 6-12 typical
-)
-```
-
-**For very short messages (like Meshtastic):**
- Try `window_size=6` (more recent context)
- Try `summarize_threshold=10` (less frequent summarization)
-
-**For longer messages:**
- Try `window_size=3` (less recent context needed)
- Try `summarize_threshold=6` (more frequent updates)
-
---
-
-## Next Steps
-
-1. Implement changes in order (create memory.py first)
-2. Test with a few users before full deployment
-3. Monitor logs for summary generation
-4. Check SQLite database for summaries
-5. Tune window_size and threshold based on actual usage
-6. Measure token savings in production
-
-Good luck! The code is solid and tested - this should be a smooth upgrade.
--- a/docs/QUICK_REFERENCE.md
+++ b/docs/QUICK_REFERENCE.md
@ -1,189 +0,0 @@
-# LLM Memory - Quick Reference Card
-
-## The Problem
-Current MeshAI sends full conversation history every request → wastes tokens, slow, expensive.
-
-## The Solution
-**Rolling Summary Memory**: Keep recent messages + LLM-generated summary of older messages.
-
-## Results
- 70-80% token reduction for long conversations
- Zero dependencies
- Works with existing stack (AsyncOpenAI + SQLite)
- ~100 lines of code
-
---
-
-## How It Works (5-Second Version)
-
-```
-Long conversation (30 messages):
-  Messages 1-22: "User discussed weather and hiking trails" (summary)
-  Messages 23-30: [sent in full]
-
-Total tokens: ~600 instead of ~2400 (75% savings)
-```
-
---
-
-## Implementation Checklist
-
- [ ] Create `meshai/memory.py` - RollingSummaryMemory class
- [ ] Modify `meshai/history.py` - Add summary table + storage methods
- [ ] Modify `meshai/backends/openai_backend.py` - Integrate memory manager
- [ ] Modify `meshai/responder.py` - Pass user_id, persist summaries
- [ ] Modify `meshai/commands/reset.py` - Clear summaries on reset
-
---
-
-## Configuration
-
-```python
-# In meshai/backends/openai_backend.py, where the memory manager is constructed
-RollingSummaryMemory(
-    client=self._client,
-    model=config.model,
-    window_size=4,           # Keep last 4 exchanges (8 messages)
-    summarize_threshold=8,   # Re-summarize after 8 new messages
-)
-```
-
-**Tune based on:**
- `window_size`: Smaller = more summarization, larger = more recent context
- `summarize_threshold`: Smaller = more frequent re-summarization
-
---
-
-## Database Schema Addition
-
-```sql
-CREATE TABLE conversation_summaries (
-    user_id TEXT PRIMARY KEY,
-    summary TEXT NOT NULL,
-    message_count INTEGER NOT NULL,
-    updated_at REAL NOT NULL
-);
-```
-
---
-
-## Testing
-
-```bash
-# Run proof-of-concept comparison
-python examples/memory_comparison.py
-
-# Update these first:
-# - BASE_URL (your LLM endpoint)
-# - API_KEY (your key)
-# - MODEL (your model name)
-```
-
-**Expected output:**
-```
-Approach             Tokens          Savings
----------------------------------------------
-Full History         1847            (baseline)
-Rolling Summary      512             72.3%
-Window Only          398             78.5%
-```
-
---
-
-## Key Code Snippets
-
-### Memory Manager Usage
-
-```python
-# Get optimized context
-summary, recent_messages = await memory.get_context_messages(
-    user_id=user_id,
-    full_history=all_messages,
-)
-
-# Build message list
-if summary:
-    system_prompt += f"\n\nPrevious conversation: {summary}"
-    context = [system] + recent_messages
-else:
-    context = [system] + all_messages
-```
-
-### Store Summary
-
-```python
-await history.store_summary(
-    user_id=user_id,
-    summary=summary_text,
-    message_count=len(old_messages)
-)
-```
-
-### Load Summary on Startup
-
-```python
-summary_data = await history.get_summary(user_id)
-if summary_data:
-    backend.load_summary_cache(user_id, summary_data)
-```
-
---
-
-## Performance Metrics
-
-| Messages | Full History | With Summary | Savings |
-|----------|--------------|--------------|---------|
-| 10       | 800 tokens   | 800 tokens   | 0%      |
-| 20       | 1600 tokens  | 550 tokens   | 66%     |
-| 30       | 2400 tokens  | 600 tokens   | 75%     |
-| 50       | 4000 tokens  | 650 tokens   | 84%     |
-
-**Cost Impact** (at $0.50/1M input tokens, 1000 requests/day):
- Before: $36/month
- After: $9/month
- **Savings: $27/month**
-
---
-
-## When to Use Alternatives
-
-| Use Case | Recommendation |
-|----------|----------------|
-| Simple stateless chat | Window-only memory |
-| MeshAI (your project) | **Rolling Summary** |
-| Want library solution | LangChain SummaryMemory |
-| Need semantic search | ChromaDB vector store |
-| Complex multi-day agent | MemGPT/Letta |
-
---
-
-## Troubleshooting
-
-**Summary too short/long?**
-→ Adjust `max_tokens` in `_summarize()` method (default: 150)
-
-**Summary quality poor?**
-→ Modify prompt in `_summarize()`, lower temperature
-
-**Too much overhead?**
-→ Increase `summarize_threshold` (re-summarize less often)
-
-**Want more context?**
-→ Increase `window_size` (keep more recent messages)
-
---
-
-## Documentation Files
-
-1. **MEMORY_SUMMARY.md** - Overview and recommendation (start here)
-2. **MEMORY_RESEARCH.md** - Detailed evaluation of all 5 approaches
-3. **MEMORY_IMPLEMENTATION_GUIDE.md** - Complete step-by-step implementation
-4. **examples/memory_comparison.py** - Runnable proof-of-concept
-5. **docs/memory_approaches_comparison.txt** - Visual comparison diagrams
-6. **docs/QUICK_REFERENCE.md** - This cheat sheet
-
---
-
-## One-Liner Summary
-
-**Use Rolling Summary**: Zero deps, 75% token savings, 100 lines of code, works with your stack.
--- a/docs/memory_approaches_comparison.txt
+++ b/docs/memory_approaches_comparison.txt
@ -1,254 +0,0 @@
-╔════════════════════════════════════════════════════════════════════════════════╗
-║                    LLM MEMORY APPROACHES COMPARISON                            ║
-╚════════════════════════════════════════════════════════════════════════════════╝
-
-┌────────────────────────────────────────────────────────────────────────────────┐
-│ 1. FULL HISTORY (Current MeshAI Implementation)                               │
-├────────────────────────────────────────────────────────────────────────────────┤
-│                                                                                │
-│  Request 1:  [System] + [Msg1, Msg2]                    = 200 tokens          │
-│  Request 5:  [System] + [Msg1...Msg10]                  = 1000 tokens         │
-│  Request 10: [System] + [Msg1...Msg20]                  = 2000 tokens         │
-│  Request 20: [System] + [Msg1...Msg40]                  = 4000 tokens         │
-│                                                                                │
-│  ✓ Complete context                                                           │
-│  ✗ Linear growth in tokens                                                    │
-│  ✗ Expensive and slow for long conversations                                  │
-│  ✗ Redundant - most messages not relevant to current query                    │
-│                                                                                │
-└────────────────────────────────────────────────────────────────────────────────┘
-
-┌────────────────────────────────────────────────────────────────────────────────┐
-│ 2. WINDOW MEMORY (Keep Last N Only)                                           │
-├────────────────────────────────────────────────────────────────────────────────┤
-│                                                                                │
-│  Request 1:  [System] + [Msg1, Msg2]                    = 200 tokens          │
-│  Request 5:  [System] + [Msg7, Msg8, Msg9, Msg10]       = 500 tokens          │
-│  Request 10: [System] + [Msg17, Msg18, Msg19, Msg20]    = 500 tokens          │
-│  Request 20: [System] + [Msg37, Msg38, Msg39, Msg40]    = 500 tokens          │
-│                                                                                │
-│  ✓ Constant token usage                                                       │
-│  ✓ Very fast and cheap                                                        │
-│  ✗ Completely forgets old context                                             │
-│  ✗ Can't reference earlier conversation                                       │
-│                                                                                │
-└────────────────────────────────────────────────────────────────────────────────┘
-
-┌────────────────────────────────────────────────────────────────────────────────┐
-│ 3. ROLLING SUMMARY (RECOMMENDED)                                              │
-├────────────────────────────────────────────────────────────────────────────────┤
-│                                                                                │
-│  Request 1-5:  [System] + [Msg1...Msg10]                = 1000 tokens         │
-│                (Short conversation - no summary yet)                           │
-│                                                                                │
-│  Request 10+:  [System + Summary] + [Recent 8 msgs]     = 600 tokens          │
-│                                                                                │
-│                ┌─────────────────────────────────────┐                         │
-│                │ Summary: "User discussed weather    │                         │
-│                │ and hiking. Mt Si is 4hr moderate   │                         │
-│                │ hike, Rattlesnake is 2mi easier."   │  (100 tokens)          │
-│                └─────────────────────────────────────┘                         │
-│                           ↓                                                    │
-│                ┌─────────────────────────────────────┐                         │
-│                │ User: How crowded does it get?      │                         │
-│                │ Assistant: Very crowded weekends    │                         │
-│                │ User: Any other trails nearby?      │  (400 tokens)          │
-│                │ Assistant: Rattlesnake is closer    │                         │
-│                │ ... (last 4 exchanges)              │                         │
-│                └─────────────────────────────────────┘                         │
-│                                                                                │
-│  Request 20:   [System + Summary] + [Recent 8 msgs]     = 600 tokens          │
-│                (Summary updated every ~8 new messages)                         │
-│                                                                                │
-│  ✓ Balanced token usage (70-80% reduction)                                    │
-│  ✓ Preserves long-term context via summary                                    │
-│  ✓ Recent messages in full detail                                             │
-│  ✓ Scalable to very long conversations                                        │
-│  ✗ Small overhead for summary generation (1-2s every 8-10 msgs)               │
-│                                                                                │
-└────────────────────────────────────────────────────────────────────────────────┘
-
-┌────────────────────────────────────────────────────────────────────────────────┐
-│ 4. VECTOR STORE MEMORY (ChromaDB/Qdrant)                                      │
-├────────────────────────────────────────────────────────────────────────────────┤
-│                                                                                │
-│  Current query: "What trails are nearby?"                                     │
-│                     ↓ (embed and search)                                      │
-│  ┌──────────────────────────────────────────────────────────────────┐         │
-│  │ Vector DB: Find semantically similar past messages               │         │
-│  │  - "Mt Si is a moderate 4-hour hike" (score: 0.89)               │         │
-│  │  - "Rattlesnake Ledge has lake views" (score: 0.85)              │         │
-│  │  - "Bring water and snacks" (score: 0.62)                        │         │
-│  └──────────────────────────────────────────────────────────────────┘         │
-│                     ↓                                                          │
-│  [System + Top 3 relevant] + [Current query]             = 500 tokens         │
-│                                                                                │
-│  ✓ Semantic retrieval - finds relevant context                                │
-│  ✓ Works for sparse conversations                                             │
-│  ✓ Enables cross-conversation search                                          │
-│  ✗ Requires embeddings (API calls or local model)                             │
-│  ✗ Adds complexity (vector DB, indexing)                                      │
-│  ✗ May retrieve irrelevant "similar" messages                                 │
-│                                                                                │
-└────────────────────────────────────────────────────────────────────────────────┘
-
-┌────────────────────────────────────────────────────────────────────────────────┐
-│ 5. MEMGPT/LETTA (Self-Editing Memory)                                         │
-├────────────────────────────────────────────────────────────────────────────────┤
-│                                                                                │
-│  ┌───────────────────────────────────┐                                        │
-│  │ Core Memory (always in context):  │                                        │
-│  │  - User: Matt                     │  (50 tokens)                           │
-│  │  - Preferences: Metric units      │                                        │
-│  └───────────────────────────────────┘                                        │
-│                ↓                                                               │
-│  ┌───────────────────────────────────┐                                        │
-│  │ Recall Memory (vector search):    │                                        │
-│  │  - [Retrieved: 3 relevant msgs]   │  (300 tokens)                          │
-│  └───────────────────────────────────┘                                        │
-│                ↓                                                               │
-│  ┌───────────────────────────────────┐                                        │
-│  │ Archival Memory (long-term):      │                                        │
-│  │  - [Searchable but not loaded]    │                                        │
-│  └───────────────────────────────────┘                                        │
-│                                                                                │
-│  Agent decides what to remember/forget/search                                 │
-│                                                                                │
-│  ✓ Most sophisticated - agent manages own memory                              │
-│  ✓ Handles complex multi-day conversations                                    │
-│  ✗ Very heavy (200MB+ dependencies)                                           │
-│  ✗ Requires vector embeddings                                                 │
-│  ✗ Overkill for simple chat                                                   │
-│  ✗ Opinionated architecture (hard to integrate)                               │
-│                                                                                │
-└────────────────────────────────────────────────────────────────────────────────┘
-
-╔════════════════════════════════════════════════════════════════════════════════╗
-║                         RECOMMENDATION MATRIX                                  ║
-╚════════════════════════════════════════════════════════════════════════════════╝
-
-┌──────────────┬──────────────┬────────────┬──────────────┬──────────────────────┐
-│   Approach   │ Dependencies │   Tokens   │  Complexity  │    Use Case          │
-├──────────────┼──────────────┼────────────┼──────────────┼──────────────────────┤
-│ Full History │     None     │    High    │     Low      │ Don't use (baseline) │
-├──────────────┼──────────────┼────────────┼──────────────┼──────────────────────┤
-│ Window Only  │     None     │    Low     │     Low      │ Stateless chat bots  │
-├──────────────┼──────────────┼────────────┼──────────────┼──────────────────────┤
-│ Rolling      │              │            │              │ ✓ MESHAI             │
-│ Summary      │     None     │ Very Low   │     Low      │ ✓ Most projects      │
-│ (DIY)        │              │            │              │ ✓ Best balance       │
-├──────────────┼──────────────┼────────────┼──────────────┼──────────────────────┤
-│ LangChain    │   ~50 MB     │ Very Low   │    Medium    │ Want batteries-      │
-│ Summary      │              │            │              │ included solution    │
-├──────────────┼──────────────┼────────────┼──────────────┼──────────────────────┤
-│ Vector Store │   ~20 MB     │    Low     │    Medium    │ Semantic search,     │
-│ (ChromaDB)   │              │            │              │ long-term memory     │
-├──────────────┼──────────────┼────────────┼──────────────┼──────────────────────┤
-│ MemGPT/Letta │  ~200 MB     │    Low     │  Very High   │ Complex multi-day    │
-│              │              │            │              │ agent workflows      │
-└──────────────┴──────────────┴────────────┴──────────────┴──────────────────────┘
-
-╔════════════════════════════════════════════════════════════════════════════════╗
-║                     PERFORMANCE COMPARISON (20 requests)                       ║
-╚════════════════════════════════════════════════════════════════════════════════╝
-
-  Tokens Sent to LLM
-  ↑
-  │
-4000│  ████████████████████████████████  Full History
-  │
-3000│
-  │
-2000│
-  │
-1000│
-  │
- 600│           ██████  Rolling Summary
- 500│                   █████  Window Only
-  │                    █████  Vector Store
-  0└─────────────────────────────────────────────────────────→
-     1    5   10   15   20   25   30   35   40  (Conversation length)
-
-  Legend:
-  ████  Full History (linear growth)
-  ████  Rolling Summary (plateau after initial growth)
-  ████  Window/Vector (constant)
-
-
-╔════════════════════════════════════════════════════════════════════════════════╗
-║                    IMPLEMENTATION COMPLEXITY                                   ║
-╚════════════════════════════════════════════════════════════════════════════════╝
-
-┌─────────────────────────────────────────────────────────────────────────────┐
-│  Simple ←───────────────────────────────────────────────────→ Complex       │
-├─────────────────────────────────────────────────────────────────────────────┤
-│                                                                             │
-│  Window Only          Rolling Summary       LangChain        MemGPT        │
-│  (20 lines)           (100 lines)           (10 lines       (200+ lines    │
-│                                             + 50MB dep)      + 200MB dep)   │
-│                                                                             │
-│  ↑                    ↑                     ↑                ↑              │
-│  No deps              No deps               Heavy deps       Very heavy     │
-│  No persistence       SQLite persist        In-memory        Built-in DB    │
-│  Loses old context    Keeps summary         Keeps summary    Multi-tier     │
-│                                                                             │
-│                       ★ RECOMMENDED ★                                       │
-└─────────────────────────────────────────────────────────────────────────────┘
-
-╔════════════════════════════════════════════════════════════════════════════════╗
-║                      FOR MESHAI SPECIFICALLY                                   ║
-╚════════════════════════════════════════════════════════════════════════════════╝
-
-Current:
-  - Messages: 150 chars max (very small)
-  - Conversations: Per-user, linear
-  - Backend: OpenAI-compatible (LiteLLM, local models)
-  - Storage: SQLite + aiosqlite
-  - Problem: Full history sent every time
-
-Constraints:
-  - Lightweight (runs on mesh nodes potentially)
-  - No heavy dependencies
-  - Must work offline (local models)
-  - Persistence required (survive restarts)
-
-Solution: Rolling Summary
-  ✓ Zero dependencies (pure Python)
-  ✓ Works with existing AsyncOpenAI client
-  ✓ Persists in existing SQLite database
-  ✓ ~100 lines of code (easy to maintain)
-  ✓ 70-80% token reduction
-  ✓ Tunable (window_size, summarize_threshold)
-
-Configuration:
-  - window_size = 4 (keep last 4 exchanges = 8 messages)
-  - summarize_threshold = 8 (re-summarize after 8 new messages)
-
-Expected savings:
-  - 10 messages: 0% (no summary yet)
-  - 20 messages: 66% token reduction
-  - 30 messages: 75% token reduction
-  - 50 messages: 84% token reduction
-
-Cost impact (at $0.50/1M tokens):
-  - Before: $0.0012 per request (2400 tokens)
-  - After:  $0.0003 per request (600 tokens)
-  - Savings: $27/month for 1000 requests/day
-
-╔════════════════════════════════════════════════════════════════════════════════╗
-║                              NEXT STEPS                                        ║
-╚════════════════════════════════════════════════════════════════════════════════╝
-
-1. Read:   MEMORY_SUMMARY.md (quick overview)
-2. Study:  MEMORY_RESEARCH.md (detailed analysis)
-3. Test:   python examples/memory_comparison.py (see it in action)
-4. Build:  MEMORY_IMPLEMENTATION_GUIDE.md (step-by-step)
-5. Deploy: Monitor and tune based on real usage
-
-Files created:
-  - /home/zvx/projects/meshai/MEMORY_SUMMARY.md
-  - /home/zvx/projects/meshai/MEMORY_RESEARCH.md
-  - /home/zvx/projects/meshai/MEMORY_IMPLEMENTATION_GUIDE.md
-  - /home/zvx/projects/meshai/examples/memory_comparison.py
-
-Good luck! 🚀
--- a/examples/memory_comparison.py
+++ b/examples/memory_comparison.py
@ -1,285 +0,0 @@
-#!/usr/bin/env python3
-"""
-Proof-of-concept: Compare full history vs rolling summary memory.
-
-Demonstrates token savings and performance of different approaches.
-
-Usage:
-    python examples/memory_comparison.py
-"""
-
-import asyncio
-import time
-from typing import Optional
-
-from openai import AsyncOpenAI
-
-
-# ============================================================================
-# SIMPLE ROLLING SUMMARY IMPLEMENTATION
-# ============================================================================
-
-
-class SimpleRollingSummary:
-    """Minimal rolling summary memory manager for testing."""
-
-    def __init__(
-        self,
-        client: AsyncOpenAI,
-        model: str,
-        window_size: int = 4,
-    ):
-        self.client = client
-        self.model = model
-        self.window_size = window_size
-        self._summary_cache = {}
-
-    async def get_context(
-        self, user_id: str, messages: list[dict]
-    ) -> tuple[Optional[str], list[dict]]:
-        """Return (summary, recent_messages) for optimized context."""
-
-        # Short conversation - return all messages
-        if len(messages) <= self.window_size * 2:
-            return None, messages
-
-        # Split old and recent
-        split = -(self.window_size * 2)
-        old = messages[:split]
-        recent = messages[split:]
-
-        # Get or create summary
-        if user_id not in self._summary_cache:
-            summary = await self._summarize(old)
-            self._summary_cache[user_id] = summary
-        else:
-            summary = self._summary_cache[user_id]
-
-        return summary, recent
-
-    async def _summarize(self, messages: list[dict]) -> str:
-        """Generate summary of messages."""
-        conv = "\n".join([f"{m['role'].upper()}: {m['content']}" for m in messages])
-
-        prompt = f"""Summarize this conversation in 2-3 concise sentences:
-
-{conv}
-
-Summary:"""
-
-        response = await self.client.chat.completions.create(
-            model=self.model,
-            messages=[{"role": "user", "content": prompt}],
-            max_tokens=150,
-            temperature=0.3,
-        )
-
-        return response.choices[0].message.content.strip()
-
-
-# ============================================================================
-# COMPARISON SCENARIOS
-# ============================================================================
-
-
-async def test_full_history(client: AsyncOpenAI, model: str, messages: list[dict]):
-    """Baseline: Send full conversation history."""
-    print("\n=== FULL HISTORY APPROACH ===")
-
-    system = "You are a helpful assistant on a mesh network."
-    full = [{"role": "system", "content": system}] + messages
-
-    start = time.time()
-
-    response = await client.chat.completions.create(
-        model=model, messages=full, max_tokens=100, temperature=0.7
-    )
-
-    elapsed = time.time() - start
-
-    # Estimate tokens (rough)
-    total_chars = sum(len(m["content"]) for m in full)
-    est_tokens = total_chars // 4  # Rough estimate: 4 chars = 1 token
-
-    print(f"Messages sent: {len(full)}")
-    print(f"Est. input tokens: {est_tokens}")
-    print(f"Response: {response.choices[0].message.content[:100]}...")
-    print(f"Time: {elapsed:.2f}s")
-
-    return est_tokens, elapsed
-
-
-async def test_rolling_summary(
-    client: AsyncOpenAI, model: str, messages: list[dict], user_id: str
-):
-    """Optimized: Send summary + recent messages."""
-    print("\n=== ROLLING SUMMARY APPROACH ===")
-
-    memory = SimpleRollingSummary(client, model, window_size=4)
-
-    summary, recent = await memory.get_context(user_id, messages)
-
-    system = "You are a helpful assistant on a mesh network."
-    if summary:
-        system += f"\n\nPrevious conversation summary: {summary}"
-
-    context = [{"role": "system", "content": system}] + recent
-
-    start = time.time()
-
-    response = await client.chat.completions.create(
-        model=model, messages=context, max_tokens=100, temperature=0.7
-    )
-
-    elapsed = time.time() - start
-
-    # Estimate tokens
-    total_chars = sum(len(m["content"]) for m in context)
-    est_tokens = total_chars // 4
-
-    print(f"Messages sent: {len(context)} (summary: {summary is not None})")
-    if summary:
-        print(f"Summary: {summary[:80]}...")
-    print(f"Est. input tokens: {est_tokens}")
-    print(f"Response: {response.choices[0].message.content[:100]}...")
-    print(f"Time: {elapsed:.2f}s")
-
-    return est_tokens, elapsed
-
-
-async def test_window_only(client: AsyncOpenAI, model: str, messages: list[dict]):
-    """Simple window: Just last N messages, no summary."""
-    print("\n=== WINDOW-ONLY APPROACH ===")
-
-    window_size = 4
-    recent = messages[-(window_size * 2) :]
-
-    system = "You are a helpful assistant on a mesh network."
-    context = [{"role": "system", "content": system}] + recent
-
-    start = time.time()
-
-    response = await client.chat.completions.create(
-        model=model, messages=context, max_tokens=100, temperature=0.7
-    )
-
-    elapsed = time.time() - start
-
-    total_chars = sum(len(m["content"]) for m in context)
-    est_tokens = total_chars // 4
-
-    print(f"Messages sent: {len(context)} (last {window_size} exchanges only)")
-    print(f"Est. input tokens: {est_tokens}")
-    print(f"Response: {response.choices[0].message.content[:100]}...")
-    print(f"Time: {elapsed:.2f}s")
-
-    return est_tokens, elapsed
-
-
-# ============================================================================
-# MAIN TEST
-# ============================================================================
-
-
-async def main():
-    """Run comparison test."""
-
-    # Configure your LLM endpoint
-    # Update these for your setup (LiteLLM, local model, etc.)
-    BASE_URL = "http://192.168.1.239:8000/v1"  # LiteLLM endpoint
-    API_KEY = "sk-1234"  # Your API key
-    MODEL = "gpt-4o-mini"  # Your model
-
-    print("=" * 70)
-    print("LLM Memory Approach Comparison")
-    print("=" * 70)
-
-    # Create test conversation (simulate 15 exchanges = 30 messages)
-    messages = []
-    topics = [
-        ("What's the weather?", "It's sunny and 72°F."),
-        ("Should I bring an umbrella?", "No need, clear skies all day."),
-        ("What about tomorrow?", "Tomorrow looks rainy, bring an umbrella."),
-        ("Any hiking recommendations?", "Try Mt. Si, great views!"),
-        ("How long is the hike?", "About 4 hours round trip."),
-        ("Is it beginner friendly?", "Moderate difficulty, doable for most."),
-        ("What should I bring?", "Water, snacks, good boots, and layers."),
-        ("Are dogs allowed?", "Yes, but must be leashed."),
-        ("Where's the trailhead?", "Off I-90 near North Bend."),
-        ("Parking fee?", "Yes, $10 or Northwest Forest Pass."),
-        ("What time should I start?", "Early morning, around 7-8 AM."),
-        ("How crowded does it get?", "Very crowded on weekends, go weekdays."),
-        ("Any other trails nearby?", "Rattlesnake Ledge is easier and closer."),
-        ("Tell me about Rattlesnake", "2 miles, great lake views, very popular."),
-        ("Which would you recommend?", "If fit: Mt Si. If casual: Rattlesnake."),
-    ]
-
-    for user_msg, assistant_msg in topics:
-        messages.append({"role": "user", "content": user_msg})
-        messages.append({"role": "assistant", "content": assistant_msg})
-
-    print(f"\nTest conversation: {len(messages)} messages ({len(messages)//2} exchanges)")
-    print(f"Topics: weather → hiking → trails")
-    print(f"Message lengths: {min(len(m['content']) for m in messages)}-{max(len(m['content']) for m in messages)} chars")
-
-    # Initialize client
-    client = AsyncOpenAI(api_key=API_KEY, base_url=BASE_URL)
-
-    try:
-        # Test each approach
-        full_tokens, full_time = await test_full_history(client, MODEL, messages)
-        summary_tokens, summary_time = await test_rolling_summary(
-            client, MODEL, messages, "!test_user"
-        )
-        window_tokens, window_time = await test_window_only(client, MODEL, messages)
-
-        # Results
-        print("\n" + "=" * 70)
-        print("COMPARISON RESULTS")
-        print("=" * 70)
-
-        print(f"\n{'Approach':<20} {'Tokens':<15} {'Time':<10} {'Savings'}")
-        print("-" * 70)
-        print(
-            f"{'Full History':<20} {full_tokens:<15} {full_time:<10.2f}s {'(baseline)'}"
-        )
-        print(
-            f"{'Rolling Summary':<20} {summary_tokens:<15} {summary_time:<10.2f}s "
-            f"{(1 - summary_tokens/full_tokens)*100:.1f}%"
-        )
-        print(
-            f"{'Window Only':<20} {window_tokens:<15} {window_time:<10.2f}s "
-            f"{(1 - window_tokens/full_tokens)*100:.1f}%"
-        )
-
-        print("\n" + "=" * 70)
-        print("RECOMMENDATIONS")
-        print("=" * 70)
-
-        print("\nFull History:")
-        print("  ✓ Complete context")
-        print("  ✗ High token usage")
-        print("  ✗ Slower for long conversations")
-        print("  Use: Never (inefficient)")
-
-        print("\nWindow Only:")
-        print("  ✓ Very low token usage")
-        print("  ✓ Fast")
-        print("  ✗ Loses older context completely")
-        print("  Use: Short-term conversations only")
-
-        print("\nRolling Summary:")
-        print("  ✓ Balanced token usage")
-        print("  ✓ Preserves long-term context")
-        print("  ✓ Fast after initial summary")
-        print("  ✗ Slight overhead for summarization")
-        print("  Use: RECOMMENDED for MeshAI")
-
-        print("\n" + "=" * 70)
-
-    finally:
-        await client.close()
-
-
-if __name__ == "__main__":
-    asyncio.run(main())