mirror of
https://github.com/zvx-echo6/meshai.git
synced 2026-05-21 23:24:44 +02:00
Features: - Multi-backend LLM support (OpenAI, Anthropic, Google) - Rolling summary memory for token optimization (~70-80% reduction) - Per-user conversation history with SQLite persistence - Bang commands (!help, !ping, !reset, !status, !weather) - Meshtastic integration via serial or TCP - Message chunking for mesh network constraints (150 char limit) - Rate limiting to prevent network congestion - Rich TUI configurator - Docker support 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
285 lines
9.3 KiB
Python
Executable file
285 lines
9.3 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
"""
|
|
Proof-of-concept: Compare full history vs rolling summary memory.
|
|
|
|
Demonstrates token savings and performance of different approaches.
|
|
|
|
Usage:
|
|
python examples/memory_comparison.py
|
|
"""
|
|
|
|
import asyncio
|
|
import time
|
|
from typing import Optional
|
|
|
|
from openai import AsyncOpenAI
|
|
|
|
|
|
# ============================================================================
|
|
# SIMPLE ROLLING SUMMARY IMPLEMENTATION
|
|
# ============================================================================
|
|
|
|
|
|
class SimpleRollingSummary:
    """Minimal rolling summary memory manager for testing.

    Keeps the last ``window_size`` exchanges (two messages per exchange)
    verbatim and replaces everything older with a short LLM-generated
    summary. Summaries are cached per user together with the number of
    messages they cover, and are regenerated whenever that number changes,
    so the summary keeps up as the conversation grows.
    """

    def __init__(
        self,
        client: "AsyncOpenAI",
        model: str,
        window_size: int = 4,
    ):
        """Store the client, model name and window size.

        Args:
            client: Object whose ``chat.completions.create(...)`` can be
                awaited (an ``AsyncOpenAI`` client in this script).
            model: Model name passed to the summarization call.
            window_size: Number of recent exchanges kept verbatim; one
                exchange counts as two messages. Values below zero are
                treated as zero.
        """
        self.client = client
        self.model = model
        self.window_size = window_size
        # user_id -> (number of old messages summarized, summary text)
        self._summary_cache = {}

    async def get_context(
        self, user_id: str, messages: list[dict]
    ) -> tuple[Optional[str], list[dict]]:
        """Return (summary, recent_messages) for optimized context.

        Args:
            user_id: Key under which the summary is cached.
            messages: Full conversation, oldest first; each item needs
                ``"role"`` and ``"content"`` keys.

        Returns:
            ``(None, messages)`` when the conversation fits inside the
            window, otherwise ``(summary_of_older_messages, recent)``.
        """
        keep = max(self.window_size, 0) * 2

        # Short conversation - return all messages
        if len(messages) <= keep:
            return None, messages

        # Split old and recent. A positive index is used because a slice
        # bound of ``-0`` would select from the start of the list.
        split = len(messages) - keep
        old = messages[:split]
        recent = messages[split:]

        # Reuse the cached summary only if it covers exactly the messages
        # that are outside the window now; otherwise it is stale.
        cached = self._summary_cache.get(user_id)
        if cached is not None and cached[0] == len(old):
            return cached[1], recent

        summary = await self._summarize(old)
        self._summary_cache[user_id] = (len(old), summary)
        return summary, recent

    async def _summarize(self, messages: list[dict]) -> str:
        """Generate summary of messages.

        Sends the messages as a ``ROLE: content`` transcript in a single
        user prompt and returns the stripped reply, or an empty string if
        the reply carries no text content.
        """
        conv = "\n".join(f"{m['role'].upper()}: {m['content']}" for m in messages)

        prompt = (
            "Summarize this conversation in 2-3 concise sentences:\n\n"
            f"{conv}\n\n"
            "Summary:"
        )

        response = await self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=150,
            temperature=0.3,
        )

        # The message content may be None; avoid calling .strip() on it.
        content = response.choices[0].message.content
        return (content or "").strip()
|
|
|
|
|
|
# ============================================================================
|
|
# COMPARISON SCENARIOS
|
|
# ============================================================================
|
|
|
|
|
|
async def test_full_history(client: AsyncOpenAI, model: str, messages: list[dict]):
    """Baseline: Send full conversation history.

    Prepends a fixed system prompt to ``messages``, requests one completion
    and prints a short report.

    Args:
        client: Client used for the chat completion request.
        model: Model name passed to the request.
        messages: Conversation to send; each item needs a ``"content"`` key.

    Returns:
        ``(est_tokens, elapsed)``: estimated input tokens (total characters
        divided by four) and the request duration in seconds.
    """
    print("\n=== FULL HISTORY APPROACH ===")

    system = "You are a helpful assistant on a mesh network."
    full = [{"role": "system", "content": system}] + messages

    # perf_counter is monotonic, so the measured interval cannot be skewed
    # by wall-clock adjustments the way time.time() can.
    start = time.perf_counter()

    response = await client.chat.completions.create(
        model=model, messages=full, max_tokens=100, temperature=0.7
    )

    elapsed = time.perf_counter() - start

    # Estimate tokens (rough)
    total_chars = sum(len(m["content"]) for m in full)
    est_tokens = total_chars // 4  # Rough estimate: 4 chars = 1 token

    # The reply content may be None; fall back to an empty string so the
    # preview slice below cannot raise.
    reply = response.choices[0].message.content or ""

    print(f"Messages sent: {len(full)}")
    print(f"Est. input tokens: {est_tokens}")
    print(f"Response: {reply[:100]}...")
    print(f"Time: {elapsed:.2f}s")

    return est_tokens, elapsed
|
|
|
|
|
|
async def test_rolling_summary(
    client: AsyncOpenAI,
    model: str,
    messages: list[dict],
    user_id: str,
    window_size: int = 4,
):
    """Optimized: Send summary + recent messages.

    Builds the context with ``SimpleRollingSummary``, appends any summary to
    the system prompt, requests one completion and prints a short report.
    The timer starts after the context is built, so the reported time covers
    only the final completion request, not the summarization call.

    Args:
        client: Client used for both summarization and the completion.
        model: Model name passed to both requests.
        messages: Conversation to compress; each item needs a ``"content"``
            key.
        user_id: Cache key handed to the memory manager.
        window_size: Number of recent exchanges kept verbatim.

    Returns:
        ``(est_tokens, elapsed)``: estimated input tokens of the final
        request (total characters divided by four) and its duration in
        seconds.
    """
    print("\n=== ROLLING SUMMARY APPROACH ===")

    memory = SimpleRollingSummary(client, model, window_size=window_size)

    summary, recent = await memory.get_context(user_id, messages)

    system = "You are a helpful assistant on a mesh network."
    if summary:
        system += f"\n\nPrevious conversation summary: {summary}"

    context = [{"role": "system", "content": system}] + recent

    # perf_counter is monotonic, so the measured interval cannot be skewed
    # by wall-clock adjustments the way time.time() can.
    start = time.perf_counter()

    response = await client.chat.completions.create(
        model=model, messages=context, max_tokens=100, temperature=0.7
    )

    elapsed = time.perf_counter() - start

    # Estimate tokens
    total_chars = sum(len(m["content"]) for m in context)
    est_tokens = total_chars // 4

    # The reply content may be None; fall back to an empty string so the
    # preview slice below cannot raise.
    reply = response.choices[0].message.content or ""

    print(f"Messages sent: {len(context)} (summary: {summary is not None})")
    if summary:
        print(f"Summary: {summary[:80]}...")
    print(f"Est. input tokens: {est_tokens}")
    print(f"Response: {reply[:100]}...")
    print(f"Time: {elapsed:.2f}s")

    return est_tokens, elapsed
|
|
|
|
|
|
async def test_window_only(
    client: AsyncOpenAI,
    model: str,
    messages: list[dict],
    window_size: int = 4,
):
    """Simple window: Just last N messages, no summary.

    Sends a fixed system prompt plus the last ``window_size`` exchanges
    (two messages each), requests one completion and prints a short report.

    Args:
        client: Client used for the chat completion request.
        model: Model name passed to the request.
        messages: Conversation to truncate; each item needs a ``"content"``
            key.
        window_size: Number of recent exchanges to send. Zero or less sends
            only the system prompt.

    Returns:
        ``(est_tokens, elapsed)``: estimated input tokens (total characters
        divided by four) and the request duration in seconds.
    """
    print("\n=== WINDOW-ONLY APPROACH ===")

    # Guard the zero case explicitly: messages[-0:] would select the whole
    # list instead of nothing.
    keep = max(window_size, 0) * 2
    recent = messages[-keep:] if keep else []

    system = "You are a helpful assistant on a mesh network."
    context = [{"role": "system", "content": system}] + recent

    # perf_counter is monotonic, so the measured interval cannot be skewed
    # by wall-clock adjustments the way time.time() can.
    start = time.perf_counter()

    response = await client.chat.completions.create(
        model=model, messages=context, max_tokens=100, temperature=0.7
    )

    elapsed = time.perf_counter() - start

    total_chars = sum(len(m["content"]) for m in context)
    est_tokens = total_chars // 4

    # The reply content may be None; fall back to an empty string so the
    # preview slice below cannot raise.
    reply = response.choices[0].message.content or ""

    print(f"Messages sent: {len(context)} (last {window_size} exchanges only)")
    print(f"Est. input tokens: {est_tokens}")
    print(f"Response: {reply[:100]}...")
    print(f"Time: {elapsed:.2f}s")

    return est_tokens, elapsed
|
|
|
|
|
|
# ============================================================================
|
|
# MAIN TEST
|
|
# ============================================================================
|
|
|
|
|
|
async def main(
    base_url: str = "http://192.168.1.239:8000/v1",  # LiteLLM endpoint
    api_key: str = "sk-1234",  # Your API key
    model: str = "gpt-4o-mini",  # Your model
):
    """Run comparison test.

    Builds a fixed 15-exchange conversation, runs the full-history,
    rolling-summary and window-only approaches against the same endpoint,
    then prints a comparison table and recommendations. The client is
    closed even if one of the runs raises.

    Args:
        base_url: Base URL of the OpenAI-compatible endpoint. Update the
            default for your setup (LiteLLM, local model, etc.).
        api_key: API key sent to the endpoint.
        model: Model name used for every request.
    """
    print("=" * 70)
    print("LLM Memory Approach Comparison")
    print("=" * 70)

    # Create test conversation (simulate 15 exchanges = 30 messages)
    topics = [
        ("What's the weather?", "It's sunny and 72°F."),
        ("Should I bring an umbrella?", "No need, clear skies all day."),
        ("What about tomorrow?", "Tomorrow looks rainy, bring an umbrella."),
        ("Any hiking recommendations?", "Try Mt. Si, great views!"),
        ("How long is the hike?", "About 4 hours round trip."),
        ("Is it beginner friendly?", "Moderate difficulty, doable for most."),
        ("What should I bring?", "Water, snacks, good boots, and layers."),
        ("Are dogs allowed?", "Yes, but must be leashed."),
        ("Where's the trailhead?", "Off I-90 near North Bend."),
        ("Parking fee?", "Yes, $10 or Northwest Forest Pass."),
        ("What time should I start?", "Early morning, around 7-8 AM."),
        ("How crowded does it get?", "Very crowded on weekends, go weekdays."),
        ("Any other trails nearby?", "Rattlesnake Ledge is easier and closer."),
        ("Tell me about Rattlesnake", "2 miles, great lake views, very popular."),
        ("Which would you recommend?", "If fit: Mt Si. If casual: Rattlesnake."),
    ]

    messages = []
    for user_msg, assistant_msg in topics:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": assistant_msg})

    # Measure each message once instead of scanning the list twice.
    lengths = [len(m["content"]) for m in messages]

    print(f"\nTest conversation: {len(messages)} messages ({len(messages)//2} exchanges)")
    print("Topics: weather → hiking → trails")
    print(f"Message lengths: {min(lengths)}-{max(lengths)} chars")

    # Initialize client
    client = AsyncOpenAI(api_key=api_key, base_url=base_url)

    try:
        # Test each approach
        full_tokens, full_time = await test_full_history(client, model, messages)
        summary_tokens, summary_time = await test_rolling_summary(
            client, model, messages, "!test_user"
        )
        window_tokens, window_time = await test_window_only(client, model, messages)

        # Results
        print("\n" + "=" * 70)
        print("COMPARISON RESULTS")
        print("=" * 70)

        print(f"\n{'Approach':<20} {'Tokens':<15} {'Time':<10} {'Savings'}")
        print("-" * 70)

        rows = [
            ("Full History", full_tokens, full_time, "(baseline)"),
            (
                "Rolling Summary",
                summary_tokens,
                summary_time,
                f"{(1 - summary_tokens/full_tokens)*100:.1f}%",
            ),
            (
                "Window Only",
                window_tokens,
                window_time,
                f"{(1 - window_tokens/full_tokens)*100:.1f}%",
            ),
        ]
        for approach, tokens, seconds, savings in rows:
            # Attach the unit before padding so the "s" stays next to the
            # number and the Savings column lines up with the header.
            duration = f"{seconds:.2f}s"
            print(f"{approach:<20} {tokens:<15} {duration:<10} {savings}")

        print("\n" + "=" * 70)
        print("RECOMMENDATIONS")
        print("=" * 70)

        print("\nFull History:")
        print(" ✓ Complete context")
        print(" ✗ High token usage")
        print(" ✗ Slower for long conversations")
        print(" Use: Never (inefficient)")

        print("\nWindow Only:")
        print(" ✓ Very low token usage")
        print(" ✓ Fast")
        print(" ✗ Loses older context completely")
        print(" Use: Short-term conversations only")

        print("\nRolling Summary:")
        print(" ✓ Balanced token usage")
        print(" ✓ Preserves long-term context")
        print(" ✓ Fast after initial summary")
        print(" ✗ Slight overhead for summarization")
        print(" Use: RECOMMENDED for MeshAI")

        print("\n" + "=" * 70)

    finally:
        # Always release the client's resources, even if a run failed.
        await client.close()
|
|
|
|
|
|
# Run the comparison only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    asyncio.run(main())
|