mirror of
https://github.com/zvx-echo6/meshai.git
synced 2026-05-21 23:24:44 +02:00
Remove FallbackBackend and generate_with_search from all backends
FallbackBackend was never configured in practice. generate_with_search was an unused abstract method on all backends. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
10bc94b273
commit
784b2e3fea
6 changed files with 1 additions and 289 deletions
|
|
@ -4,13 +4,10 @@ from .base import LLMBackend
|
||||||
from .openai_backend import OpenAIBackend
|
from .openai_backend import OpenAIBackend
|
||||||
from .anthropic_backend import AnthropicBackend
|
from .anthropic_backend import AnthropicBackend
|
||||||
from .google_backend import GoogleBackend
|
from .google_backend import GoogleBackend
|
||||||
from .fallback import FallbackBackend, create_backend
|
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"LLMBackend",
|
"LLMBackend",
|
||||||
"OpenAIBackend",
|
"OpenAIBackend",
|
||||||
"AnthropicBackend",
|
"AnthropicBackend",
|
||||||
"GoogleBackend",
|
"GoogleBackend",
|
||||||
"FallbackBackend",
|
|
||||||
"create_backend",
|
|
||||||
]
|
]
|
||||||
|
|
|
||||||
|
|
@ -133,21 +133,6 @@ class AnthropicBackend(LLMBackend):
|
||||||
"""Get the memory manager instance."""
|
"""Get the memory manager instance."""
|
||||||
return self._memory
|
return self._memory
|
||||||
|
|
||||||
async def generate_with_search(
|
|
||||||
self,
|
|
||||||
query: str,
|
|
||||||
system_prompt: Optional[str] = None,
|
|
||||||
) -> str:
|
|
||||||
"""Generate response - Anthropic doesn't have built-in search."""
|
|
||||||
prompt = system_prompt or (
|
|
||||||
"You are a helpful assistant. Answer the following question "
|
|
||||||
"based on your knowledge."
|
|
||||||
)
|
|
||||||
|
|
||||||
messages = [{"role": "user", "content": query}]
|
|
||||||
|
|
||||||
return await self.generate(messages, prompt, max_tokens=300)
|
|
||||||
|
|
||||||
async def close(self) -> None:
|
async def close(self) -> None:
|
||||||
"""Close the client."""
|
"""Close the client."""
|
||||||
await self._client.close()
|
await self._client.close()
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,7 @@
|
||||||
"""Base class for LLM backends."""
|
"""Base class for LLM backends."""
|
||||||
|
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from typing import TYPE_CHECKING, Optional
|
from typing import Optional
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
|
||||||
from ..memory import ConversationSummary
|
|
||||||
|
|
||||||
|
|
||||||
class LLMBackend(ABC):
|
class LLMBackend(ABC):
|
||||||
|
|
@ -35,23 +32,6 @@ class LLMBackend(ABC):
|
||||||
"""Get the memory manager instance. Override in subclasses."""
|
"""Get the memory manager instance. Override in subclasses."""
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
async def generate_with_search(
|
|
||||||
self,
|
|
||||||
query: str,
|
|
||||||
system_prompt: Optional[str] = None,
|
|
||||||
) -> str:
|
|
||||||
"""Generate a response with web search capability.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
query: Search/question to answer
|
|
||||||
system_prompt: Optional system prompt
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Generated response text
|
|
||||||
"""
|
|
||||||
pass
|
|
||||||
|
|
||||||
async def close(self) -> None:
|
async def close(self) -> None:
|
||||||
"""Clean up resources. Override if needed."""
|
"""Clean up resources. Override if needed."""
|
||||||
pass
|
pass
|
||||||
|
|
|
||||||
|
|
@ -1,218 +0,0 @@
|
||||||
"""Fallback-aware LLM backend wrapper."""
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import logging
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
from ..config import LLMConfig, LLMBackendConfig
|
|
||||||
from .base import LLMBackend
|
|
||||||
from .openai_backend import OpenAIBackend
|
|
||||||
from .anthropic_backend import AnthropicBackend
|
|
||||||
from .google_backend import GoogleBackend
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def create_backend(
|
|
||||||
backend_type: str,
|
|
||||||
api_key: str,
|
|
||||||
base_url: str,
|
|
||||||
model: str,
|
|
||||||
timeout: int,
|
|
||||||
window_size: int = 0,
|
|
||||||
summarize_threshold: int = 8,
|
|
||||||
) -> LLMBackend:
|
|
||||||
"""Create an LLM backend instance.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
backend_type: Type of backend (openai, anthropic, google)
|
|
||||||
api_key: API key for the backend
|
|
||||||
base_url: Base URL for the API
|
|
||||||
model: Model name to use
|
|
||||||
timeout: Request timeout in seconds
|
|
||||||
window_size: Memory window size
|
|
||||||
summarize_threshold: When to summarize older messages
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Configured LLM backend instance
|
|
||||||
"""
|
|
||||||
# Create a minimal config object for the backend
|
|
||||||
from dataclasses import dataclass
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class MinimalLLMConfig:
|
|
||||||
backend: str
|
|
||||||
api_key: str
|
|
||||||
base_url: str
|
|
||||||
model: str
|
|
||||||
system_prompt: str = ""
|
|
||||||
|
|
||||||
config = MinimalLLMConfig(
|
|
||||||
backend=backend_type,
|
|
||||||
api_key=api_key,
|
|
||||||
base_url=base_url,
|
|
||||||
model=model,
|
|
||||||
)
|
|
||||||
|
|
||||||
backend_type = backend_type.lower()
|
|
||||||
if backend_type == "openai":
|
|
||||||
return OpenAIBackend(config, api_key, window_size, summarize_threshold)
|
|
||||||
elif backend_type == "anthropic":
|
|
||||||
return AnthropicBackend(config, api_key, window_size, summarize_threshold)
|
|
||||||
elif backend_type == "google":
|
|
||||||
return GoogleBackend(config, api_key, window_size, summarize_threshold)
|
|
||||||
else:
|
|
||||||
logger.warning(f"Unknown backend '{backend_type}', defaulting to OpenAI")
|
|
||||||
return OpenAIBackend(config, api_key, window_size, summarize_threshold)
|
|
||||||
|
|
||||||
|
|
||||||
class FallbackBackend(LLMBackend):
|
|
||||||
"""LLM backend with automatic fallback support."""
|
|
||||||
|
|
||||||
def __init__(
|
|
||||||
self,
|
|
||||||
config: LLMConfig,
|
|
||||||
api_key: str,
|
|
||||||
window_size: int = 0,
|
|
||||||
summarize_threshold: int = 8,
|
|
||||||
):
|
|
||||||
self.config = config
|
|
||||||
self.api_key = api_key
|
|
||||||
self.window_size = window_size
|
|
||||||
self.summarize_threshold = summarize_threshold
|
|
||||||
|
|
||||||
# Create primary backend
|
|
||||||
self.primary = create_backend(
|
|
||||||
backend_type=config.backend,
|
|
||||||
api_key=api_key,
|
|
||||||
base_url=config.base_url,
|
|
||||||
model=config.model,
|
|
||||||
timeout=config.timeout,
|
|
||||||
window_size=window_size,
|
|
||||||
summarize_threshold=summarize_threshold,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Create fallback backend if configured
|
|
||||||
self.fallback: Optional[LLMBackend] = None
|
|
||||||
if config.fallback:
|
|
||||||
fb = config.fallback
|
|
||||||
fb_api_key = fb.api_key or api_key # Use primary key if not specified
|
|
||||||
self.fallback = create_backend(
|
|
||||||
backend_type=fb.backend,
|
|
||||||
api_key=fb_api_key,
|
|
||||||
base_url=fb.base_url,
|
|
||||||
model=fb.model,
|
|
||||||
timeout=fb.timeout,
|
|
||||||
window_size=window_size,
|
|
||||||
summarize_threshold=summarize_threshold,
|
|
||||||
)
|
|
||||||
|
|
||||||
self._using_fallback = False
|
|
||||||
|
|
||||||
@property
|
|
||||||
def using_fallback(self) -> bool:
|
|
||||||
"""Whether we're currently using the fallback backend."""
|
|
||||||
return self._using_fallback
|
|
||||||
|
|
||||||
def get_memory(self):
|
|
||||||
"""Get memory from the active backend."""
|
|
||||||
if self._using_fallback and self.fallback:
|
|
||||||
return self.fallback.get_memory()
|
|
||||||
return self.primary.get_memory()
|
|
||||||
|
|
||||||
async def generate(
|
|
||||||
self,
|
|
||||||
messages: list[dict],
|
|
||||||
system_prompt: str,
|
|
||||||
max_tokens: int = 300,
|
|
||||||
user_id: Optional[str] = None,
|
|
||||||
) -> str:
|
|
||||||
"""Generate with automatic fallback."""
|
|
||||||
last_error = None
|
|
||||||
|
|
||||||
# Try primary
|
|
||||||
for attempt in range(self.config.retry_attempts):
|
|
||||||
try:
|
|
||||||
result = await asyncio.wait_for(
|
|
||||||
self.primary.generate(messages, system_prompt, max_tokens, user_id),
|
|
||||||
timeout=self.config.timeout,
|
|
||||||
)
|
|
||||||
self._using_fallback = False
|
|
||||||
return result
|
|
||||||
except asyncio.TimeoutError as e:
|
|
||||||
logger.warning(f"Primary backend timeout (attempt {attempt + 1})")
|
|
||||||
last_error = e
|
|
||||||
if not self.config.fallback_on_timeout:
|
|
||||||
raise
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Primary backend error (attempt {attempt + 1}): {e}")
|
|
||||||
last_error = e
|
|
||||||
if not self.config.fallback_on_error:
|
|
||||||
raise
|
|
||||||
|
|
||||||
# Try fallback if available
|
|
||||||
if self.fallback:
|
|
||||||
logger.info("Switching to fallback backend")
|
|
||||||
try:
|
|
||||||
result = await asyncio.wait_for(
|
|
||||||
self.fallback.generate(messages, system_prompt, max_tokens, user_id),
|
|
||||||
timeout=self.config.fallback.timeout if self.config.fallback else 30,
|
|
||||||
)
|
|
||||||
self._using_fallback = True
|
|
||||||
return result
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Fallback backend also failed: {e}")
|
|
||||||
raise
|
|
||||||
|
|
||||||
# No fallback, raise the last error
|
|
||||||
if last_error:
|
|
||||||
raise last_error
|
|
||||||
raise RuntimeError("All LLM backends failed")
|
|
||||||
|
|
||||||
async def generate_with_search(
|
|
||||||
self,
|
|
||||||
query: str,
|
|
||||||
system_prompt: Optional[str] = None,
|
|
||||||
) -> str:
|
|
||||||
"""Generate with search using automatic fallback."""
|
|
||||||
last_error = None
|
|
||||||
|
|
||||||
# Try primary
|
|
||||||
try:
|
|
||||||
result = await asyncio.wait_for(
|
|
||||||
self.primary.generate_with_search(query, system_prompt),
|
|
||||||
timeout=self.config.timeout,
|
|
||||||
)
|
|
||||||
self._using_fallback = False
|
|
||||||
return result
|
|
||||||
except asyncio.TimeoutError as e:
|
|
||||||
logger.warning("Primary backend timeout for search")
|
|
||||||
last_error = e
|
|
||||||
if not self.config.fallback_on_timeout:
|
|
||||||
raise
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Primary backend search error: {e}")
|
|
||||||
last_error = e
|
|
||||||
if not self.config.fallback_on_error:
|
|
||||||
raise
|
|
||||||
|
|
||||||
# Try fallback
|
|
||||||
if self.fallback:
|
|
||||||
logger.info("Switching to fallback backend for search")
|
|
||||||
try:
|
|
||||||
result = await self.fallback.generate_with_search(query, system_prompt)
|
|
||||||
self._using_fallback = True
|
|
||||||
return result
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Fallback search also failed: {e}")
|
|
||||||
raise
|
|
||||||
|
|
||||||
if last_error:
|
|
||||||
raise last_error
|
|
||||||
raise RuntimeError("All LLM backends failed")
|
|
||||||
|
|
||||||
async def close(self) -> None:
|
|
||||||
"""Close both backends."""
|
|
||||||
await self.primary.close()
|
|
||||||
if self.fallback:
|
|
||||||
await self.fallback.close()
|
|
||||||
|
|
@ -150,18 +150,6 @@ class GoogleBackend(LLMBackend):
|
||||||
"""Get the memory manager instance."""
|
"""Get the memory manager instance."""
|
||||||
return self._memory
|
return self._memory
|
||||||
|
|
||||||
async def generate_with_search(
|
|
||||||
self,
|
|
||||||
query: str,
|
|
||||||
system_prompt: Optional[str] = None,
|
|
||||||
) -> str:
|
|
||||||
"""Generate response - uses Gemini's built-in grounding if available."""
|
|
||||||
prompt = system_prompt or "You are a helpful assistant."
|
|
||||||
|
|
||||||
messages = [{"role": "user", "content": query}]
|
|
||||||
|
|
||||||
return await self.generate(messages, prompt, max_tokens=300)
|
|
||||||
|
|
||||||
async def close(self) -> None:
|
async def close(self) -> None:
|
||||||
"""Clean up - nothing to close for Google client."""
|
"""Clean up - nothing to close for Google client."""
|
||||||
pass
|
pass
|
||||||
|
|
|
||||||
|
|
@ -147,26 +147,6 @@ class OpenAIBackend(LLMBackend):
|
||||||
"""Get the memory manager instance."""
|
"""Get the memory manager instance."""
|
||||||
return self._memory
|
return self._memory
|
||||||
|
|
||||||
async def generate_with_search(
|
|
||||||
self,
|
|
||||||
query: str,
|
|
||||||
system_prompt: Optional[str] = None,
|
|
||||||
) -> str:
|
|
||||||
"""Generate response - search depends on model/provider capabilities.
|
|
||||||
|
|
||||||
Note: True web search requires the model/provider to support it
|
|
||||||
(e.g., OpenAI with plugins, or a local setup with SearXNG).
|
|
||||||
This implementation just passes the query as a regular message.
|
|
||||||
"""
|
|
||||||
prompt = system_prompt or (
|
|
||||||
"You are a helpful assistant. Answer the following question. "
|
|
||||||
"If you have web search access, use it for current information."
|
|
||||||
)
|
|
||||||
|
|
||||||
messages = [{"role": "user", "content": query}]
|
|
||||||
|
|
||||||
return await self.generate(messages, prompt, max_tokens=300)
|
|
||||||
|
|
||||||
async def close(self) -> None:
|
async def close(self) -> None:
|
||||||
"""Close the client."""
|
"""Close the client."""
|
||||||
await self._client.close()
|
await self._client.close()
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue