diff --git a/meshai/backends/__init__.py b/meshai/backends/__init__.py
index 0cb636f..1fabb9e 100644
--- a/meshai/backends/__init__.py
+++ b/meshai/backends/__init__.py
@@ -4,13 +4,10 @@ from .base import LLMBackend
 from .openai_backend import OpenAIBackend
 from .anthropic_backend import AnthropicBackend
 from .google_backend import GoogleBackend
-from .fallback import FallbackBackend, create_backend
 
 __all__ = [
     "LLMBackend",
     "OpenAIBackend",
     "AnthropicBackend",
     "GoogleBackend",
-    "FallbackBackend",
-    "create_backend",
 ]
diff --git a/meshai/backends/anthropic_backend.py b/meshai/backends/anthropic_backend.py
index 223a1bf..06cdc88 100644
--- a/meshai/backends/anthropic_backend.py
+++ b/meshai/backends/anthropic_backend.py
@@ -133,21 +133,6 @@ class AnthropicBackend(LLMBackend):
         """Get the memory manager instance."""
         return self._memory
 
-    async def generate_with_search(
-        self,
-        query: str,
-        system_prompt: Optional[str] = None,
-    ) -> str:
-        """Generate response - Anthropic doesn't have built-in search."""
-        prompt = system_prompt or (
-            "You are a helpful assistant. Answer the following question "
-            "based on your knowledge."
-        )
-
-        messages = [{"role": "user", "content": query}]
-
-        return await self.generate(messages, prompt, max_tokens=300)
-
     async def close(self) -> None:
         """Close the client."""
         await self._client.close()
diff --git a/meshai/backends/base.py b/meshai/backends/base.py
index 17b6e4b..6843e00 100644
--- a/meshai/backends/base.py
+++ b/meshai/backends/base.py
@@ -1,10 +1,7 @@
 """Base class for LLM backends."""
 
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Optional
-
-if TYPE_CHECKING:
-    from ..memory import ConversationSummary
+from typing import Optional
 
 
 class LLMBackend(ABC):
@@ -35,23 +32,6 @@ class LLMBackend(ABC):
         """Get the memory manager instance. Override in subclasses."""
         return None
 
-    @abstractmethod
-    async def generate_with_search(
-        self,
-        query: str,
-        system_prompt: Optional[str] = None,
-    ) -> str:
-        """Generate a response with web search capability.
-
-        Args:
-            query: Search/question to answer
-            system_prompt: Optional system prompt
-
-        Returns:
-            Generated response text
-        """
-        pass
-
     async def close(self) -> None:
         """Clean up resources. Override if needed."""
         pass
diff --git a/meshai/backends/fallback.py b/meshai/backends/fallback.py
deleted file mode 100644
index b660122..0000000
--- a/meshai/backends/fallback.py
+++ /dev/null
@@ -1,218 +0,0 @@
-"""Fallback-aware LLM backend wrapper."""
-
-import asyncio
-import logging
-from typing import Optional
-
-from ..config import LLMConfig, LLMBackendConfig
-from .base import LLMBackend
-from .openai_backend import OpenAIBackend
-from .anthropic_backend import AnthropicBackend
-from .google_backend import GoogleBackend
-
-logger = logging.getLogger(__name__)
-
-
-def create_backend(
-    backend_type: str,
-    api_key: str,
-    base_url: str,
-    model: str,
-    timeout: int,
-    window_size: int = 0,
-    summarize_threshold: int = 8,
-) -> LLMBackend:
-    """Create an LLM backend instance.
-
-    Args:
-        backend_type: Type of backend (openai, anthropic, google)
-        api_key: API key for the backend
-        base_url: Base URL for the API
-        model: Model name to use
-        timeout: Request timeout in seconds
-        window_size: Memory window size
-        summarize_threshold: When to summarize older messages
-
-    Returns:
-        Configured LLM backend instance
-    """
-    # Create a minimal config object for the backend
-    from dataclasses import dataclass
-
-    @dataclass
-    class MinimalLLMConfig:
-        backend: str
-        api_key: str
-        base_url: str
-        model: str
-        system_prompt: str = ""
-
-    config = MinimalLLMConfig(
-        backend=backend_type,
-        api_key=api_key,
-        base_url=base_url,
-        model=model,
-    )
-
-    backend_type = backend_type.lower()
-    if backend_type == "openai":
-        return OpenAIBackend(config, api_key, window_size, summarize_threshold)
-    elif backend_type == "anthropic":
-        return AnthropicBackend(config, api_key, window_size, summarize_threshold)
-    elif backend_type == "google":
-        return GoogleBackend(config, api_key, window_size, summarize_threshold)
-    else:
-        logger.warning(f"Unknown backend '{backend_type}', defaulting to OpenAI")
-        return OpenAIBackend(config, api_key, window_size, summarize_threshold)
-
-
-class FallbackBackend(LLMBackend):
-    """LLM backend with automatic fallback support."""
-
-    def __init__(
-        self,
-        config: LLMConfig,
-        api_key: str,
-        window_size: int = 0,
-        summarize_threshold: int = 8,
-    ):
-        self.config = config
-        self.api_key = api_key
-        self.window_size = window_size
-        self.summarize_threshold = summarize_threshold
-
-        # Create primary backend
-        self.primary = create_backend(
-            backend_type=config.backend,
-            api_key=api_key,
-            base_url=config.base_url,
-            model=config.model,
-            timeout=config.timeout,
-            window_size=window_size,
-            summarize_threshold=summarize_threshold,
-        )
-
-        # Create fallback backend if configured
-        self.fallback: Optional[LLMBackend] = None
-        if config.fallback:
-            fb = config.fallback
-            fb_api_key = fb.api_key or api_key  # Use primary key if not specified
-            self.fallback = create_backend(
-                backend_type=fb.backend,
-                api_key=fb_api_key,
-                base_url=fb.base_url,
-                model=fb.model,
-                timeout=fb.timeout,
-                window_size=window_size,
-                summarize_threshold=summarize_threshold,
-            )
-
-        self._using_fallback = False
-
-    @property
-    def using_fallback(self) -> bool:
-        """Whether we're currently using the fallback backend."""
-        return self._using_fallback
-
-    def get_memory(self):
-        """Get memory from the active backend."""
-        if self._using_fallback and self.fallback:
-            return self.fallback.get_memory()
-        return self.primary.get_memory()
-
-    async def generate(
-        self,
-        messages: list[dict],
-        system_prompt: str,
-        max_tokens: int = 300,
-        user_id: Optional[str] = None,
-    ) -> str:
-        """Generate with automatic fallback."""
-        last_error = None
-
-        # Try primary
-        for attempt in range(self.config.retry_attempts):
-            try:
-                result = await asyncio.wait_for(
-                    self.primary.generate(messages, system_prompt, max_tokens, user_id),
-                    timeout=self.config.timeout,
-                )
-                self._using_fallback = False
-                return result
-            except asyncio.TimeoutError as e:
-                logger.warning(f"Primary backend timeout (attempt {attempt + 1})")
-                last_error = e
-                if not self.config.fallback_on_timeout:
-                    raise
-            except Exception as e:
-                logger.warning(f"Primary backend error (attempt {attempt + 1}): {e}")
-                last_error = e
-                if not self.config.fallback_on_error:
-                    raise
-
-        # Try fallback if available
-        if self.fallback:
-            logger.info("Switching to fallback backend")
-            try:
-                result = await asyncio.wait_for(
-                    self.fallback.generate(messages, system_prompt, max_tokens, user_id),
-                    timeout=self.config.fallback.timeout if self.config.fallback else 30,
-                )
-                self._using_fallback = True
-                return result
-            except Exception as e:
-                logger.error(f"Fallback backend also failed: {e}")
-                raise
-
-        # No fallback, raise the last error
-        if last_error:
-            raise last_error
-        raise RuntimeError("All LLM backends failed")
-
-    async def generate_with_search(
-        self,
-        query: str,
-        system_prompt: Optional[str] = None,
-    ) -> str:
-        """Generate with search using automatic fallback."""
-        last_error = None
-
-        # Try primary
-        try:
-            result = await asyncio.wait_for(
-                self.primary.generate_with_search(query, system_prompt),
-                timeout=self.config.timeout,
-            )
-            self._using_fallback = False
-            return result
-        except asyncio.TimeoutError as e:
-            logger.warning("Primary backend timeout for search")
-            last_error = e
-            if not self.config.fallback_on_timeout:
-                raise
-        except Exception as e:
-            logger.warning(f"Primary backend search error: {e}")
-            last_error = e
-            if not self.config.fallback_on_error:
-                raise
-
-        # Try fallback
-        if self.fallback:
-            logger.info("Switching to fallback backend for search")
-            try:
-                result = await self.fallback.generate_with_search(query, system_prompt)
-                self._using_fallback = True
-                return result
-            except Exception as e:
-                logger.error(f"Fallback search also failed: {e}")
-                raise
-
-        if last_error:
-            raise last_error
-        raise RuntimeError("All LLM backends failed")
-
-    async def close(self) -> None:
-        """Close both backends."""
-        await self.primary.close()
-        if self.fallback:
-            await self.fallback.close()
diff --git a/meshai/backends/google_backend.py b/meshai/backends/google_backend.py
index 400e3d3..a0ab0f4 100644
--- a/meshai/backends/google_backend.py
+++ b/meshai/backends/google_backend.py
@@ -150,18 +150,6 @@ class GoogleBackend(LLMBackend):
         """Get the memory manager instance."""
         return self._memory
 
-    async def generate_with_search(
-        self,
-        query: str,
-        system_prompt: Optional[str] = None,
-    ) -> str:
-        """Generate response - uses Gemini's built-in grounding if available."""
-        prompt = system_prompt or "You are a helpful assistant."
-
-        messages = [{"role": "user", "content": query}]
-
-        return await self.generate(messages, prompt, max_tokens=300)
-
     async def close(self) -> None:
         """Clean up - nothing to close for Google client."""
         pass
diff --git a/meshai/backends/openai_backend.py b/meshai/backends/openai_backend.py
index 9fc263c..d776daf 100644
--- a/meshai/backends/openai_backend.py
+++ b/meshai/backends/openai_backend.py
@@ -147,26 +147,6 @@ class OpenAIBackend(LLMBackend):
         """Get the memory manager instance."""
         return self._memory
 
-    async def generate_with_search(
-        self,
-        query: str,
-        system_prompt: Optional[str] = None,
-    ) -> str:
-        """Generate response - search depends on model/provider capabilities.
-
-        Note: True web search requires the model/provider to support it
-        (e.g., OpenAI with plugins, or a local setup with SearXNG).
-        This implementation just passes the query as a regular message.
-        """
-        prompt = system_prompt or (
-            "You are a helpful assistant. Answer the following question. "
-            "If you have web search access, use it for current information."
-        )
-
-        messages = [{"role": "user", "content": query}]
-
-        return await self.generate(messages, prompt, max_tokens=300)
-
     async def close(self) -> None:
         """Close the client."""
         await self._client.close()