mirror of https://github.com/kortix-ai/suna.git
Merge remote-tracking branch 'upstream/main' into feat/new-chat-input-on-landing
commit ab24a52ae4
@@ -124,16 +124,6 @@ class ThreadAgentResponse(BaseModel):
    source: str  # "thread", "default", "none", "missing"
    message: str

class AgentBuilderChatRequest(BaseModel):
    message: str
    conversation_history: List[Dict[str, str]] = []
    partial_config: Optional[Dict[str, Any]] = None

class AgentBuilderChatResponse(BaseModel):
    response: str
    suggested_config: Optional[Dict[str, Any]] = None
    next_step: Optional[str] = None

def initialize(
    _db: DBConnection,
    _instance_id: Optional[str] = None
@@ -299,71 +289,6 @@ async def get_agent_run_with_access_check(client, agent_run_id: str, user_id: str
    await verify_thread_access(client, thread_id, user_id)
    return agent_run_data

async def enhance_system_prompt(agent_name: str, description: str, user_system_prompt: str) -> str:
    """
    Enhance a basic system prompt using GPT-4o to create a more comprehensive and effective system prompt.

    Args:
        agent_name: Name of the agent
        description: Description of the agent
        user_system_prompt: User's basic system prompt/instructions

    Returns:
        Enhanced system prompt generated by GPT-4o
    """
    try:
        system_message = """You are an expert at creating comprehensive system prompts for AI agents. Your task is to take basic agent information and transform it into a detailed, effective system prompt that will help the agent perform optimally.

Guidelines for creating system prompts:
1. Be specific about the agent's role, expertise, and capabilities
2. Include clear behavioral guidelines and interaction style
3. Specify the agent's knowledge domains and areas of expertise
4. Include guidance on how to handle different types of requests
5. Set appropriate boundaries and limitations
6. Make the prompt engaging and easy to understand
7. Ensure the prompt is comprehensive but not overly verbose
8. Include relevant context about tools and capabilities the agent might have

The enhanced prompt should be professional, clear, and actionable."""

        user_message = f"""Please create an enhanced system prompt for an AI agent with the following details:

Agent Name: {agent_name}
Agent Description: {description}
User's Instructions: {user_system_prompt}

Transform this basic information into a comprehensive, effective system prompt that will help the agent perform at its best. The prompt should be detailed enough to guide the agent's behavior while remaining clear and actionable."""

        messages = [
            {"role": "system", "content": system_message},
            {"role": "user", "content": user_message}
        ]

        logger.info(f"Enhancing system prompt for agent: {agent_name}")
        response = await make_llm_api_call(
            messages=messages,
            model_name="openai/gpt-4o",
            max_tokens=2000,
            temperature=0.7
        )

        if response and response.get('choices') and response['choices'][0].get('message'):
            enhanced_prompt = response['choices'][0]['message'].get('content', '').strip()
            if enhanced_prompt:
                logger.info(f"Successfully enhanced system prompt for agent: {agent_name}")
                return enhanced_prompt
            else:
                logger.warning(f"GPT-4o returned empty enhanced prompt for agent: {agent_name}")
                return user_system_prompt
        else:
            logger.warning(f"Failed to get valid response from GPT-4o for agent: {agent_name}")
            return user_system_prompt

    except Exception as e:
        logger.error(f"Error enhancing system prompt for agent {agent_name}: {str(e)}")
        # Return the original prompt if enhancement fails
        return user_system_prompt

@router.post("/thread/{thread_id}/agent/start")
async def start_agent(
    thread_id: str,
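
For illustration, a minimal call-site sketch for the enhance_system_prompt helper above; the argument values are made up for the example and the caller is assumed to already run in an async context:

    enhanced = await enhance_system_prompt(
        agent_name="Research Assistant",                      # hypothetical
        description="Finds and summarizes academic papers",   # hypothetical
        user_system_prompt="Help users locate relevant papers and summarize key findings.",
    )
    # On an empty or invalid GPT-4o response, or any exception, the helper logs
    # the problem and returns user_system_prompt unchanged, so the original
    # instructions are never lost.
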
@@ -1210,11 +1135,8 @@ async def initiate_agent_with_files(
        raise HTTPException(status_code=500, detail=f"Failed to initiate agent session: {str(e)}")


# Custom agents


@router.get("/agents", response_model=AgentsResponse)
async def get_agents(
    user_id: str = Depends(get_current_user_id_from_jwt),
@@ -89,7 +89,31 @@ You have the ability to execute operations using both Python and CLI tools:
  * Supported formats include JPG, PNG, GIF, WEBP, and other common image formats.
  * Maximum file size limit is 10 MB.

### 2.2.7 DATA PROVIDERS
### 2.2.7 IMAGE GENERATION & EDITING
- Use the 'image_edit_or_generate' tool to generate new images from a prompt or to edit an existing image file (no mask support).
  * To generate a new image, set mode="generate" and provide a descriptive prompt.
  * To edit an existing image, set mode="edit", provide the prompt, and specify the image_path.
  * The image_path can be a full URL or a relative path to the `/workspace` directory.
  * Example (generate):
    <function_calls>
    <invoke name="image_edit_or_generate">
    <parameter name="mode">generate</parameter>
    <parameter name="prompt">A futuristic cityscape at sunset</parameter>
    </invoke>
    </function_calls>
  * Example (edit):
    <function_calls>
    <invoke name="image_edit_or_generate">
    <parameter name="mode">edit</parameter>
    <parameter name="prompt">Add a red hat to the person in the image</parameter>
    <parameter name="image_path">http://example.com/images/person.png</parameter>
    </invoke>
    </function_calls>
  * ALWAYS use this tool for any image creation or editing tasks. Do not attempt to generate or edit images by any other means.
  * You must use edit mode when the user asks you to edit an image or change an existing image in any way.
  * Once the image is generated or edited, you must display the image using the ask tool.

### 2.2.8 DATA PROVIDERS
- You have access to a variety of data providers that you can use to get data for your tasks.
- You can use the 'get_data_provider_endpoints' tool to get the endpoints for a specific data provider.
- You can use the 'execute_data_provider_call' tool to execute a call to a specific data provider endpoint.
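
For illustration, a hypothetical call to the second of those tools in the same tool-call format; the service name, route, and payload shape below are assumptions for the example, not defined by this commit:

    <function_calls>
    <invoke name="execute_data_provider_call">
    <!-- service/route/payload values are illustrative placeholders -->
    <parameter name="service_name">linkedin</parameter>
    <parameter name="route">person</parameter>
    <parameter name="payload">{"link": "https://www.linkedin.com/in/example"}</parameter>
    </invoke>
    </function_calls>
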
@@ -25,6 +25,7 @@ from utils.logger import logger
from utils.auth_utils import get_account_id_from_thread
from services.billing import check_billing_status
from agent.tools.sb_vision_tool import SandboxVisionTool
from agent.tools.sb_image_edit_tool import SandboxImageEditTool
from services.langfuse import langfuse
from langfuse.client import StatefulTraceClient
from services.langfuse import langfuse
@@ -57,7 +58,7 @@ async def run_agent(

    if not trace:
        trace = langfuse.trace(name="run_agent", session_id=thread_id, metadata={"project_id": project_id})
    thread_manager = ThreadManager(trace=trace, is_agent_builder=is_agent_builder, target_agent_id=target_agent_id, agent_config=agent_config)
    thread_manager = ThreadManager(trace=trace, is_agent_builder=is_agent_builder or False, target_agent_id=target_agent_id, agent_config=agent_config)

    client = await thread_manager.db.client
@@ -107,6 +108,7 @@ async def run_agent(
    thread_manager.add_tool(MessageTool)
    thread_manager.add_tool(SandboxWebSearchTool, project_id=project_id, thread_manager=thread_manager)
    thread_manager.add_tool(SandboxVisionTool, project_id=project_id, thread_id=thread_id, thread_manager=thread_manager)
    thread_manager.add_tool(SandboxImageEditTool, project_id=project_id, thread_id=thread_id, thread_manager=thread_manager)
    if config.RAPID_API_KEY:
        thread_manager.add_tool(DataProvidersTool)
    else:
@@ -8,7 +8,7 @@ server tool calls through dynamically generated individual function methods.
import json
from typing import Any, Dict, List, Optional
from agentpress.tool import Tool, ToolResult, openapi_schema, xml_schema, ToolSchema, SchemaType
from mcp_local.client import MCPManager
from mcp_service.client import MCPManager
from utils.logger import logger
import inspect
from mcp import ClientSession
@@ -0,0 +1,172 @@
from typing import Optional
from agentpress.tool import ToolResult, openapi_schema, xml_schema
from sandbox.tool_base import SandboxToolsBase
from agentpress.thread_manager import ThreadManager
import httpx
from io import BytesIO
import uuid
from litellm import aimage_generation, aimage_edit
import base64


class SandboxImageEditTool(SandboxToolsBase):
    """Tool for generating or editing images using OpenAI GPT Image 1 via OpenAI SDK (no mask support)."""

    def __init__(self, project_id: str, thread_id: str, thread_manager: ThreadManager):
        super().__init__(project_id, thread_manager)
        self.thread_id = thread_id
        self.thread_manager = thread_manager

    @openapi_schema(
        {
            "type": "function",
            "function": {
                "name": "image_edit_or_generate",
                "description": "Generate a new image from a prompt, or edit an existing image (no mask support) using OpenAI GPT Image 1 via OpenAI SDK. Stores the result in the thread context.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "mode": {
                            "type": "string",
                            "enum": ["generate", "edit"],
                            "description": "'generate' to create a new image from a prompt, 'edit' to edit an existing image.",
                        },
                        "prompt": {
                            "type": "string",
                            "description": "Text prompt describing the desired image or edit.",
                        },
                        "image_path": {
                            "type": "string",
                            "description": "(edit mode only) Path to the image file to edit, relative to /workspace. Required for 'edit'.",
                        },
                    },
                    "required": ["mode", "prompt"],
                },
            },
        }
    )
    @xml_schema(
        tag_name="image-edit-or-generate",
        mappings=[
            {"param_name": "mode", "node_type": "attribute", "path": "."},
            {"param_name": "prompt", "node_type": "attribute", "path": "."},
            {"param_name": "image_path", "node_type": "attribute", "path": "."},
        ],
        example="""
        <function_calls>
        <invoke name="image_edit_or_generate">
        <parameter name="mode">generate</parameter>
        <parameter name="prompt">A futuristic cityscape at sunset</parameter>
        </invoke>
        </function_calls>
        """,
    )
    async def image_edit_or_generate(
        self,
        mode: str,
        prompt: str,
        image_path: Optional[str] = None,
    ) -> ToolResult:
        """Generate or edit images using OpenAI GPT Image 1 via OpenAI SDK (no mask support)."""
        try:
            await self._ensure_sandbox()

            if mode == "generate":
                response = await aimage_generation(
                    model="gpt-image-1",
                    prompt=prompt,
                    n=1,
                    size="1024x1024",
                )
            elif mode == "edit":
                if not image_path:
                    return self.fail_response("'image_path' is required for edit mode.")

                image_bytes = await self._get_image_bytes(image_path)
                if isinstance(image_bytes, ToolResult):  # Error occurred
                    return image_bytes

                # Create BytesIO object with proper filename to set MIME type
                image_io = BytesIO(image_bytes)
                image_io.name = (
                    "image.png"  # Set filename to ensure proper MIME type detection
                )

                response = await aimage_edit(
                    image=[image_io],  # Type in the LiteLLM SDK is wrong
                    prompt=prompt,
                    model="gpt-image-1",
                    n=1,
                    size="1024x1024",
                )
            else:
                return self.fail_response("Invalid mode. Use 'generate' or 'edit'.")

            # Download and save the generated image to sandbox
            image_filename = await self._process_image_response(response)
            if isinstance(image_filename, ToolResult):  # Error occurred
                return image_filename

            return self.success_response(
                f"Successfully generated image using mode '{mode}'. Image saved as: {image_filename}. You can use the ask tool to display the image."
            )

        except Exception as e:
            return self.fail_response(
                f"An error occurred during image generation/editing: {str(e)}"
            )

    async def _get_image_bytes(self, image_path: str) -> bytes | ToolResult:
        """Get image bytes from URL or local file path."""
        if image_path.startswith(("http://", "https://")):
            return await self._download_image_from_url(image_path)
        else:
            return await self._read_image_from_sandbox(image_path)

    async def _download_image_from_url(self, url: str) -> bytes | ToolResult:
        """Download image from URL."""
        try:
            async with httpx.AsyncClient() as client:
                response = await client.get(url)
                response.raise_for_status()
                return response.content
        except Exception:
            return self.fail_response(f"Could not download image from URL: {url}")

    async def _read_image_from_sandbox(self, image_path: str) -> bytes | ToolResult:
        """Read image from sandbox filesystem."""
        try:
            cleaned_path = self.clean_path(image_path)
            full_path = f"{self.workspace_path}/{cleaned_path}"

            # Check if file exists and is not a directory
            file_info = await self.sandbox.fs.get_file_info(full_path)
            if file_info.is_dir:
                return self.fail_response(
                    f"Path '{cleaned_path}' is a directory, not an image file."
                )

            return await self.sandbox.fs.download_file(full_path)

        except Exception as e:
            return self.fail_response(
                f"Could not read image file from sandbox: {image_path} - {str(e)}"
            )

    async def _process_image_response(self, response) -> str | ToolResult:
        """Download generated image and save to sandbox with random name."""
        try:
            original_b64_str = response.data[0].b64_json
            # Decode base64 image data
            image_data = base64.b64decode(original_b64_str)

            # Generate random filename
            random_filename = f"generated_image_{uuid.uuid4().hex[:8]}.png"
            sandbox_path = f"{self.workspace_path}/{random_filename}"

            # Save image to sandbox
            await self.sandbox.fs.upload_file(image_data, sandbox_path)
            return random_filename

        except Exception as e:
            return self.fail_response(f"Failed to download and save image: {str(e)}")
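
A minimal usage sketch for the new tool, assuming an already-initialized ThreadManager; the project and thread IDs are hypothetical placeholders:

    async def demo(thread_manager):
        tool = SandboxImageEditTool(
            project_id="proj_123",    # hypothetical ID
            thread_id="thread_456",   # hypothetical ID
            thread_manager=thread_manager,
        )
        # Generate a new 1024x1024 image; the success message names the file
        # saved under /workspace, which the agent then shows via the ask tool.
        result = await tool.image_edit_or_generate(
            mode="generate",
            prompt="A futuristic cityscape at sunset",
        )
        print(result)
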
@@ -6,9 +6,10 @@ reaching the context window limitations of LLM models.
"""

import json
from typing import List, Dict, Any, Optional
from typing import List, Dict, Any, Optional, Union

from litellm import token_counter, completion_cost
from litellm.utils import token_counter
from litellm.cost_calculator import completion_cost
from services.supabase import DBConnection
from services.llm import make_llm_api_call
from utils.logger import logger
@@ -29,270 +30,291 @@ class ContextManager:
        """
        self.db = DBConnection()
        self.token_threshold = token_threshold

    def is_tool_result_message(self, msg: Dict[str, Any]) -> bool:
        """Check if a message is a tool result message."""
        if not ("content" in msg and msg['content']):
            return False
        content = msg['content']
        if isinstance(content, str) and "ToolResult" in content:
            return True
        if isinstance(content, dict) and "tool_execution" in content:
            return True
        if isinstance(content, dict) and "interactive_elements" in content:
            return True
        if isinstance(content, str):
            try:
                parsed_content = json.loads(content)
                if isinstance(parsed_content, dict) and "tool_execution" in parsed_content:
                    return True
                if isinstance(parsed_content, dict) and "interactive_elements" in content:
                    return True
            except (json.JSONDecodeError, TypeError):
                pass
        return False

    async def get_thread_token_count(self, thread_id: str) -> int:
        """Get the current token count for a thread using LiteLLM.

        Args:
            thread_id: ID of the thread to analyze

        Returns:
            The total token count for relevant messages in the thread
        """
        logger.debug(f"Getting token count for thread {thread_id}")

        try:
            # Get messages for the thread
            messages = await self.get_messages_for_summarization(thread_id)

            if not messages:
                logger.debug(f"No messages found for thread {thread_id}")
                return 0

            # Use litellm's token_counter for accurate model-specific counting
            # This is much more accurate than the SQL-based estimation
            token_count = token_counter(model="gpt-4", messages=messages)

            logger.info(f"Thread {thread_id} has {token_count} tokens (calculated with litellm)")
            return token_count

        except Exception as e:
            logger.error(f"Error getting token count: {str(e)}")
            return 0

    async def get_messages_for_summarization(self, thread_id: str) -> List[Dict[str, Any]]:
        """Get all LLM messages from the thread that need to be summarized.

        This gets messages after the most recent summary or all messages if
        no summary exists. Unlike get_llm_messages, this includes ALL messages
        since the last summary, even if we're generating a new summary.

        Args:
            thread_id: ID of the thread to get messages from

        Returns:
            List of message objects to summarize
        """
        logger.debug(f"Getting messages for summarization for thread {thread_id}")
        client = await self.db.client

        try:
            # Find the most recent summary message
            summary_result = await client.table('messages').select('created_at') \
                .eq('thread_id', thread_id) \
                .eq('type', 'summary') \
                .eq('is_llm_message', True) \
                .order('created_at', desc=True) \
                .limit(1) \
                .execute()

            # Get messages after the most recent summary or all messages if no summary
            if summary_result.data and len(summary_result.data) > 0:
                last_summary_time = summary_result.data[0]['created_at']
                logger.debug(f"Found last summary at {last_summary_time}")

                # Get all messages after the summary, but NOT including the summary itself
                messages_result = await client.table('messages').select('*') \
                    .eq('thread_id', thread_id) \
                    .eq('is_llm_message', True) \
                    .gt('created_at', last_summary_time) \
                    .order('created_at') \
                    .execute()
    def compress_message(self, msg_content: Union[str, dict], message_id: Optional[str] = None, max_length: int = 3000) -> Union[str, dict]:
        """Compress the message content."""
        if isinstance(msg_content, str):
            if len(msg_content) > max_length:
                return msg_content[:max_length] + "... (truncated)" + f"\n\nmessage_id \"{message_id}\"\nUse expand-message tool to see contents"
            else:
                logger.debug("No previous summary found, getting all messages")
                # Get all messages
                messages_result = await client.table('messages').select('*') \
                    .eq('thread_id', thread_id) \
                    .eq('is_llm_message', True) \
                    .order('created_at') \
                    .execute()

            # Parse the message content if needed
            messages = []
            for msg in messages_result.data:
                # Skip existing summary messages - we don't want to summarize summaries
                if msg.get('type') == 'summary':
                    logger.debug(f"Skipping summary message from {msg.get('created_at')}")
                    continue

                # Parse content if it's a string
                content = msg['content']
                if isinstance(content, str):
                    try:
                        content = json.loads(content)
                    except json.JSONDecodeError:
                        pass  # Keep as string if not valid JSON
                return msg_content
        elif isinstance(msg_content, dict):
            if len(json.dumps(msg_content)) > max_length:
                return json.dumps(msg_content)[:max_length] + "... (truncated)" + f"\n\nmessage_id \"{message_id}\"\nUse expand-message tool to see contents"
            else:
                return msg_content

    def safe_truncate(self, msg_content: Union[str, dict], max_length: int = 100000) -> Union[str, dict]:
        """Truncate the message content safely by removing the middle portion."""
        max_length = min(max_length, 100000)
        if isinstance(msg_content, str):
            if len(msg_content) > max_length:
                # Calculate how much to keep from start and end
                keep_length = max_length - 150  # Reserve space for truncation message
                start_length = keep_length // 2
                end_length = keep_length - start_length

                # Ensure we have the proper format for the LLM
                if 'role' not in content and 'type' in msg:
                    # Convert message type to role if needed
                    role = msg['type']
                    if role == 'assistant' or role == 'user' or role == 'system' or role == 'tool':
                        content = {'role': role, 'content': content}
                start_part = msg_content[:start_length]
                end_part = msg_content[-end_length:] if end_length > 0 else ""

                messages.append(content)

            logger.info(f"Got {len(messages)} messages to summarize for thread {thread_id}")
            return messages

        except Exception as e:
            logger.error(f"Error getting messages for summarization: {str(e)}", exc_info=True)
            return []

    async def create_summary(
        self,
        thread_id: str,
        messages: List[Dict[str, Any]],
        model: str = "gpt-4o-mini"
    ) -> Optional[Dict[str, Any]]:
        """Generate a summary of conversation messages.
                return start_part + f"\n\n... (middle truncated) ...\n\n" + end_part + f"\n\nThis message is too long, repeat relevant information in your response to remember it"
            else:
                return msg_content
        elif isinstance(msg_content, dict):
            json_str = json.dumps(msg_content)
            if len(json_str) > max_length:
                # Calculate how much to keep from start and end
                keep_length = max_length - 150  # Reserve space for truncation message
                start_length = keep_length // 2
                end_length = keep_length - start_length

                start_part = json_str[:start_length]
                end_part = json_str[-end_length:] if end_length > 0 else ""

                return start_part + f"\n\n... (middle truncated) ...\n\n" + end_part + f"\n\nThis message is too long, repeat relevant information in your response to remember it"
            else:
                return msg_content
    def compress_tool_result_messages(self, messages: List[Dict[str, Any]], llm_model: str, max_tokens: Optional[int], token_threshold: int = 1000) -> List[Dict[str, Any]]:
        """Compress the tool result messages except the most recent one."""
        uncompressed_total_token_count = token_counter(model=llm_model, messages=messages)
        max_tokens_value = max_tokens or (100 * 1000)

        if uncompressed_total_token_count > max_tokens_value:
            _i = 0  # Count the number of ToolResult messages
            for msg in reversed(messages):  # Start from the end and work backwards
                if self.is_tool_result_message(msg):  # Only compress ToolResult messages
                    _i += 1  # Count the number of ToolResult messages
                    msg_token_count = token_counter(messages=[msg])  # Count the number of tokens in the message
                    if msg_token_count > token_threshold:  # If the message is too long
                        if _i > 1:  # If this is not the most recent ToolResult message
                            message_id = msg.get('message_id')  # Get the message_id
                            if message_id:
                                msg["content"] = self.compress_message(msg["content"], message_id, token_threshold * 3)
                            else:
                                logger.warning(f"UNEXPECTED: Message has no message_id {str(msg)[:100]}")
                        else:
                            msg["content"] = self.safe_truncate(msg["content"], int(max_tokens_value * 2))
        return messages

    def compress_user_messages(self, messages: List[Dict[str, Any]], llm_model: str, max_tokens: Optional[int], token_threshold: int = 1000) -> List[Dict[str, Any]]:
        """Compress the user messages except the most recent one."""
        uncompressed_total_token_count = token_counter(model=llm_model, messages=messages)
        max_tokens_value = max_tokens or (100 * 1000)

        if uncompressed_total_token_count > max_tokens_value:
            _i = 0  # Count the number of User messages
            for msg in reversed(messages):  # Start from the end and work backwards
                if msg.get('role') == 'user':  # Only compress User messages
                    _i += 1  # Count the number of User messages
                    msg_token_count = token_counter(messages=[msg])  # Count the number of tokens in the message
                    if msg_token_count > token_threshold:  # If the message is too long
                        if _i > 1:  # If this is not the most recent User message
                            message_id = msg.get('message_id')  # Get the message_id
                            if message_id:
                                msg["content"] = self.compress_message(msg["content"], message_id, token_threshold * 3)
                            else:
                                logger.warning(f"UNEXPECTED: Message has no message_id {str(msg)[:100]}")
                        else:
                            msg["content"] = self.safe_truncate(msg["content"], int(max_tokens_value * 2))
        return messages

    def compress_assistant_messages(self, messages: List[Dict[str, Any]], llm_model: str, max_tokens: Optional[int], token_threshold: int = 1000) -> List[Dict[str, Any]]:
        """Compress the assistant messages except the most recent one."""
        uncompressed_total_token_count = token_counter(model=llm_model, messages=messages)
        max_tokens_value = max_tokens or (100 * 1000)

        if uncompressed_total_token_count > max_tokens_value:
            _i = 0  # Count the number of Assistant messages
            for msg in reversed(messages):  # Start from the end and work backwards
                if msg.get('role') == 'assistant':  # Only compress Assistant messages
                    _i += 1  # Count the number of Assistant messages
                    msg_token_count = token_counter(messages=[msg])  # Count the number of tokens in the message
                    if msg_token_count > token_threshold:  # If the message is too long
                        if _i > 1:  # If this is not the most recent Assistant message
                            message_id = msg.get('message_id')  # Get the message_id
                            if message_id:
                                msg["content"] = self.compress_message(msg["content"], message_id, token_threshold * 3)
                            else:
                                logger.warning(f"UNEXPECTED: Message has no message_id {str(msg)[:100]}")
                        else:
                            msg["content"] = self.safe_truncate(msg["content"], int(max_tokens_value * 2))

        return messages

    def remove_meta_messages(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Remove meta messages from the messages."""
        result: List[Dict[str, Any]] = []
        for msg in messages:
            msg_content = msg.get('content')
            # Try to parse msg_content as JSON if it's a string
            if isinstance(msg_content, str):
                try:
                    msg_content = json.loads(msg_content)
                except json.JSONDecodeError:
                    pass
            if isinstance(msg_content, dict):
                # Create a copy to avoid modifying the original
                msg_content_copy = msg_content.copy()
                if "tool_execution" in msg_content_copy:
                    tool_execution = msg_content_copy["tool_execution"].copy()
                    if "arguments" in tool_execution:
                        del tool_execution["arguments"]
                    msg_content_copy["tool_execution"] = tool_execution
                # Create a new message dict with the modified content
                new_msg = msg.copy()
                new_msg["content"] = json.dumps(msg_content_copy)
                result.append(new_msg)
            else:
                result.append(msg)
        return result
    def compress_messages(self, messages: List[Dict[str, Any]], llm_model: str, max_tokens: Optional[int] = 41000, token_threshold: int = 4096, max_iterations: int = 5) -> List[Dict[str, Any]]:
        """Compress the messages.

        Args:
            thread_id: ID of the thread to summarize
            messages: Messages to summarize
            model: LLM model to use for summarization

        Returns:
            Summary message object or None if summarization failed
            messages: List of messages to compress
            llm_model: Model name for token counting
            max_tokens: Maximum allowed tokens
            token_threshold: Token threshold for individual message compression (must be a power of 2)
            max_iterations: Maximum number of compression iterations
        """
        # Set model-specific token limits
        if 'sonnet' in llm_model.lower():
            max_tokens = 200 * 1000 - 64000 - 28000
        elif 'gpt' in llm_model.lower():
            max_tokens = 128 * 1000 - 28000
        elif 'gemini' in llm_model.lower():
            max_tokens = 1000 * 1000 - 300000
        elif 'deepseek' in llm_model.lower():
            max_tokens = 128 * 1000 - 28000
        else:
            max_tokens = 41 * 1000 - 10000

        result = messages
        result = self.remove_meta_messages(result)

        uncompressed_total_token_count = token_counter(model=llm_model, messages=result)

        result = self.compress_tool_result_messages(result, llm_model, max_tokens, token_threshold)
        result = self.compress_user_messages(result, llm_model, max_tokens, token_threshold)
        result = self.compress_assistant_messages(result, llm_model, max_tokens, token_threshold)

        compressed_token_count = token_counter(model=llm_model, messages=result)

        logger.info(f"compress_messages: {uncompressed_total_token_count} -> {compressed_token_count}")  # Log the token compression for debugging later

        if max_iterations <= 0:
            logger.warning(f"compress_messages: Max iterations reached, omitting messages")
            result = self.compress_messages_by_omitting_messages(messages, llm_model, max_tokens)
            return result

        if compressed_token_count > max_tokens:
            logger.warning(f"Further token compression is needed: {compressed_token_count} > {max_tokens}")
            result = self.compress_messages(messages, llm_model, max_tokens, token_threshold // 2, max_iterations - 1)

        return self.middle_out_messages(result)
    def compress_messages_by_omitting_messages(
        self,
        messages: List[Dict[str, Any]],
        llm_model: str,
        max_tokens: Optional[int] = 41000,
        removal_batch_size: int = 10,
        min_messages_to_keep: int = 10
    ) -> List[Dict[str, Any]]:
        """Compress the messages by omitting messages from the middle.

        Args:
            messages: List of messages to compress
            llm_model: Model name for token counting
            max_tokens: Maximum allowed tokens
            removal_batch_size: Number of messages to remove per iteration
            min_messages_to_keep: Minimum number of messages to preserve
        """
        if not messages:
            logger.warning("No messages to summarize")
            return None

        logger.info(f"Creating summary for thread {thread_id} with {len(messages)} messages")

        # Create system message with summarization instructions
        system_message = {
            "role": "system",
            "content": f"""You are a specialized summarization assistant. Your task is to create a concise but comprehensive summary of the conversation history.

The summary should:
1. Preserve all key information including decisions, conclusions, and important context
2. Include any tools that were used and their results
3. Maintain chronological order of events
4. Be presented as a narrated list of key points with section headers
5. Include only factual information from the conversation (no new information)
6. Be concise but detailed enough that the conversation can continue with this summary as context

VERY IMPORTANT: This summary will replace older parts of the conversation in the LLM's context window, so ensure it contains ALL key information and LATEST STATE OF THE CONVERSATION - SO WE WILL KNOW HOW TO PICK UP WHERE WE LEFT OFF.


THE CONVERSATION HISTORY TO SUMMARIZE IS AS FOLLOWS:
===============================================================
==================== CONVERSATION HISTORY ====================
{messages}
==================== END OF CONVERSATION HISTORY ====================
===============================================================
"""
        }

        try:
            # Call LLM to generate summary
            response = await make_llm_api_call(
                model_name=model,
                messages=[system_message, {"role": "user", "content": "PLEASE PROVIDE THE SUMMARY NOW."}],
                temperature=0,
                max_tokens=SUMMARY_TARGET_TOKENS,
                stream=False
            )
            return messages

            if response and hasattr(response, 'choices') and response.choices:
                summary_content = response.choices[0].message.content

                # Track token usage
                try:
                    token_count = token_counter(model=model, messages=[{"role": "user", "content": summary_content}])
                    cost = completion_cost(model=model, prompt="", completion=summary_content)
                    logger.info(f"Summary generated with {token_count} tokens at cost ${cost:.6f}")
                except Exception as e:
                    logger.error(f"Error calculating token usage: {str(e)}")

                # Format the summary message with clear beginning and end markers
                formatted_summary = f"""
======== CONVERSATION HISTORY SUMMARY ========
        result = messages
        result = self.remove_meta_messages(result)

{summary_content}
        # Early exit if no compression needed
        initial_token_count = token_counter(model=llm_model, messages=result)
        max_allowed_tokens = max_tokens or (100 * 1000)

        if initial_token_count <= max_allowed_tokens:
            return result

======== END OF SUMMARY ========
        # Separate system message (assumed to be first) from conversation messages
        system_message = messages[0] if messages and messages[0].get('role') == 'system' else None
        conversation_messages = result[1:] if system_message else result

        safety_limit = 500
        current_token_count = initial_token_count

        while current_token_count > max_allowed_tokens and safety_limit > 0:
            safety_limit -= 1

            if len(conversation_messages) <= min_messages_to_keep:
                logger.warning(f"Cannot compress further: only {len(conversation_messages)} messages remain (min: {min_messages_to_keep})")
                break

The above is a summary of the conversation history. The conversation continues below.
"""

                # Format the summary message
                summary_message = {
                    "role": "user",
                    "content": formatted_summary
                }

                return summary_message
            # Calculate removal strategy based on current message count
            if len(conversation_messages) > (removal_batch_size * 2):
                # Remove from middle, keeping recent and early context
                middle_start = len(conversation_messages) // 2 - (removal_batch_size // 2)
                middle_end = middle_start + removal_batch_size
                conversation_messages = conversation_messages[:middle_start] + conversation_messages[middle_end:]
            else:
                logger.error("Failed to generate summary: Invalid response")
                return None

        except Exception as e:
            logger.error(f"Error creating summary: {str(e)}", exc_info=True)
            return None
                # Remove from earlier messages, preserving recent context
                messages_to_remove = min(removal_batch_size, len(conversation_messages) // 2)
                if messages_to_remove > 0:
                    conversation_messages = conversation_messages[messages_to_remove:]
                else:
                    # Can't remove any more messages
                    break

            # Recalculate token count
            messages_to_count = ([system_message] + conversation_messages) if system_message else conversation_messages
            current_token_count = token_counter(model=llm_model, messages=messages_to_count)

        # Prepare final result
        final_messages = ([system_message] + conversation_messages) if system_message else conversation_messages
        final_token_count = token_counter(model=llm_model, messages=final_messages)

    async def check_and_summarize_if_needed(
        self,
        thread_id: str,
        add_message_callback,
        model: str = "gpt-4o-mini",
        force: bool = False
    ) -> bool:
        """Check if thread needs summarization and summarize if so.
        logger.info(f"compress_messages_by_omitting_messages: {initial_token_count} -> {final_token_count} tokens ({len(messages)} -> {len(final_messages)} messages)")

        return final_messages

    def middle_out_messages(self, messages: List[Dict[str, Any]], max_messages: int = 320) -> List[Dict[str, Any]]:
        """Remove messages from the middle of the list, keeping max_messages total."""
        if len(messages) <= max_messages:
            return messages

        Args:
            thread_id: ID of the thread to check
            add_message_callback: Callback to add the summary message to the thread
            model: LLM model to use for summarization
            force: Whether to force summarization regardless of token count

        Returns:
            True if summarization was performed, False otherwise
        """
        try:
            # Get token count using LiteLLM (accurate model-specific counting)
            token_count = await self.get_thread_token_count(thread_id)

            # If token count is below threshold and not forcing, no summarization needed
            if token_count < self.token_threshold and not force:
                logger.debug(f"Thread {thread_id} has {token_count} tokens, below threshold {self.token_threshold}")
                return False

            # Log reason for summarization
            if force:
                logger.info(f"Forced summarization of thread {thread_id} with {token_count} tokens")
            else:
                logger.info(f"Thread {thread_id} exceeds token threshold ({token_count} >= {self.token_threshold}), summarizing...")

            # Get messages to summarize
            messages = await self.get_messages_for_summarization(thread_id)

            # If there are too few messages, don't summarize
            if len(messages) < 3:
                logger.info(f"Thread {thread_id} has too few messages ({len(messages)}) to summarize")
                return False

            # Create summary
            summary = await self.create_summary(thread_id, messages, model)

            if summary:
                # Add summary message to thread
                await add_message_callback(
                    thread_id=thread_id,
                    type="summary",
                    content=summary,
                    is_llm_message=True,
                    metadata={"token_count": token_count}
                )

                logger.info(f"Successfully added summary to thread {thread_id}")
                return True
            else:
                logger.error(f"Failed to create summary for thread {thread_id}")
                return False

        except Exception as e:
            logger.error(f"Error in check_and_summarize_if_needed: {str(e)}", exc_info=True)
            return False
        # Keep half from the beginning and half from the end
        keep_start = max_messages // 2
        keep_end = max_messages - keep_start

        return messages[:keep_start] + messages[-keep_end:]
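
A standalone sketch of the middle-out truncation that safe_truncate applies to over-long strings (re-implemented here for illustration, not the class method itself):

    def middle_out(text: str, max_length: int = 1000) -> str:
        if len(text) <= max_length:
            return text
        keep = max_length - 150            # reserve room for the truncation notice
        start, end = keep // 2, keep - keep // 2
        return text[:start] + "\n\n... (middle truncated) ...\n\n" + text[-end:]

    print(len(middle_out("x" * 5000)))     # head and tail survive; the middle is dropped

Keeping both ends rather than hard-truncating the tail preserves the most recent context, which is usually what the model needs to continue the task.
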
@@ -1602,51 +1602,6 @@ class ResponseProcessor:
                )
                return message_obj  # Return the full message object

            # Check if this is an MCP tool (function_name starts with "call_mcp_tool")
            function_name = tool_call.get("function_name", "")

            # Check if this is an MCP tool - either the old call_mcp_tool or a dynamically registered MCP tool
            is_mcp_tool = False
            if function_name == "call_mcp_tool":
                is_mcp_tool = True
            else:
                # Check if the result indicates it's an MCP tool by looking for MCP metadata
                if hasattr(result, 'output') and isinstance(result.output, str):
                    # Check for MCP metadata pattern in the output
                    if "MCP Tool Result from" in result.output and "Tool Metadata:" in result.output:
                        is_mcp_tool = True
                    # Also check for MCP metadata in JSON format
                    elif "mcp_metadata" in result.output:
                        is_mcp_tool = True

            if is_mcp_tool:
                # Special handling for MCP tools - make content prominent and LLM-friendly
                result_role = "user" if strategy == "user_message" else "assistant"

                # Extract the actual content from the ToolResult
                if hasattr(result, 'output'):
                    mcp_content = str(result.output)
                else:
                    mcp_content = str(result)

                # Create a simple, LLM-friendly message format that puts content first
                simple_message = {
                    "role": result_role,
                    "content": mcp_content  # Direct content, no complex nesting
                }

                logger.info(f"Adding MCP tool result with simplified format for LLM visibility")
                self.trace.event(name="adding_mcp_tool_result_simplified", level="DEFAULT", status_message="Adding MCP tool result with simplified format for LLM visibility")

                message_obj = await self.add_message(
                    thread_id=thread_id,
                    type="tool",
                    content=simple_message,
                    is_llm_message=True,
                    metadata=metadata
                )
                return message_obj

            # For XML and other non-native tools, use the new structured format
            # Determine message role based on strategy
            result_role = "user" if strategy == "user_message" else "assistant"
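
The output-based part of the detection above reduces to a small predicate; a standalone sketch for illustration:

    def looks_like_mcp_result(output: str) -> bool:
        # Legacy text format: both marker strings must be present.
        if "MCP Tool Result from" in output and "Tool Metadata:" in output:
            return True
        # JSON format: the metadata key alone is enough.
        return "mcp_metadata" in output
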
@@ -1781,28 +1736,6 @@ class ResponseProcessor:

        return structured_result_v1

    def _format_xml_tool_result(self, tool_call: Dict[str, Any], result: ToolResult) -> str:
        """Format a tool result wrapped in a <tool_result> tag.

        DEPRECATED: This method is kept for backwards compatibility.
        New implementations should use _create_structured_tool_result instead.

        Args:
            tool_call: The tool call that was executed
            result: The result of the tool execution

        Returns:
            String containing the formatted result wrapped in <tool_result> tag
        """
        # Always use xml_tag_name if it exists
        if "xml_tag_name" in tool_call:
            xml_tag_name = tool_call["xml_tag_name"]
            return f"<tool_result> <{xml_tag_name}> {str(result)} </{xml_tag_name}> </tool_result>"

        # Non-XML tool, just return the function result
        function_name = tool_call["function_name"]
        return f"Result for {function_name}: {str(result)}"

    def _create_tool_context(self, tool_call: Dict[str, Any], tool_index: int, assistant_message_id: Optional[str] = None, parsing_details: Optional[Dict[str, Any]] = None) -> ToolExecutionContext:
        """Create a tool execution context with display name and parsing details populated."""
        context = ToolExecutionContext(
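
For reference, the deprecated wrapper yields strings of this shape (the tag name and inner ToolResult text here are illustrative):

    <tool_result> <execute-command> ToolResult(success=True, output='...') </execute-command> </tool_result>
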
@@ -25,7 +25,7 @@ from utils.logger import logger
from langfuse.client import StatefulGenerationClient, StatefulTraceClient
from services.langfuse import langfuse
import datetime
from litellm import token_counter
from litellm.utils import token_counter

# Type alias for tool choice
ToolChoice = Literal["auto", "required", "none"]
@@ -65,279 +65,6 @@ class ThreadManager:
        )
        self.context_manager = ContextManager()

    def _is_tool_result_message(self, msg: Dict[str, Any]) -> bool:
        if not ("content" in msg and msg['content']):
            return False
        content = msg['content']
        if isinstance(content, str) and "ToolResult" in content: return True
        if isinstance(content, dict) and "tool_execution" in content: return True
        if isinstance(content, dict) and "interactive_elements" in content: return True
        if isinstance(content, str):
            try:
                parsed_content = json.loads(content)
                if isinstance(parsed_content, dict) and "tool_execution" in parsed_content: return True
                if isinstance(parsed_content, dict) and "interactive_elements" in content: return True
            except (json.JSONDecodeError, TypeError):
                pass
        return False
    def _compress_message(self, msg_content: Union[str, dict], message_id: Optional[str] = None, max_length: int = 3000) -> Union[str, dict]:
        """Compress the message content."""
        # print("max_length", max_length)
        if isinstance(msg_content, str):
            if len(msg_content) > max_length:
                return msg_content[:max_length] + "... (truncated)" + f"\n\nmessage_id \"{message_id}\"\nUse expand-message tool to see contents"
            else:
                return msg_content
        elif isinstance(msg_content, dict):
            if len(json.dumps(msg_content)) > max_length:
                return json.dumps(msg_content)[:max_length] + "... (truncated)" + f"\n\nmessage_id \"{message_id}\"\nUse expand-message tool to see contents"
            else:
                return msg_content
    def _safe_truncate(self, msg_content: Union[str, dict], max_length: int = 100000) -> Union[str, dict]:
        """Truncate the message content safely by removing the middle portion."""
        max_length = min(max_length, 100000)
        if isinstance(msg_content, str):
            if len(msg_content) > max_length:
                # Calculate how much to keep from start and end
                keep_length = max_length - 150  # Reserve space for truncation message
                start_length = keep_length // 2
                end_length = keep_length - start_length

                start_part = msg_content[:start_length]
                end_part = msg_content[-end_length:] if end_length > 0 else ""

                return start_part + f"\n\n... (middle truncated) ...\n\n" + end_part + f"\n\nThis message is too long, repeat relevant information in your response to remember it"
            else:
                return msg_content
        elif isinstance(msg_content, dict):
            json_str = json.dumps(msg_content)
            if len(json_str) > max_length:
                # Calculate how much to keep from start and end
                keep_length = max_length - 150  # Reserve space for truncation message
                start_length = keep_length // 2
                end_length = keep_length - start_length

                start_part = json_str[:start_length]
                end_part = json_str[-end_length:] if end_length > 0 else ""

                return start_part + f"\n\n... (middle truncated) ...\n\n" + end_part + f"\n\nThis message is too long, repeat relevant information in your response to remember it"
            else:
                return msg_content
    def _compress_tool_result_messages(self, messages: List[Dict[str, Any]], llm_model: str, max_tokens: Optional[int], token_threshold: Optional[int] = 1000) -> List[Dict[str, Any]]:
        """Compress the tool result messages except the most recent one."""
        uncompressed_total_token_count = token_counter(model=llm_model, messages=messages)

        if uncompressed_total_token_count > (max_tokens or (100 * 1000)):
            _i = 0  # Count the number of ToolResult messages
            for msg in reversed(messages):  # Start from the end and work backwards
                if self._is_tool_result_message(msg):  # Only compress ToolResult messages
                    _i += 1  # Count the number of ToolResult messages
                    msg_token_count = token_counter(messages=[msg])  # Count the number of tokens in the message
                    if msg_token_count > token_threshold:  # If the message is too long
                        if _i > 1:  # If this is not the most recent ToolResult message
                            message_id = msg.get('message_id')  # Get the message_id
                            if message_id:
                                msg["content"] = self._compress_message(msg["content"], message_id, token_threshold * 3)
                            else:
                                logger.warning(f"UNEXPECTED: Message has no message_id {str(msg)[:100]}")
                        else:
                            msg["content"] = self._safe_truncate(msg["content"], int(max_tokens * 2))
        return messages
    def _compress_user_messages(self, messages: List[Dict[str, Any]], llm_model: str, max_tokens: Optional[int], token_threshold: Optional[int] = 1000) -> List[Dict[str, Any]]:
        """Compress the user messages except the most recent one."""
        uncompressed_total_token_count = token_counter(model=llm_model, messages=messages)

        if uncompressed_total_token_count > (max_tokens or (100 * 1000)):
            _i = 0  # Count the number of User messages
            for msg in reversed(messages):  # Start from the end and work backwards
                if msg.get('role') == 'user':  # Only compress User messages
                    _i += 1  # Count the number of User messages
                    msg_token_count = token_counter(messages=[msg])  # Count the number of tokens in the message
                    if msg_token_count > token_threshold:  # If the message is too long
                        if _i > 1:  # If this is not the most recent User message
                            message_id = msg.get('message_id')  # Get the message_id
                            if message_id:
                                msg["content"] = self._compress_message(msg["content"], message_id, token_threshold * 3)
                            else:
                                logger.warning(f"UNEXPECTED: Message has no message_id {str(msg)[:100]}")
                        else:
                            msg["content"] = self._safe_truncate(msg["content"], int(max_tokens * 2))
        return messages
    def _compress_assistant_messages(self, messages: List[Dict[str, Any]], llm_model: str, max_tokens: Optional[int], token_threshold: Optional[int] = 1000) -> List[Dict[str, Any]]:
        """Compress the assistant messages except the most recent one."""
        uncompressed_total_token_count = token_counter(model=llm_model, messages=messages)
        if uncompressed_total_token_count > (max_tokens or (100 * 1000)):
            _i = 0  # Count the number of Assistant messages
            for msg in reversed(messages):  # Start from the end and work backwards
                if msg.get('role') == 'assistant':  # Only compress Assistant messages
                    _i += 1  # Count the number of Assistant messages
                    msg_token_count = token_counter(messages=[msg])  # Count the number of tokens in the message
                    if msg_token_count > token_threshold:  # If the message is too long
                        if _i > 1:  # If this is not the most recent Assistant message
                            message_id = msg.get('message_id')  # Get the message_id
                            if message_id:
                                msg["content"] = self._compress_message(msg["content"], message_id, token_threshold * 3)
                            else:
                                logger.warning(f"UNEXPECTED: Message has no message_id {str(msg)[:100]}")
                        else:
                            msg["content"] = self._safe_truncate(msg["content"], int(max_tokens * 2))

        return messages

    def _remove_meta_messages(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Remove meta messages from the messages."""
        result: List[Dict[str, Any]] = []
        for msg in messages:
            msg_content = msg.get('content')
            # Try to parse msg_content as JSON if it's a string
            if isinstance(msg_content, str):
                try: msg_content = json.loads(msg_content)
                except json.JSONDecodeError: pass
            if isinstance(msg_content, dict):
                # Create a copy to avoid modifying the original
                msg_content_copy = msg_content.copy()
                if "tool_execution" in msg_content_copy:
                    tool_execution = msg_content_copy["tool_execution"].copy()
                    if "arguments" in tool_execution:
                        del tool_execution["arguments"]
                    msg_content_copy["tool_execution"] = tool_execution
                # Create a new message dict with the modified content
                new_msg = msg.copy()
                new_msg["content"] = json.dumps(msg_content_copy)
                result.append(new_msg)
            else:
                result.append(msg)
        return result
    def _compress_messages(self, messages: List[Dict[str, Any]], llm_model: str, max_tokens: Optional[int] = 41000, token_threshold: Optional[int] = 4096, max_iterations: int = 5) -> List[Dict[str, Any]]:
        """Compress the messages.
        token_threshold: must be a power of 2
        """

        if 'sonnet' in llm_model.lower():
            max_tokens = 200 * 1000 - 64000 - 28000
        elif 'gpt' in llm_model.lower():
            max_tokens = 128 * 1000 - 28000
        elif 'gemini' in llm_model.lower():
            max_tokens = 1000 * 1000 - 300000
        elif 'deepseek' in llm_model.lower():
            max_tokens = 128 * 1000 - 28000
        else:
            max_tokens = 41 * 1000 - 10000

        result = messages
        result = self._remove_meta_messages(result)

        uncompressed_total_token_count = token_counter(model=llm_model, messages=result)

        result = self._compress_tool_result_messages(result, llm_model, max_tokens, token_threshold)
        result = self._compress_user_messages(result, llm_model, max_tokens, token_threshold)
        result = self._compress_assistant_messages(result, llm_model, max_tokens, token_threshold)

        compressed_token_count = token_counter(model=llm_model, messages=result)

        logger.info(f"_compress_messages: {uncompressed_total_token_count} -> {compressed_token_count}")  # Log the token compression for debugging later

        if max_iterations <= 0:
            logger.warning(f"_compress_messages: Max iterations reached, omitting messages")
            result = self._compress_messages_by_omitting_messages(messages, llm_model, max_tokens)
            return result

        if (compressed_token_count > max_tokens):
            logger.warning(f"Further token compression is needed: {compressed_token_count} > {max_tokens}")
            result = self._compress_messages(messages, llm_model, max_tokens, int(token_threshold / 2), max_iterations - 1)

        return self._middle_out_messages(result)
    def _compress_messages_by_omitting_messages(
        self,
        messages: List[Dict[str, Any]],
        llm_model: str,
        max_tokens: Optional[int] = 41000,
        removal_batch_size: int = 10,
        min_messages_to_keep: int = 10
    ) -> List[Dict[str, Any]]:
        """Compress the messages by omitting messages from the middle.

        Args:
            messages: List of messages to compress
            llm_model: Model name for token counting
            max_tokens: Maximum allowed tokens
            removal_batch_size: Number of messages to remove per iteration
            min_messages_to_keep: Minimum number of messages to preserve
        """
        if not messages:
            return messages

        result = messages
        result = self._remove_meta_messages(result)

        # Early exit if no compression needed
        initial_token_count = token_counter(model=llm_model, messages=result)
        max_allowed_tokens = max_tokens or (100 * 1000)

        if initial_token_count <= max_allowed_tokens:
            return result

        # Separate system message (assumed to be first) from conversation messages
        system_message = messages[0] if messages and messages[0].get('role') == 'system' else None
        conversation_messages = result[1:] if system_message else result

        safety_limit = 500
        current_token_count = initial_token_count

        while current_token_count > max_allowed_tokens and safety_limit > 0:
            safety_limit -= 1

            if len(conversation_messages) <= min_messages_to_keep:
                logger.warning(f"Cannot compress further: only {len(conversation_messages)} messages remain (min: {min_messages_to_keep})")
                break

            # Calculate removal strategy based on current message count
            if len(conversation_messages) > (removal_batch_size * 2):
                # Remove from middle, keeping recent and early context
                middle_start = len(conversation_messages) // 2 - (removal_batch_size // 2)
                middle_end = middle_start + removal_batch_size
                conversation_messages = conversation_messages[:middle_start] + conversation_messages[middle_end:]
            else:
                # Remove from earlier messages, preserving recent context
                messages_to_remove = min(removal_batch_size, len(conversation_messages) // 2)
                if messages_to_remove > 0:
                    conversation_messages = conversation_messages[messages_to_remove:]
                else:
                    # Can't remove any more messages
                    break

            # Recalculate token count
            messages_to_count = ([system_message] + conversation_messages) if system_message else conversation_messages
            current_token_count = token_counter(model=llm_model, messages=messages_to_count)

        # Prepare final result
        final_messages = ([system_message] + conversation_messages) if system_message else conversation_messages
        final_token_count = token_counter(model=llm_model, messages=final_messages)

        logger.info(f"_compress_messages_by_omitting_messages: {initial_token_count} -> {final_token_count} tokens ({len(messages)} -> {len(final_messages)} messages)")

        return final_messages
    def _middle_out_messages(self, messages: List[Dict[str, Any]], max_messages: int = 320) -> List[Dict[str, Any]]:
        """Remove messages from the middle of the list, keeping max_messages total."""
        if len(messages) <= max_messages:
            return messages

        # Keep half from the beginning and half from the end
        keep_start = max_messages // 2
        keep_end = max_messages - keep_start

        return messages[:keep_start] + messages[-keep_end:]
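Note: with the default cap of 320, a 500-message thread keeps the first 160 and the last 160 entries, dropping indices 160-339. A quick check of that arithmetic:

msgs = list(range(500))
keep_start = 320 // 2        # 160
keep_end = 320 - keep_start  # 160
trimmed = msgs[:keep_start] + msgs[-keep_end:]
assert len(trimmed) == 320
assert trimmed[159] == 159 and trimmed[160] == 340  # the middle was removed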

    def add_tool(self, tool_class: Type[Tool], function_names: Optional[List[str]] = None, **kwargs):
        """Add a tool to the ThreadManager."""
        self.tool_registry.register_tool(tool_class, function_names, **kwargs)
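Note: for illustration, a hypothetical call site for add_tool; WebSearchTool and the "search" function name are placeholders, not identifiers taken from this diff:

# Register only the "search" function of a hypothetical WebSearchTool,
# forwarding constructor kwargs through to the tool instance.
thread_manager.add_tool(WebSearchTool, function_names=["search"], api_key="...")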
@ -637,7 +364,7 @@ Here are the XML tools available with examples:
        openapi_tool_schemas = self.tool_registry.get_openapi_schemas()
        logger.debug(f"Retrieved {len(openapi_tool_schemas) if openapi_tool_schemas else 0} OpenAPI tool schemas")

        prepared_messages = self._compress_messages(prepared_messages, llm_model)
        prepared_messages = self.context_manager.compress_messages(prepared_messages, llm_model)

        # 5. Make LLM API call
        logger.debug("Making LLM API call")
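Note: the hunk above replaces the ThreadManager-local _compress_messages with the ContextManager implementation. Only the call site is visible in this diff, so the target is assumed to have roughly this shape:

# Assumed shape of the delegation target; not shown in this diff.
class ContextManager:
    def compress_messages(self, messages, llm_model):
        # Presumably the same compression pipeline as ThreadManager._compress_messages,
        # now owned by the context manager.
        ...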
@ -22,7 +22,7 @@ from sandbox import api as sandbox_api
from services import billing as billing_api
from flags import api as feature_flags_api
from services import transcription as transcription_api
from services.mcp_custom import discover_custom_tools
from mcp_service.mcp_custom import discover_custom_tools
import sys
from services import email_api
from triggers import api as triggers_api
@ -151,8 +151,8 @@ app.include_router(billing_api.router, prefix="/api")

app.include_router(feature_flags_api.router, prefix="/api")

from mcp_local import api as mcp_api
from mcp_local import secure_api as secure_mcp_api
from mcp_service import api as mcp_api
from mcp_service import secure_api as secure_mcp_api

app.include_router(mcp_api.router, prefix="/api")
app.include_router(secure_mcp_api.router, prefix="/api/secure-mcp")

@ -81,8 +81,8 @@ services:

  redis:
    image: redis:8-alpine
    # ports:
    #   - "127.0.0.1:6379:6379"
    ports:
      - "127.0.0.1:6379:6379"
    volumes:
      - redis_data:/data
      - ./services/docker/redis.conf:/usr/local/etc/redis/redis.conf:ro

@ -104,8 +104,8 @@ services:

  rabbitmq:
    image: rabbitmq
    # ports:
    #   - "127.0.0.1:5672:5672"
    ports:
      - "127.0.0.1:5672:5672"
    volumes:
      - rabbitmq_data:/var/lib/rabbitmq
    restart: unless-stopped

@ -12,7 +12,7 @@ classifiers = [
requires-python = ">=3.11"
dependencies = [
    "python-dotenv==1.0.1",
    "litellm==1.66.1",
    "litellm==1.72.2",
    "click==8.1.7",
    "questionary==2.0.1",
    "requests==2.32.3",

@ -38,7 +38,7 @@ dependencies = [
    "daytona-api-client==0.21.0",
    "daytona-api-client-async==0.21.0",
    "boto3==1.37.3",
    "openai==1.72.0",
    "openai==1.90.0",
    "nest-asyncio==1.6.0",
    "vncdotool==1.2.0",
    "tavily-python==0.5.4",

@ -1,299 +0,0 @@
import os
import sys
import json
import asyncio
import subprocess
from typing import Dict, Any
from concurrent.futures import ThreadPoolExecutor
from fastapi import HTTPException  # type: ignore
from utils.logger import logger
from mcp import ClientSession
from mcp.client.sse import sse_client  # type: ignore
from mcp.client.streamable_http import streamablehttp_client  # type: ignore

windows_executor = ThreadPoolExecutor(max_workers=4)

# def run_mcp_stdio_sync(command, args, env_vars, timeout=30):
#     try:
#         env = os.environ.copy()
#         env.update(env_vars)

#         full_command = [command] + args

#         process = subprocess.Popen(
#             full_command,
#             stdin=subprocess.PIPE,
#             stdout=subprocess.PIPE,
#             stderr=subprocess.PIPE,
#             env=env,
#             text=True,
#             bufsize=0,
#             creationflags=subprocess.CREATE_NEW_PROCESS_GROUP if sys.platform == "win32" else 0
#         )

#         init_request = {
#             "jsonrpc": "2.0",
#             "id": 1,
#             "method": "initialize",
#             "params": {
#                 "protocolVersion": "2024-11-05",
#                 "capabilities": {},
#                 "clientInfo": {"name": "mcp-client", "version": "1.0.0"}
#             }
#         }

#         process.stdin.write(json.dumps(init_request) + "\n")
#         process.stdin.flush()

#         init_response_line = process.stdout.readline().strip()
#         if not init_response_line:
#             raise Exception("No response from MCP server during initialization")

#         init_response = json.loads(init_response_line)

#         init_notification = {
#             "jsonrpc": "2.0",
#             "method": "notifications/initialized"
#         }
#         process.stdin.write(json.dumps(init_notification) + "\n")
#         process.stdin.flush()

#         tools_request = {
#             "jsonrpc": "2.0",
#             "id": 2,
#             "method": "tools/list",
#             "params": {}
#         }

#         process.stdin.write(json.dumps(tools_request) + "\n")
#         process.stdin.flush()

#         tools_response_line = process.stdout.readline().strip()
#         if not tools_response_line:
#             raise Exception("No response from MCP server for tools list")

#         tools_response = json.loads(tools_response_line)

#         tools_info = []
#         if "result" in tools_response and "tools" in tools_response["result"]:
#             for tool in tools_response["result"]["tools"]:
#                 tool_info = {
#                     "name": tool["name"],
#                     "description": tool.get("description", ""),
#                     "input_schema": tool.get("inputSchema", {})
#                 }
#                 tools_info.append(tool_info)

#         return {
#             "status": "connected",
#             "transport": "stdio",
#             "tools": tools_info
#         }

#     except subprocess.TimeoutExpired:
#         return {
#             "status": "error",
#             "error": f"Process timeout after {timeout} seconds",
#             "tools": []
#         }
#     except json.JSONDecodeError as e:
#         return {
#             "status": "error",
#             "error": f"Invalid JSON response: {str(e)}",
#             "tools": []
#         }
#     except Exception as e:
#         return {
#             "status": "error",
#             "error": str(e),
#             "tools": []
#         }
#     finally:
#         try:
#             if 'process' in locals():
#                 process.terminate()
#                 process.wait(timeout=5)
#         except:
#             pass


# async def connect_stdio_server_windows(server_name, server_config, all_tools, timeout):
#     """Windows-compatible stdio connection using subprocess"""

#     logger.info(f"Connecting to {server_name} using Windows subprocess method")

#     command = server_config["command"]
#     args = server_config.get("args", [])
#     env_vars = server_config.get("env", {})

#     loop = asyncio.get_event_loop()
#     result = await loop.run_in_executor(
#         windows_executor,
#         run_mcp_stdio_sync,
#         command,
#         args,
#         env_vars,
#         timeout
#     )

#     all_tools[server_name] = result

#     if result["status"] == "connected":
#         logger.info(f"  {server_name}: Connected via Windows subprocess ({len(result['tools'])} tools)")
#     else:
#         logger.error(f"  {server_name}: Error - {result['error']}")


# async def list_mcp_tools_mixed_windows(config, timeout=15):
#     all_tools = {}

#     if "mcpServers" not in config:
#         return all_tools

#     mcp_servers = config["mcpServers"]

#     for server_name, server_config in mcp_servers.items():
#         logger.info(f"Connecting to MCP server: {server_name}")
#         if server_config.get("disabled", False):
#             all_tools[server_name] = {"status": "disabled", "tools": []}
#             logger.info(f"  {server_name}: Disabled")
#             continue

#         try:
#             await connect_stdio_server_windows(server_name, server_config, all_tools, timeout)

#         except asyncio.TimeoutError:
#             all_tools[server_name] = {
#                 "status": "error",
#                 "error": f"Connection timeout after {timeout} seconds",
#                 "tools": []
#             }
#             logger.error(f"  {server_name}: Timeout after {timeout} seconds")
#         except Exception as e:
#             error_msg = str(e)
#             all_tools[server_name] = {
#                 "status": "error",
#                 "error": error_msg,
#                 "tools": []
#             }
#             logger.error(f"  {server_name}: Error - {error_msg}")
#             import traceback
#             logger.debug(f"Full traceback for {server_name}: {traceback.format_exc()}")

#     return all_tools


async def discover_custom_tools(request_type: str, config: Dict[str, Any]):
    logger.info(f"Received custom MCP discovery request: type={request_type}")
    logger.debug(f"Request config: {config}")

    tools = []
    server_name = None

    # if request_type == 'json':
    #     try:
    #         all_tools = await list_mcp_tools_mixed_windows(config, timeout=30)
    #         if "mcpServers" in config and config["mcpServers"]:
    #             server_name = list(config["mcpServers"].keys())[0]

    #             if server_name in all_tools:
    #                 server_info = all_tools[server_name]
    #                 if server_info["status"] == "connected":
    #                     tools = server_info["tools"]
    #                     logger.info(f"Found {len(tools)} tools for server {server_name}")
    #                 else:
    #                     error_msg = server_info.get("error", "Unknown error")
    #                     logger.error(f"Server {server_name} failed: {error_msg}")
    #                     raise HTTPException(
    #                         status_code=400,
    #                         detail=f"Failed to connect to MCP server '{server_name}': {error_msg}"
    #                     )
    #             else:
    #                 logger.error(f"Server {server_name} not found in results")
    #                 raise HTTPException(status_code=400, detail=f"Server '{server_name}' not found in results")
    #         else:
    #             logger.error("No MCP servers configured")
    #             raise HTTPException(status_code=400, detail="No MCP servers configured")

    #     except HTTPException:
    #         raise
    #     except Exception as e:
    #         logger.error(f"Error connecting to stdio MCP server: {e}")
    #         import traceback
    #         logger.error(f"Full traceback: {traceback.format_exc()}")
    #         raise HTTPException(status_code=400, detail=f"Failed to connect to MCP server: {str(e)}")

    # if request_type == 'http':
    #     if 'url' not in config:
    #         raise HTTPException(status_code=400, detail="HTTP configuration must include 'url' field")
    #     url = config['url']
    #     await connect_streamable_http_server(url)
    #     tools = await connect_streamable_http_server(url)

    # elif request_type == 'sse':
    #     if 'url' not in config:
    #         raise HTTPException(status_code=400, detail="SSE configuration must include 'url' field")

    #     url = config['url']
    #     headers = config.get('headers', {})

    #     try:
    #         async with asyncio.timeout(15):
    #             try:
    #                 async with sse_client(url, headers=headers) as (read, write):
    #                     async with ClientSession(read, write) as session:
    #                         await session.initialize()
    #                         tools_result = await session.list_tools()
    #                         tools_info = []
    #                         for tool in tools_result.tools:
    #                             tool_info = {
    #                                 "name": tool.name,
    #                                 "description": tool.description,
    #                                 "input_schema": tool.inputSchema
    #                             }
    #                             tools_info.append(tool_info)

    #                         for tool_info in tools_info:
    #                             tools.append({
    #                                 "name": tool_info["name"],
    #                                 "description": tool_info["description"],
    #                                 "inputSchema": tool_info["input_schema"]
    #                             })
    #             except TypeError as e:
    #                 if "unexpected keyword argument" in str(e):
    #                     async with sse_client(url) as (read, write):
    #                         async with ClientSession(read, write) as session:
    #                             await session.initialize()
    #                             tools_result = await session.list_tools()
    #                             tools_info = []
    #                             for tool in tools_result.tools:
    #                                 tool_info = {
    #                                     "name": tool.name,
    #                                     "description": tool.description,
    #                                     "input_schema": tool.inputSchema
    #                                 }
    #                                 tools_info.append(tool_info)

    #                             for tool_info in tools_info:
    #                                 tools.append({
    #                                     "name": tool_info["name"],
    #                                     "description": tool_info["description"],
    #                                     "inputSchema": tool_info["input_schema"]
    #                                 })
    #                 else:
    #                     raise
    #     except asyncio.TimeoutError:
    #         raise HTTPException(status_code=408, detail="Connection timeout - server took too long to respond")
    #     except Exception as e:
    #         logger.error(f"Error connecting to SSE MCP server: {e}")
    #         raise HTTPException(status_code=400, detail=f"Failed to connect to MCP server: {str(e)}")
    # else:
    #     raise HTTPException(status_code=400, detail="Invalid server type. Must be 'json' or 'sse'")

    # response_data = {"tools": tools, "count": len(tools)}

    # if server_name:
    #     response_data["serverName"] = server_name

    # logger.info(f"Returning {len(tools)} tools for server {server_name}")
    # return response_data
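Note: the deleted module above is almost entirely commented out; even the surviving discover_custom_tools body no longer does any work. For reference, a condensed sketch of the SSE discovery path it used to implement, using the same mcp-package calls as the commented-out code:

from mcp import ClientSession
from mcp.client.sse import sse_client

async def list_sse_tools(url: str, headers: dict | None = None) -> list[dict]:
    # Initialize an MCP session over SSE and list the server's tools,
    # mirroring the commented-out 'sse' branch above.
    async with sse_client(url, headers=headers or {}) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            result = await session.list_tools()
            return [
                {"name": t.name, "description": t.description, "inputSchema": t.inputSchema}
                for t in result.tools
            ]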
backend/uv.lock: file diff suppressed because it is too large (2700 changes)

@ -136,10 +136,10 @@ export function FooterSection() {
className="block w-full h-48 md:h-64 relative mt-24 z-0 cursor-pointer"
>
<div className="absolute inset-0 bg-gradient-to-t from-transparent to-background z-10 from-40%" />
<div className="absolute inset-0 mx-6">
<div className="absolute inset-0 ">
<FlickeringGrid
text={tablet ? 'Agents Agents Agents' : 'Agents Agents Agents'}
fontSize={tablet ? 70 : 90}
text={tablet ? 'Agents' : 'Agents Agents Agents'}
fontSize={tablet ? 60 : 90}
className="h-full w-full"
squareSize={2}
gridGap={tablet ? 2 : 3}

@ -8,7 +8,7 @@ interface KortixLogoProps {
size?: number;
}
export function KortixLogo({ size = 24 }: KortixLogoProps) {
const { theme } = useTheme();
const { theme, systemTheme } = useTheme();
const [mounted, setMounted] = useState(false);

// After mount, we can access the theme

@ -16,13 +16,17 @@ export function KortixLogo({ size = 24 }: KortixLogoProps) {
setMounted(true);
}, []);

const shouldInvert = mounted && (
theme === 'dark' || (theme === 'system' && systemTheme === 'dark')
);

return (
<Image
src="/kortix-symbol.svg"
alt="Kortix"
width={size}
height={size}
className={`${mounted && theme === 'dark' ? 'invert' : ''} flex-shrink-0`}
className={`${shouldInvert ? 'invert' : ''} flex-shrink-0`}
/>
);
}

@ -7,7 +7,6 @@ import {
Link as LinkIcon,
MoreHorizontal,
Trash2,
Plus,
MessagesSquare,
Loader2,
Share2,
@ -383,97 +382,37 @@ export function NavAgents() {
<Trash2 className="h-4 w-4" />
</Button>
</>
) : (
<Tooltip>
<TooltipTrigger asChild>
<div>
<Link
href="/dashboard"
className="text-muted-foreground hover:text-foreground h-7 w-7 flex items-center justify-center rounded-md"
>
<Plus className="h-4 w-4" />
<span className="sr-only">New Agent</span>
</Link>
</div>
</TooltipTrigger>
<TooltipContent>New Agent</TooltipContent>
</Tooltip>
)}
) : null}
</div>
) : null}
</div>

<SidebarMenu className="overflow-y-auto max-h-[calc(100vh-200px)] [&::-webkit-scrollbar]:hidden [-ms-overflow-style:'none'] [scrollbar-width:'none']">
{state === 'collapsed' && (
<SidebarMenuItem>
<Tooltip>
<TooltipTrigger asChild>
<div>
<SidebarMenuButton asChild>
<Link href="/dashboard" className="flex items-center">
<Plus className="h-4 w-4" />
<span>New Agent</span>
</Link>
</SidebarMenuButton>
</div>
</TooltipTrigger>
<TooltipContent>New Agent</TooltipContent>
</Tooltip>
</SidebarMenuItem>
)}

{isLoading ? (
// Show skeleton loaders while loading
Array.from({ length: 3 }).map((_, index) => (
<SidebarMenuItem key={`skeleton-${index}`}>
<SidebarMenuButton>
<div className="h-4 w-4 bg-sidebar-foreground/10 rounded-md animate-pulse"></div>
<div className="h-3 bg-sidebar-foreground/10 rounded w-3/4 animate-pulse"></div>
</SidebarMenuButton>
</SidebarMenuItem>
))
) : combinedThreads.length > 0 ? (
// Show all threads with project info

{state !== 'collapsed' && (
<>
{combinedThreads.map((thread) => {
// Check if this thread is currently active
const isActive = pathname?.includes(thread.threadId) || false;
const isThreadLoading = loadingThreadId === thread.threadId;
const isSelected = selectedThreads.has(thread.threadId);
{isLoading ? (
// Show skeleton loaders while loading
Array.from({ length: 3 }).map((_, index) => (
<SidebarMenuItem key={`skeleton-${index}`}>
<SidebarMenuButton>
<div className="h-4 w-4 bg-sidebar-foreground/10 rounded-md animate-pulse"></div>
<div className="h-3 bg-sidebar-foreground/10 rounded w-3/4 animate-pulse"></div>
</SidebarMenuButton>
</SidebarMenuItem>
))
) : combinedThreads.length > 0 ? (
// Show all threads with project info
<>
{combinedThreads.map((thread) => {
// Check if this thread is currently active
const isActive = pathname?.includes(thread.threadId) || false;
const isThreadLoading = loadingThreadId === thread.threadId;
const isSelected = selectedThreads.has(thread.threadId);

return (
<SidebarMenuItem key={`thread-${thread.threadId}`} className="group">
{state === 'collapsed' ? (
<Tooltip>
<TooltipTrigger asChild>
<div>
<SidebarMenuButton
asChild
className={
isActive ? 'bg-accent text-accent-foreground' :
isSelected ? 'bg-primary/10' : ''
}
>
<Link
href={thread.url}
onClick={(e) =>
handleThreadClick(e, thread.threadId, thread.url)
}
>
{isThreadLoading ? (
<Loader2 className="h-4 w-4 animate-spin" />
) : (
<MessagesSquare className="h-4 w-4" />
)}
<span>{thread.projectName}</span>
</Link>
</SidebarMenuButton>
</div>
</TooltipTrigger>
<TooltipContent>{thread.projectName}</TooltipContent>
</Tooltip>
) : (
<div className="relative">
return (
<SidebarMenuItem key={`thread-${thread.threadId}`} className="group/row">
<SidebarMenuButton
asChild
className={`relative ${isActive
@ -483,113 +422,101 @@ export function NavAgents() {
: ''
}`}
>
<Link
href={thread.url}
onClick={(e) =>
handleThreadClick(e, thread.threadId, thread.url)
}
className="flex items-center"
>
<div className="flex items-center group/icon relative">
{/* Show checkbox on hover or when selected, otherwise show MessagesSquare */}
{isThreadLoading ? (
<Loader2 className="h-4 w-4 animate-spin" />
) : (
<>
{/* MessagesSquare icon - hidden on hover if not selected */}
<MessagesSquare
className={`h-4 w-4 transition-opacity duration-150 ${isSelected ? 'opacity-0' : 'opacity-100 group-hover/icon:opacity-0'
}`}
/>

{/* Checkbox - appears on hover or when selected */}
<div
className={`absolute inset-0 flex items-center justify-center transition-opacity duration-150 ${isSelected
? 'opacity-100'
: 'opacity-0 group-hover/icon:opacity-100'
}`}
onClick={(e) => toggleThreadSelection(thread.threadId, e)}
>
<div
className={`h-4 w-4 border rounded cursor-pointer hover:bg-muted/50 transition-colors flex items-center justify-center ${isSelected
? 'bg-primary border-primary'
: 'border-muted-foreground/30 bg-background'
}`}
>
{isSelected && <Check className="h-3 w-3 text-primary-foreground" />}
</div>
</div>
</>
)}
</div>
<span className="ml-2">{thread.projectName}</span>
</Link>
</SidebarMenuButton>
</div>
)}
{state !== 'collapsed' && !isSelected && (
<DropdownMenu>
<DropdownMenuTrigger asChild>
<SidebarMenuAction
showOnHover
className="group-hover:opacity-100"
onClick={() => {
// Ensure pointer events are enabled when dropdown opens
document.body.style.pointerEvents = 'auto';
}}
>
<MoreHorizontal />
<span className="sr-only">More</span>
</SidebarMenuAction>
</DropdownMenuTrigger>
<DropdownMenuContent
className="w-56 rounded-lg"
side={isMobile ? 'bottom' : 'right'}
align={isMobile ? 'end' : 'start'}
>
<DropdownMenuItem onClick={() => {
setSelectedItem({ threadId: thread?.threadId, projectId: thread?.projectId })
setShowShareModal(true)
}}>
<Share2 className="text-muted-foreground" />
<span>Share Chat</span>
</DropdownMenuItem>
<DropdownMenuItem asChild>
<a
<div className="flex items-center w-full">
<Link
href={thread.url}
target="_blank"
rel="noopener noreferrer"
onClick={(e) =>
handleThreadClick(e, thread.threadId, thread.url)
}
className="flex items-center flex-1 min-w-0"
>
<ArrowUpRight className="text-muted-foreground" />
<span>Open in New Tab</span>
</a>
</DropdownMenuItem>
<DropdownMenuSeparator />
<DropdownMenuItem
onClick={() =>
handleDeleteThread(
thread.threadId,
thread.projectName,
)
}
>
<Trash2 className="text-muted-foreground" />
<span>Delete</span>
</DropdownMenuItem>
</DropdownMenuContent>
</DropdownMenu>
)}
</SidebarMenuItem>
);
})}
{isThreadLoading ? (
<Loader2 className="h-4 w-4 animate-spin mr-2 flex-shrink-0" />
) : null}
<span className="truncate">{thread.projectName}</span>
</Link>

{/* Checkbox - only visible on hover of this specific area */}
<div
className="mr-1 flex-shrink-0 w-4 h-4 flex items-center justify-center group/checkbox"
onClick={(e) => toggleThreadSelection(thread.threadId, e)}
>
<div
className={`h-4 w-4 border rounded cursor-pointer transition-all duration-150 flex items-center justify-center ${isSelected
? 'opacity-100 bg-primary border-primary hover:bg-primary/90'
: 'opacity-0 group-hover/checkbox:opacity-100 border-muted-foreground/30 bg-background hover:bg-muted/50'
}`}
>
{isSelected && <Check className="h-3 w-3 text-primary-foreground" />}
</div>
</div>

{/* Dropdown Menu - inline with content */}
<DropdownMenu>
<DropdownMenuTrigger asChild>
<button
className="flex-shrink-0 w-4 h-4 flex items-center justify-center hover:bg-muted/50 rounded transition-all duration-150 text-muted-foreground hover:text-foreground opacity-0 group-hover/row:opacity-100"
onClick={(e) => {
e.preventDefault();
e.stopPropagation();
// Ensure pointer events are enabled when dropdown opens
document.body.style.pointerEvents = 'auto';
}}
>
<MoreHorizontal className="h-4 w-4" />
<span className="sr-only">More actions</span>
</button>
</DropdownMenuTrigger>
<DropdownMenuContent
className="w-56 rounded-lg"
side={isMobile ? 'bottom' : 'right'}
align={isMobile ? 'end' : 'start'}
>
<DropdownMenuItem onClick={() => {
setSelectedItem({ threadId: thread?.threadId, projectId: thread?.projectId })
setShowShareModal(true)
}}>
<Share2 className="text-muted-foreground" />
<span>Share Chat</span>
</DropdownMenuItem>
<DropdownMenuItem asChild>
<a
href={thread.url}
target="_blank"
rel="noopener noreferrer"
>
<ArrowUpRight className="text-muted-foreground" />
<span>Open in New Tab</span>
</a>
</DropdownMenuItem>
<DropdownMenuSeparator />
<DropdownMenuItem
onClick={() =>
handleDeleteThread(
thread.threadId,
thread.projectName,
)
}
>
<Trash2 className="text-muted-foreground" />
<span>Delete</span>
</DropdownMenuItem>
</DropdownMenuContent>
</DropdownMenu>
</div>
</SidebarMenuButton>
</SidebarMenuItem>
);
})}
</>
) : (
<SidebarMenuItem>
<SidebarMenuButton className="text-sidebar-foreground/70">
<span>No tasks yet</span>
</SidebarMenuButton>
</SidebarMenuItem>
)}
</>
) : (
<SidebarMenuItem>
<SidebarMenuButton className="text-sidebar-foreground/70">
<MessagesSquare className="h-4 w-4" />
<span>No tasks yet</span>
</SidebarMenuButton>
</SidebarMenuItem>
)}
</SidebarMenu>

@ -2,7 +2,7 @@

import * as React from 'react';
import Link from 'next/link';
import { Bot, Menu, Store, Shield, Key, Workflow } from 'lucide-react';
import { Bot, Menu, Store, Shield, Key, Workflow, Plus } from 'lucide-react';

import { NavAgents } from '@/components/sidebar/nav-agents';
import { NavUserWithTeams } from '@/components/sidebar/nav-user-with-teams';
@ -132,22 +132,20 @@ export function SidebarLeft({
<SidebarContent className="[&::-webkit-scrollbar]:hidden [-ms-overflow-style:'none'] [scrollbar-width:'none']">
{!flagsLoading && (customAgentsEnabled || marketplaceEnabled) && (
<SidebarGroup>
{customAgentsEnabled && (
<Link href="/agents">
<SidebarMenuButton className={cn({
'bg-primary/10 font-medium': pathname === '/agents',
})}>
<Bot className="h-4 w-4 mr-2" />
<span className="flex items-center justify-between w-full">
Agent Playground
</span>
</SidebarMenuButton>
</Link>
)}
<Link href="/dashboard">
<SidebarMenuButton className={cn({
'bg-accent text-accent-foreground font-medium': pathname === '/dashboard',
})}>
<Plus className="h-4 w-4 mr-2" />
<span className="flex items-center justify-between w-full">
New Task
</span>
</SidebarMenuButton>
</Link>
{marketplaceEnabled && (
<Link href="/marketplace">
<SidebarMenuButton className={cn({
'bg-primary/10 font-medium': pathname === '/marketplace',
'bg-accent text-accent-foreground font-medium': pathname === '/marketplace',
})}>
<Store className="h-4 w-4 mr-2" />
<span className="flex items-center justify-between w-full">

@ -156,10 +154,22 @@ export function SidebarLeft({
</SidebarMenuButton>
</Link>
)}
{customAgentsEnabled && (
<Link href="/agents">
<SidebarMenuButton className={cn({
'bg-accent text-accent-foreground font-medium': pathname === '/agents',
})}>
<Bot className="h-4 w-4 mr-2" />
<span className="flex items-center justify-between w-full">
Agent Playground
</span>
</SidebarMenuButton>
</Link>
)}
{customAgentsEnabled && (
<Link href="/settings/credentials">
<SidebarMenuButton className={cn({
'bg-primary/10 font-medium': pathname === '/settings/credentials',
'bg-accent text-accent-foreground font-medium': pathname === '/settings/credentials',
})}>
<Key className="h-4 w-4 mr-2" />
<span className="flex items-center justify-between w-full">

@ -311,7 +311,7 @@ export const ChatInput = forwardRef<ChatInputHandles, ChatInputProps>(
</div>
</Card>

{isAgentRunning && (
{/* {isAgentRunning && (
<motion.div
initial={{ opacity: 0, y: -10 }}
animate={{ opacity: 1, y: 0 }}

@ -322,7 +322,8 @@ export const ChatInput = forwardRef<ChatInputHandles, ChatInputProps>(
<span>{agentName ? `${agentName} is working...` : 'Suna is working...'}</span>
</div>
</motion.div>
)}
)} */}

</div>
);
},

@ -264,7 +264,7 @@ export function SiteHeader({
</TooltipContent>
</Tooltip>

<Tooltip>
{/* <Tooltip>
<TooltipTrigger asChild>
<Button
variant="ghost"

@ -278,7 +278,7 @@ export function SiteHeader({
<TooltipContent>
<p>Toggle Computer Preview (CMD+I)</p>
</TooltipContent>
</Tooltip>
</Tooltip> */}
</TooltipProvider>
)}
</div>

@ -143,7 +143,7 @@ export const siteConfig = {
hours: '2 hours',
features: [
'$20/month usage',
'+ $5 free included',
// '+ $5 free included',
'Private projects',
'More models',
],

@ -165,7 +165,7 @@ export const siteConfig = {
hours: '6 hours',
features: [
'$50/month usage',
'+ $5 free included',
// '+ $5 free included',
'Private projects',
'More models',
],

@ -186,7 +186,7 @@ export const siteConfig = {
hours: '12 hours',
features: [
'$100/month usage',
'+ $5 free included',
// '+ $5 free included',
'Private projects',
'Priority support',
],

@ -208,7 +208,7 @@ export const siteConfig = {
hours: '25 hours',
features: [
'$200/month usage',
'+ $5 free included',
// '+ $5 free included',
'Private projects',
'More models',
],

@ -229,7 +229,7 @@ export const siteConfig = {
hours: '50 hours',
features: [
'$400/month usage',
'+ $5 free included',
// '+ $5 free included',
'Private projects',
'Access to intelligent Model (Full Suna)',
'Priority support',

@ -253,7 +253,7 @@ export const siteConfig = {
hours: '125 hours',
features: [
'$800/month usage',
'+ $5 free included',
// '+ $5 free included',
'Private projects',
'Access to intelligent Model (Full Suna)',
'Priority support',

@ -278,7 +278,7 @@ export const siteConfig = {
hours: '200 hours',
features: [
'$1000/month usage',
'+ $5 free included',
// '+ $5 free included',
'Private projects',
'Access to intelligent Model (Full Suna)',
'Priority support',