Merge pull request #36 from kortix-ai/bring-back-browser-use

Bring back using browser with pure playwright
2025-04-15 19:11:29 +01:00 · 2025-04-15 19:11:29 +01:00 · f0d7392d3b
parent bd847bafbb f0c3c52cf4
commit f0d7392d3b
17 changed files with 2915 additions and 281 deletions
--- a/backend/agent/prompt.py
+++ b/backend/agent/prompt.py
@ -57,6 +57,17 @@ You have the ability to execute operations using both Python and CLI tools:
 - Finding recent news, articles, and information beyond training data
 - Crawling webpage content for detailed information extraction

+### 2.2.5 BROWSER TOOLS AND CAPABILITIES
+- BROWSER OPERATIONS:
+  * Navigate to URLs and manage history
+  * Fill forms and submit data
+  * Click elements and interact with pages
+  * Extract text and HTML content
+  * Wait for elements to load
+  * Scroll pages and handle infinite scroll
+  * YOU CAN DO ANYTHING ON THE BROWSER - including clicking on elements, filling forms, submitting data, etc.
+  * The browser is in a sandboxed environment, so nothing to worry about.
+
 # 3. TOOLKIT & METHODOLOGY

 ## 3.1 TOOL SELECTION PRINCIPLES
--- a/backend/agent/run.py
+++ b/backend/agent/run.py
@ -12,6 +12,7 @@ from agentpress.thread_manager import ThreadManager
 from agentpress.response_processor import ProcessorConfig
 from agent.tools.sb_shell_tool import SandboxShellTool
 from agent.tools.sb_files_tool import SandboxFilesTool
+from agent.tools.sb_browser_tool import SandboxBrowserTool
 from agent.prompt import get_system_prompt
 from sandbox.sandbox import daytona, create_sandbox, get_or_start_sandbox
 from utils.billing import check_billing_status, get_account_id_from_thread
@ -52,22 +53,28 @@ async def run_agent(thread_id: str, project_id: str, stream: bool = True, thread
    else:
        sandbox_pass = str(uuid4())
        sandbox = create_sandbox(sandbox_pass)
+        print(f"\033[91m{sandbox.get_preview_link(6080)}/vnc_lite.html?password={sandbox_pass}\033[0m")
        sandbox_id = sandbox.id
        await client.table('projects').update({
            'sandbox': {
                'id': sandbox_id,
-                'pass': sandbox_pass
+                'pass': sandbox_pass,
+                'vnc_preview': sandbox.get_preview_link(6080)
            }
        }).eq('project_id', project_id).execute()
    
-    # thread_manager.add_tool(SandboxBrowseTool, sandbox=sandbox)
    thread_manager.add_tool(SandboxShellTool, sandbox=sandbox)
    thread_manager.add_tool(SandboxFilesTool, sandbox=sandbox)
+    thread_manager.add_tool(SandboxBrowserTool, sandbox=sandbox, thread_id=thread_id, thread_manager=thread_manager)
+    thread_manager.add_tool(SandboxDeployTool, sandbox=sandbox)
    thread_manager.add_tool(MessageTool)
    thread_manager.add_tool(WebSearchTool)
-    thread_manager.add_tool(SandboxDeployTool, sandbox=sandbox)

-    system_message = { "role": "system", "content": get_system_prompt() }
+    xml_examples = ""
+    for tag_name, example in thread_manager.tool_registry.get_xml_examples().items():
+        xml_examples += f"{example}\n"
+
+    system_message = { "role": "system", "content": get_system_prompt() + "\n\n" + f"<tool_examples>\n{xml_examples}\n</tool_examples>" }

    model_name = "anthropic/claude-3-7-sonnet-latest"
    # model_name = "groq/llama-3.3-70b-versatile"
@ -108,6 +115,37 @@ async def run_agent(thread_id: str, project_id: str, stream: bool = True, thread
                print(f"Last message was from assistant, stopping execution")
                continue_execution = False
                break
+        # Get the latest message from the messages table whose type is browser_state
+        
+        latest_browser_state = await client.table('messages').select('*').eq('thread_id', thread_id).eq('type', 'browser_state').order('created_at', desc=True).limit(1).execute()
+        temporary_message = None
+        if latest_browser_state.data and len(latest_browser_state.data) > 0:
+            try:
+                content = json.loads(latest_browser_state.data[0]["content"])
+                screenshot_base64 = content["screenshot_base64"]
+                # Create a copy of the browser state without screenshot
+                browser_state = content.copy()
+                browser_state.pop('screenshot_base64', None)
+                browser_state.pop('screenshot_url', None) 
+                browser_state.pop('screenshot_url_base64', None)
+                temporary_message = { "role": "user", "content": [] }
+                if browser_state:
+                    temporary_message["content"].append({
+                        "type": "text",
+                        "text": f"The following is the current state of the browser:\n{browser_state}"
+                    })
+                if screenshot_base64:
+                    temporary_message["content"].append({
+                        "type": "image_url",
+                            "image_url": {
+                                "url": f"data:image/jpeg;base64,{screenshot_base64}",
+                            }
+                    })
+                else:
+                    print("@@@@@ THIS TIME NO SCREENSHOT!!")
+            except Exception as e:
+                print(f"Error parsing browser state: {e}")
+                # print(latest_browser_state.data[0])

        response = await thread_manager.run_thread(
            thread_id=thread_id,
@ -115,9 +153,10 @@ async def run_agent(thread_id: str, project_id: str, stream: bool = True, thread
            stream=stream,
            llm_model=model_name,
            llm_temperature=0,
-            llm_max_tokens=64000,
+            llm_max_tokens=128000,
            tool_choice="auto",
            max_xml_tool_calls=1,
+            temporary_message=temporary_message,
            processor_config=ProcessorConfig(
                xml_tool_calling=True,
                native_tool_calling=False,
--- a/backend/agent/tools/sb_browser_tool.py
+++ b/backend/agent/tools/sb_browser_tool.py
@ -0,0 +1,846 @@
+import traceback
+import json
+
+from agentpress.tool import ToolResult, openapi_schema, xml_schema
+from agentpress.thread_manager import ThreadManager
+from sandbox.sandbox import SandboxToolsBase, Sandbox
+from utils.logger import logger
+
+
+class SandboxBrowserTool(SandboxToolsBase):
+    """Tool for executing tasks in a Daytona sandbox with browser-use capabilities."""
+    
+    def __init__(self, sandbox: Sandbox, thread_id: str, thread_manager: ThreadManager):
+        """Bind the tool to a sandbox and to the thread it reports into.
+
+        Args:
+            sandbox (Sandbox): Sandbox handed to the base class initializer.
+            thread_id (str): Thread that receives ``browser_state`` messages.
+            thread_manager (ThreadManager): Manager used to add those messages.
+        """
+        super().__init__(sandbox)
+        # Both values are read later by _execute_browser_action.
+        self.thread_manager = thread_manager
+        self.thread_id = thread_id
+
+    async def _execute_browser_action(self, endpoint: str, params: dict = None, method: str = "POST") -> ToolResult:
+        """Call the sandbox's browser automation API and record the resulting state.
+
+        The request is issued by running ``curl`` inside the sandbox against
+        ``http://localhost:8002/api/automation/<endpoint>``. For ``GET`` requests
+        ``params`` are sent as a URL-encoded query string; for any other method
+        they are sent as a JSON body. Every dynamic part of the command line is
+        shell-quoted, so values containing quotes or shell metacharacters (for
+        example typed text such as ``it's``) cannot break or alter the command.
+
+        On success the full JSON result is stored on the thread as a
+        ``browser_state`` message and only a compact summary is returned.
+
+        Args:
+            endpoint (str): The API endpoint to call
+            params (dict, optional): Parameters to send. Defaults to None.
+            method (str, optional): HTTP method to use. Defaults to "POST".
+
+        Returns:
+            ToolResult: Success summary, or a failure result when curl exits
+                non-zero, the response is not a JSON object, or an exception
+                is raised along the way.
+        """
+        # Imported locally so the method stays self-contained.
+        import shlex
+        from urllib.parse import urlencode
+
+        try:
+            # Build the curl command
+            url = f"http://localhost:8002/api/automation/{endpoint}"
+            is_get = method == "GET"
+
+            if is_get and params:
+                # Percent-encode keys and values instead of splicing them in raw.
+                url = f"{url}?{urlencode(params)}"
+
+            curl_cmd = (
+                f"curl -s -X {shlex.quote(method)} {shlex.quote(url)}"
+                " -H 'Content-Type: application/json'"
+            )
+            if params and not is_get:
+                # shlex.quote keeps single quotes inside the JSON from ending
+                # the shell argument early.
+                curl_cmd += f" -d {shlex.quote(json.dumps(params))}"
+
+            print(f"\033[95mExecuting curl command:\033[0m")
+            print(f"{curl_cmd}")
+
+            response = self.sandbox.process.exec(curl_cmd, timeout=30)
+
+            if response.exit_code != 0:
+                logger.error(f"Browser automation request failed: {response.result}")
+                return self.fail_response(f"Browser automation request failed: {response.result}")
+
+            try:
+                result = json.loads(response.result)
+            except json.JSONDecodeError:
+                logger.error(f"Failed to parse response JSON: {response.result}")
+                return self.fail_response(f"Failed to parse response JSON: {response.result}")
+
+            if not isinstance(result, dict):
+                # Valid JSON, but not the object shape the code below relies on.
+                logger.error(f"Unexpected browser automation response: {response.result}")
+                return self.fail_response(f"Unexpected browser automation response: {response.result}")
+
+            # Make sure the stored message always carries these two keys.
+            result.setdefault("content", "")
+            result.setdefault("role", "assistant")
+
+            logger.info("Browser automation request completed successfully")
+
+            # Add full result to thread messages for state tracking
+            await self.thread_manager.add_message(
+                thread_id=self.thread_id,
+                type="browser_state",
+                content=result,
+                is_llm_message=False
+            )
+
+            # Return tool-specific success response
+            success_response = {
+                "success": True,
+                "message": result.get("message", "Browser action completed successfully")
+            }
+
+            # Add relevant browser-specific info (falsy values are omitted)
+            if result.get("url"):
+                success_response["url"] = result["url"]
+            if result.get("title"):
+                success_response["title"] = result["title"]
+            if result.get("element_count"):
+                success_response["elements_found"] = result["element_count"]
+            if result.get("pixels_below"):
+                success_response["scrollable_content"] = result["pixels_below"] > 0
+
+            return self.success_response(success_response)
+
+        except Exception as e:
+            # Tool boundary: report any failure as a ToolResult instead of raising.
+            logger.error(f"Error executing browser action: {e}")
+            print(traceback.format_exc())
+            return self.fail_response(f"Error executing browser action: {e}")
+
+    @openapi_schema({
+        "type": "function",
+        "function": {
+            "name": "browser_navigate_to",
+            "description": "Navigate to a specific url",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "url": {
+                        "type": "string",
+                        "description": "The url to navigate to"
+                    }
+                },
+                "required": ["url"]
+            }
+        }
+    })
+    @xml_schema(
+        tag_name="browser-navigate-to",
+        mappings=[
+            {"param_name": "url", "node_type": "content", "path": "."}
+        ],
+        example='''
+        <browser-navigate-to>
+        https://example.com
+        </browser-navigate-to>
+        '''
+    )
+    async def browser_navigate_to(self, url: str) -> ToolResult:
+        """Open the given URL through the ``navigate_to`` automation endpoint.
+
+        Args:
+            url (str): The url to navigate to
+
+        Returns:
+            ToolResult: Whatever ``_execute_browser_action`` reports for the call
+        """
+        payload = {"url": url}
+        print(f"\033[95mNavigating to: {url}\033[0m")
+        return await self._execute_browser_action("navigate_to", payload)
+
+    # @openapi_schema({
+    #     "type": "function",
+    #     "function": {
+    #         "name": "browser_search_google",
+    #         "description": "Search Google with the provided query",
+    #         "parameters": {
+    #             "type": "object",
+    #             "properties": {
+    #                 "query": {
+    #                     "type": "string",
+    #                     "description": "The search query to use"
+    #                 }
+    #             },
+    #             "required": ["query"]
+    #         }
+    #     }
+    # })
+    # @xml_schema(
+    #     tag_name="browser-search-google",
+    #     mappings=[
+    #         {"param_name": "query", "node_type": "content", "path": "."}
+    #     ],
+    #     example='''
+    #     <browser-search-google>
+    #     artificial intelligence news
+    #     </browser-search-google>
+    #     '''
+    # )
+    # async def browser_search_google(self, query: str) -> ToolResult:
+    #     """Search Google with the provided query
+        
+    #     Args:
+    #         query (str): The search query to use
+            
+    #     Returns:
+    #         dict: Result of the execution
+    #     """
+    #     print(f"\033[95mSearching Google for: {query}\033[0m")
+    #     return await self._execute_browser_action("search_google", {"query": query})
+
+    @openapi_schema({
+        "type": "function",
+        "function": {
+            "name": "browser_go_back",
+            "description": "Navigate back in browser history",
+            "parameters": {
+                "type": "object",
+                "properties": {}
+            }
+        }
+    })
+    @xml_schema(
+        tag_name="browser-go-back",
+        mappings=[],
+        example='''
+        <browser-go-back></browser-go-back>
+        '''
+    )
+    async def browser_go_back(self) -> ToolResult:
+        """Step back one entry in browser history via the ``go_back`` endpoint.
+
+        Returns:
+            ToolResult: Whatever ``_execute_browser_action`` reports for the call
+        """
+        no_params = {}
+        print(f"\033[95mNavigating back in browser history\033[0m")
+        return await self._execute_browser_action("go_back", no_params)
+
+    @openapi_schema({
+        "type": "function",
+        "function": {
+            "name": "browser_wait",
+            "description": "Wait for the specified number of seconds",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "seconds": {
+                        "type": "integer",
+                        "description": "Number of seconds to wait (default: 3)"
+                    }
+                }
+            }
+        }
+    })
+    @xml_schema(
+        tag_name="browser-wait",
+        mappings=[
+            {"param_name": "seconds", "node_type": "content", "path": "."}
+        ],
+        example='''
+        <browser-wait>
+        5
+        </browser-wait>
+        '''
+    )
+    async def browser_wait(self, seconds: int = 3) -> ToolResult:
+        """Ask the ``wait`` automation endpoint to pause for a number of seconds.
+
+        Args:
+            seconds (int, optional): Number of seconds to wait. Defaults to 3.
+
+        Returns:
+            ToolResult: Whatever ``_execute_browser_action`` reports for the call
+        """
+        payload = {"seconds": seconds}
+        print(f"\033[95mWaiting for {seconds} seconds\033[0m")
+        return await self._execute_browser_action("wait", payload)
+
+    @openapi_schema({
+        "type": "function",
+        "function": {
+            "name": "browser_click_element",
+            "description": "Click on an element by index",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "index": {
+                        "type": "integer",
+                        "description": "The index of the element to click"
+                    }
+                },
+                "required": ["index"]
+            }
+        }
+    })
+    @xml_schema(
+        tag_name="browser-click-element",
+        mappings=[
+            {"param_name": "index", "node_type": "content", "path": "."}
+        ],
+        example='''
+        <browser-click-element>
+        2
+        </browser-click-element>
+        '''
+    )
+    async def browser_click_element(self, index: int) -> ToolResult:
+        """Click the element with the given index via the ``click_element`` endpoint.
+
+        Args:
+            index (int): The index of the element to click
+
+        Returns:
+            ToolResult: Whatever ``_execute_browser_action`` reports for the call
+        """
+        payload = {"index": index}
+        print(f"\033[95mClicking element with index: {index}\033[0m")
+        return await self._execute_browser_action("click_element", payload)
+
+    @openapi_schema({
+        "type": "function",
+        "function": {
+            "name": "browser_input_text",
+            "description": "Input text into an element",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "index": {
+                        "type": "integer",
+                        "description": "The index of the element to input text into"
+                    },
+                    "text": {
+                        "type": "string",
+                        "description": "The text to input"
+                    }
+                },
+                "required": ["index", "text"]
+            }
+        }
+    })
+    @xml_schema(
+        tag_name="browser-input-text",
+        mappings=[
+            {"param_name": "index", "node_type": "attribute", "path": "."},
+            {"param_name": "text", "node_type": "content", "path": "."}
+        ],
+        example='''
+        <browser-input-text index="2">
+        Hello, world!
+        </browser-input-text>
+        '''
+    )
+    async def browser_input_text(self, index: int, text: str) -> ToolResult:
+        """Type text into the element with the given index via ``input_text``.
+
+        Args:
+            index (int): The index of the element to input text into
+            text (str): The text to input
+
+        Returns:
+            ToolResult: Whatever ``_execute_browser_action`` reports for the call
+        """
+        payload = {"index": index, "text": text}
+        print(f"\033[95mInputting text into element {index}: {text}\033[0m")
+        return await self._execute_browser_action("input_text", payload)
+
+    @openapi_schema({
+        "type": "function",
+        "function": {
+            "name": "browser_send_keys",
+            "description": "Send keyboard keys such as Enter, Escape, or keyboard shortcuts",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "keys": {
+                        "type": "string",
+                        "description": "The keys to send (e.g., 'Enter', 'Escape', 'Control+a')"
+                    }
+                },
+                "required": ["keys"]
+            }
+        }
+    })
+    @xml_schema(
+        tag_name="browser-send-keys",
+        mappings=[
+            {"param_name": "keys", "node_type": "content", "path": "."}
+        ],
+        example='''
+        <browser-send-keys>
+        Enter
+        </browser-send-keys>
+        '''
+    )
+    async def browser_send_keys(self, keys: str) -> ToolResult:
+        """Forward a key or key combination to the ``send_keys`` endpoint.
+
+        Args:
+            keys (str): The keys to send (e.g., 'Enter', 'Escape', 'Control+a')
+
+        Returns:
+            ToolResult: Whatever ``_execute_browser_action`` reports for the call
+        """
+        payload = {"keys": keys}
+        print(f"\033[95mSending keys: {keys}\033[0m")
+        return await self._execute_browser_action("send_keys", payload)
+
+    @openapi_schema({
+        "type": "function",
+        "function": {
+            "name": "browser_switch_tab",
+            "description": "Switch to a different browser tab",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "page_id": {
+                        "type": "integer",
+                        "description": "The ID of the tab to switch to"
+                    }
+                },
+                "required": ["page_id"]
+            }
+        }
+    })
+    @xml_schema(
+        tag_name="browser-switch-tab",
+        mappings=[
+            {"param_name": "page_id", "node_type": "content", "path": "."}
+        ],
+        example='''
+        <browser-switch-tab>
+        1
+        </browser-switch-tab>
+        '''
+    )
+    async def browser_switch_tab(self, page_id: int) -> ToolResult:
+        """Make another tab active via the ``switch_tab`` endpoint.
+
+        Args:
+            page_id (int): The ID of the tab to switch to
+
+        Returns:
+            ToolResult: Whatever ``_execute_browser_action`` reports for the call
+        """
+        payload = {"page_id": page_id}
+        print(f"\033[95mSwitching to tab: {page_id}\033[0m")
+        return await self._execute_browser_action("switch_tab", payload)
+
+    # @openapi_schema({
+    #     "type": "function",
+    #     "function": {
+    #         "name": "browser_open_tab",
+    #         "description": "Open a new browser tab with the specified URL",
+    #         "parameters": {
+    #             "type": "object",
+    #             "properties": {
+    #                 "url": {
+    #                     "type": "string",
+    #                     "description": "The URL to open in the new tab"
+    #                 }
+    #             },
+    #             "required": ["url"]
+    #         }
+    #     }
+    # })
+    # @xml_schema(
+    #     tag_name="browser-open-tab",
+    #     mappings=[
+    #         {"param_name": "url", "node_type": "content", "path": "."}
+    #     ],
+    #     example='''
+    #     <browser-open-tab>
+    #     https://example.com
+    #     </browser-open-tab>
+    #     '''
+    # )
+    # async def browser_open_tab(self, url: str) -> ToolResult:
+    #     """Open a new browser tab with the specified URL
+        
+    #     Args:
+    #         url (str): The URL to open in the new tab
+            
+    #     Returns:
+    #         dict: Result of the execution
+    #     """
+    #     print(f"\033[95mOpening new tab with URL: {url}\033[0m")
+    #     return await self._execute_browser_action("open_tab", {"url": url})
+
+    @openapi_schema({
+        "type": "function",
+        "function": {
+            "name": "browser_close_tab",
+            "description": "Close a browser tab",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "page_id": {
+                        "type": "integer",
+                        "description": "The ID of the tab to close"
+                    }
+                },
+                "required": ["page_id"]
+            }
+        }
+    })
+    @xml_schema(
+        tag_name="browser-close-tab",
+        mappings=[
+            {"param_name": "page_id", "node_type": "content", "path": "."}
+        ],
+        example='''
+        <browser-close-tab>
+        1
+        </browser-close-tab>
+        '''
+    )
+    async def browser_close_tab(self, page_id: int) -> ToolResult:
+        """Close the tab with the given ID via the ``close_tab`` endpoint.
+
+        Args:
+            page_id (int): The ID of the tab to close
+
+        Returns:
+            ToolResult: Whatever ``_execute_browser_action`` reports for the call
+        """
+        payload = {"page_id": page_id}
+        print(f"\033[95mClosing tab: {page_id}\033[0m")
+        return await self._execute_browser_action("close_tab", payload)
+
+    # @openapi_schema({
+    #     "type": "function",
+    #     "function": {
+    #         "name": "browser_extract_content",
+    #         "description": "Extract content from the current page based on the provided goal",
+    #         "parameters": {
+    #             "type": "object",
+    #             "properties": {
+    #                 "goal": {
+    #                     "type": "string",
+    #                     "description": "The extraction goal (e.g., 'extract all links', 'find product information')"
+    #                 }
+    #             },
+    #             "required": ["goal"]
+    #         }
+    #     }
+    # })
+    # @xml_schema(
+    #     tag_name="browser-extract-content",
+    #     mappings=[
+    #         {"param_name": "goal", "node_type": "content", "path": "."}
+    #     ],
+    #     example='''
+    #     <browser-extract-content>
+    #     Extract all links on the page
+    #     </browser-extract-content>
+    #     '''
+    # )
+    # async def browser_extract_content(self, goal: str) -> ToolResult:
+    #     """Extract content from the current page based on the provided goal
+        
+    #     Args:
+    #         goal (str): The extraction goal
+            
+    #     Returns:
+    #         dict: Result of the execution
+    #     """
+    #     print(f"\033[95mExtracting content with goal: {goal}\033[0m")
+    #     result = await self._execute_browser_action("extract_content", {"goal": goal})
+        
+    #     # Format content for better readability
+    #     if result.get("success"):
+    #         print(f"\033[92mContent extraction successful\033[0m")
+    #         content = result.data.get("content", "")
+    #         url = result.data.get("url", "")
+    #         title = result.data.get("title", "")
+            
+    #         if content:
+    #             content_preview = content[:200] + "..." if len(content) > 200 else content
+    #             print(f"\033[95mExtracted content from {title} ({url}):\033[0m")
+    #             print(f"\033[96m{content_preview}\033[0m")
+    #             print(f"\033[95mTotal content length: {len(content)} characters\033[0m")
+    #         else:
+    #             print(f"\033[93mNo content extracted from {url}\033[0m")
+    #     else:
+    #         print(f"\033[91mFailed to extract content: {result.data.get('error', 'Unknown error')}\033[0m")
+        
+    #     return result
+
+    @openapi_schema({
+        "type": "function",
+        "function": {
+            "name": "browser_scroll_down",
+            "description": "Scroll down the page",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "amount": {
+                        "type": "integer",
+                        "description": "Pixel amount to scroll (if not specified, scrolls one page)"
+                    }
+                }
+            }
+        }
+    })
+    @xml_schema(
+        tag_name="browser-scroll-down",
+        mappings=[
+            {"param_name": "amount", "node_type": "content", "path": "."}
+        ],
+        example='''
+        <browser-scroll-down>
+        500
+        </browser-scroll-down>
+        '''
+    )
+    async def browser_scroll_down(self, amount: int = None) -> ToolResult:
+        """Scroll the page downwards via the ``scroll_down`` endpoint.
+
+        Args:
+            amount (int, optional): Pixel amount to scroll. When None, no amount
+                is sent and the log line reports a one-page scroll.
+
+        Returns:
+            ToolResult: Whatever ``_execute_browser_action`` reports for the call
+        """
+        # Guard clause: without an amount the endpoint receives empty params.
+        if amount is None:
+            print(f"\033[95mScrolling down one page\033[0m")
+            return await self._execute_browser_action("scroll_down", {})
+
+        print(f"\033[95mScrolling down by {amount} pixels\033[0m")
+        return await self._execute_browser_action("scroll_down", {"amount": amount})
+
+    @openapi_schema({
+        "type": "function",
+        "function": {
+            "name": "browser_scroll_up",
+            "description": "Scroll up the page",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "amount": {
+                        "type": "integer",
+                        "description": "Pixel amount to scroll (if not specified, scrolls one page)"
+                    }
+                }
+            }
+        }
+    })
+    @xml_schema(
+        tag_name="browser-scroll-up",
+        mappings=[
+            {"param_name": "amount", "node_type": "content", "path": "."}
+        ],
+        example='''
+        <browser-scroll-up>
+        500
+        </browser-scroll-up>
+        '''
+    )
+    async def browser_scroll_up(self, amount: int = None) -> ToolResult:
+        """Scroll the page upwards via the ``scroll_up`` endpoint.
+
+        Args:
+            amount (int, optional): Pixel amount to scroll. When None, no amount
+                is sent and the log line reports a one-page scroll.
+
+        Returns:
+            ToolResult: Whatever ``_execute_browser_action`` reports for the call
+        """
+        # Guard clause: without an amount the endpoint receives empty params.
+        if amount is None:
+            print(f"\033[95mScrolling up one page\033[0m")
+            return await self._execute_browser_action("scroll_up", {})
+
+        print(f"\033[95mScrolling up by {amount} pixels\033[0m")
+        return await self._execute_browser_action("scroll_up", {"amount": amount})
+
+    @openapi_schema({
+        "type": "function",
+        "function": {
+            "name": "browser_scroll_to_text",
+            "description": "Scroll to specific text on the page",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "text": {
+                        "type": "string",
+                        "description": "The text to scroll to"
+                    }
+                },
+                "required": ["text"]
+            }
+        }
+    })
+    @xml_schema(
+        tag_name="browser-scroll-to-text",
+        mappings=[
+            {"param_name": "text", "node_type": "content", "path": "."}
+        ],
+        example='''
+        <browser-scroll-to-text>
+        Contact Us
+        </browser-scroll-to-text>
+        '''
+    )
+    async def browser_scroll_to_text(self, text: str) -> ToolResult:
+        """Scroll to the given text via the ``scroll_to_text`` endpoint.
+
+        Args:
+            text (str): The text to scroll to
+
+        Returns:
+            ToolResult: Whatever ``_execute_browser_action`` reports for the call
+        """
+        payload = {"text": text}
+        print(f"\033[95mScrolling to text: {text}\033[0m")
+        return await self._execute_browser_action("scroll_to_text", payload)
+
+    @openapi_schema({
+        "type": "function",
+        "function": {
+            "name": "browser_get_dropdown_options",
+            "description": "Get all options from a dropdown element",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "index": {
+                        "type": "integer",
+                        "description": "The index of the dropdown element"
+                    }
+                },
+                "required": ["index"]
+            }
+        }
+    })
+    @xml_schema(
+        tag_name="browser-get-dropdown-options",
+        mappings=[
+            {"param_name": "index", "node_type": "content", "path": "."}
+        ],
+        example='''
+        <browser-get-dropdown-options>
+        2
+        </browser-get-dropdown-options>
+        '''
+    )
+    async def browser_get_dropdown_options(self, index: int) -> ToolResult:
+        """Request a dropdown's options via the ``get_dropdown_options`` endpoint.
+
+        Args:
+            index (int): The index of the dropdown element
+
+        Returns:
+            ToolResult: Whatever ``_execute_browser_action`` reports for the call
+        """
+        payload = {"index": index}
+        print(f"\033[95mGetting options from dropdown with index: {index}\033[0m")
+        return await self._execute_browser_action("get_dropdown_options", payload)
+
+    @openapi_schema({
+        "type": "function",
+        "function": {
+            "name": "browser_select_dropdown_option",
+            "description": "Select an option from a dropdown by text",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "index": {
+                        "type": "integer",
+                        "description": "The index of the dropdown element"
+                    },
+                    "text": {
+                        "type": "string",
+                        "description": "The text of the option to select"
+                    }
+                },
+                "required": ["index", "text"]
+            }
+        }
+    })
+    @xml_schema(
+        tag_name="browser-select-dropdown-option",
+        mappings=[
+            {"param_name": "index", "node_type": "attribute", "path": "."},
+            {"param_name": "text", "node_type": "content", "path": "."}
+        ],
+        example='''
+        <browser-select-dropdown-option index="2">
+        Option 1
+        </browser-select-dropdown-option>
+        '''
+    )
+    async def browser_select_dropdown_option(self, index: int, text: str) -> ToolResult:
+        """Select a dropdown option, identified by its text.
+
+        Args:
+            index (int): The index of the dropdown element
+            text (str): The text of the option to select
+
+        Returns:
+            ToolResult: The value returned by `_execute_browser_action` for
+                the "select_dropdown_option" action
+        """
+        selection = {"index": index, "text": text}
+        print(f"\033[95mSelecting option '{text}' from dropdown with index: {index}\033[0m")
+        return await self._execute_browser_action("select_dropdown_option", selection)
+
+    @openapi_schema({
+        "type": "function",
+        "function": {
+            "name": "browser_drag_drop",
+            "description": "Perform drag and drop operation between elements or coordinates",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "element_source": {
+                        "type": "string",
+                        "description": "The source element selector"
+                    },
+                    "element_target": {
+                        "type": "string",
+                        "description": "The target element selector"
+                    },
+                    "coord_source_x": {
+                        "type": "integer",
+                        "description": "The source X coordinate"
+                    },
+                    "coord_source_y": {
+                        "type": "integer",
+                        "description": "The source Y coordinate"
+                    },
+                    "coord_target_x": {
+                        "type": "integer",
+                        "description": "The target X coordinate"
+                    },
+                    "coord_target_y": {
+                        "type": "integer",
+                        "description": "The target Y coordinate"
+                    }
+                }
+            }
+        }
+    })
+    @xml_schema(
+        tag_name="browser-drag-drop",
+        mappings=[
+            {"param_name": "element_source", "node_type": "attribute", "path": "."},
+            {"param_name": "element_target", "node_type": "attribute", "path": "."},
+            {"param_name": "coord_source_x", "node_type": "attribute", "path": "."},
+            {"param_name": "coord_source_y", "node_type": "attribute", "path": "."},
+            {"param_name": "coord_target_x", "node_type": "attribute", "path": "."},
+            {"param_name": "coord_target_y", "node_type": "attribute", "path": "."}
+        ],
+        example='''
+        <browser-drag-drop element_source="#draggable" element_target="#droppable"></browser-drag-drop>
+        '''
+    )
+    async def browser_drag_drop(self, element_source: str = None, element_target: str = None,
+                               coord_source_x: int = None, coord_source_y: int = None,
+                               coord_target_x: int = None, coord_target_y: int = None) -> ToolResult:
+        """Drag from a source to a target, given as selectors or as coordinates.
+
+        Selectors take precedence: when both `element_source` and
+        `element_target` are truthy, any coordinates are ignored. Otherwise
+        all four coordinates must be non-None (0 is accepted).
+
+        Args:
+            element_source (str, optional): The source element selector
+            element_target (str, optional): The target element selector
+            coord_source_x (int, optional): The source X coordinate
+            coord_source_y (int, optional): The source Y coordinate
+            coord_target_x (int, optional): The target X coordinate
+            coord_target_y (int, optional): The target Y coordinate
+
+        Returns:
+            ToolResult: The value returned by `_execute_browser_action` for
+                the "drag_drop" action, or the value of `fail_response` when
+                neither a selector pair nor all four coordinates were given
+        """
+        # Selector mode: used whenever both selectors are non-empty.
+        if element_source and element_target:
+            print(f"\033[95mDragging from element '{element_source}' to '{element_target}'\033[0m")
+            return await self._execute_browser_action("drag_drop", {
+                "element_source": element_source,
+                "element_target": element_target,
+            })
+
+        # Coordinate mode: compare against None so that 0 stays a valid value.
+        coordinates = (coord_source_x, coord_source_y, coord_target_x, coord_target_y)
+        if any(value is None for value in coordinates):
+            return self.fail_response("Must provide either element selectors or coordinates for drag and drop")
+
+        print(f"\033[95mDragging from coordinates ({coord_source_x}, {coord_source_y}) to ({coord_target_x}, {coord_target_y})\033[0m")
+        return await self._execute_browser_action("drag_drop", {
+            "coord_source_x": coord_source_x,
+            "coord_source_y": coord_source_y,
+            "coord_target_x": coord_target_x,
+            "coord_target_y": coord_target_y,
+        })
--- a/backend/sandbox/docker/Dockerfile
+++ b/backend/sandbox/docker/Dockerfile
@ -94,7 +94,9 @@ COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt

 # Copy server script
+COPY . /app
 COPY server.py /app/server.py
+COPY browser_api.py /app/browser_api.py

 # Install Playwright and browsers with system dependencies
 ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
@ -106,9 +108,6 @@ RUN playwright install chromium
 # Verify installation
 RUN python -c "from playwright.sync_api import sync_playwright; print('Playwright installation verified')"

-# Copy the application code
-# COPY . .
-
 # Set environment variables
 ENV PYTHONUNBUFFERED=1
 ENV CHROME_PATH=/ms-playwright/chromium-*/chrome-linux/chrome
--- a/backend/sandbox/docker/api.py
+++ b/backend/sandbox/docker/api.py
@ -1,18 +0,0 @@
-from fastapi import FastAPI
-from automation_service import automation_service
-
-# Create API app
-api_app = FastAPI()
-
-@api_app.get("/api")
-async def health_check():
-    return {"status": "ok", "message": "API server is running"}
-
-# Include automation service router with /api prefix
-api_app.include_router(automation_service.router, prefix="/api")
-
-# This is needed for the import string approach with uvicorn
-if __name__ == '__main__':
-    import uvicorn
-    print("Starting API server")
-    uvicorn.run("api:api_app", host="0.0.0.0", port=8000) 
--- a/backend/sandbox/docker/automation_service.py
+++ b/backend/sandbox/docker/automation_service.py
@ -1,195 +0,0 @@
-import pyautogui
-import time
-import os
-import sys
-from typing import List, Dict, Any, Optional, Union
-import io
-import base64
-from PIL import Image
-from fastapi import APIRouter, HTTPException
-from pydantic import BaseModel
-from enum import Enum
-
-# Set environment variable for the display if not already set
-if 'DISPLAY' not in os.environ:
-    os.environ['DISPLAY'] = ':99'
-
-# Try to initialize pyautogui with error handling
-try:
-    pyautogui.FAILSAFE = False
-except Exception as e:
-    print(f"Warning: Could not initialize pyautogui: {e}", file=sys.stderr)
-    print("This may be due to X11 authentication issues. Continuing anyway.", file=sys.stderr)
-
-## Input Models
-
-class MouseButton(str, Enum):
-    left = "left"
-    middle = "middle"
-    right = "right"
-
-class Position(BaseModel):
-    x: Optional[int] = None
-    y: Optional[int] = None
-
-class MouseAction(BaseModel):
-    x: Optional[int] = None
-    y: Optional[int] = None
-    clicks: Optional[int] = 1
-    interval: Optional[float] = 0.0
-    button: MouseButton = MouseButton.left
-    duration: Optional[float] = 0.0
-    
-class KeyboardAction(BaseModel):
-    key: str
-
-class KeyboardPress(BaseModel):
-    keys: Union[str, List[str]]
-    presses: Optional[int] = 1
-    interval: Optional[float] = 0.0
-    
-class WriteAction(BaseModel):
-    message: str
-    interval: Optional[float] = 0.0
-
-class HotkeyAction(BaseModel):
-    keys: List[str]
-    interval: Optional[float] = 0.0 
-    
-    
-class AutomationService:
-    def __init__(self):
-        self.router = APIRouter()
-        
-        # Set fallback to avoid crashes
-        pyautogui.FAILSAFE = False
-        
-        # X error handling
-        try:
-            # Test if we can get the screen size
-            self.screen_width, self.screen_height = pyautogui.size()
-            print(f"Screen size detected: {self.screen_width}x{self.screen_height}")
-            self.x11_available = True
-        except Exception as e:
-            print(f"Warning: Could not get screen size: {e}", file=sys.stderr)
-            print("X11 functionality may be limited. Using fallback values.", file=sys.stderr)
-            self.screen_width = 1920
-            self.screen_height = 1080
-            self.x11_available = False
-
-        self.router.get("/automation/mouse/position")(self.get_mouse_position)
-        self.router.post("/automation/mouse/move")(self.move_mouse)
-        self.router.post("/automation/mouse/click")(self.click_mouse)
-        self.router.post("/automation/mouse/down")(self.mouse_down)
-        self.router.post("/automation/mouse/up")(self.mouse_up)
-        self.router.post("/automation/mouse/drag")(self.drag_mouse)
-        self.router.post("/automation/mouse/scroll")(self.scroll_mouse)
-        self.router.post("/automation/keyboard/down")(self.key_down)
-        self.router.post("/automation/keyboard/up")(self.key_up)
-        self.router.post("/automation/keyboard/press")(self.press_key)
-        self.router.post("/automation/keyboard/write")(self.write_text)
-        self.router.post("/automation/keyboard/hotkey")(self.press_hotkey)
-        self.router.post("/automation/screenshot")(self.take_screenshot)
-
-    async def get_mouse_position(self):
-        try:
-            x, y = pyautogui.position()
-            return {"x": x, "y": y}
-        except Exception as e:
-            return {"error": str(e), "x": 0, "y": 0}
-
-    async def move_mouse(self, action: Position):
-        try:
-            pyautogui.moveTo(x=action.x, y=action.y)
-            return {"success": True}
-        except Exception as e:
-            return {"success": False, "error": str(e)}
-
-    async def click_mouse(self, action: MouseAction):
-        try:
-            pyautogui.click(x=action.x, y=action.y, clicks=action.clicks,
-                          interval=action.interval, button=action.button,
-                          duration=action.duration)
-            return {"success": True}
-        except Exception as e:
-            return {"success": False, "error": str(e)}
-
-    async def mouse_down(self, action: MouseAction):
-        try:
-            pyautogui.mouseDown(x=action.x, y=action.y,
-                              button=action.button, duration=action.duration)
-            return {"success": True}
-        except Exception as e:
-            return {"success": False, "error": str(e)}
-
-    async def mouse_up(self, action: MouseAction):
-        try:
-            pyautogui.mouseUp(x=action.x, y=action.y,
-                            button=action.button, duration=action.duration)
-            return {"success": True}
-        except Exception as e:
-            return {"success": False, "error": str(e)}
-
-    async def drag_mouse(self, action: MouseAction):
-        try:
-            pyautogui.dragTo(x=action.x, y=action.y,
-                           duration=action.duration, button=action.button)
-            return {"success": True}
-        except Exception as e:
-            return {"success": False, "error": str(e)}
-
-    async def scroll_mouse(self, action: MouseAction):
-        try:
-            pyautogui.scroll(clicks=action.clicks, x=action.x, y=action.y)
-            return {"success": True}
-        except Exception as e:
-            return {"success": False, "error": str(e)}
-
-    async def key_down(self, action: KeyboardAction):
-        try:
-            pyautogui.keyDown(action.key)
-            return {"success": True}
-        except Exception as e:
-            return {"success": False, "error": str(e)}
-
-    async def key_up(self, action: KeyboardAction):
-        try:
-            pyautogui.keyUp(action.key)
-            return {"success": True}
-        except Exception as e:
-            return {"success": False, "error": str(e)}
-
-    async def press_key(self, action: KeyboardPress):
-        try:
-            pyautogui.press(keys=action.keys, presses=action.presses,
-                          interval=action.interval)
-            return {"success": True}
-        except Exception as e:
-            return {"success": False, "error": str(e)}
-
-    async def write_text(self, action: WriteAction):
-        try:
-            pyautogui.write(message=action.message, interval=action.interval)
-            return {"success": True}
-        except Exception as e:
-            return {"success": False, "error": str(e)}
-
-    async def press_hotkey(self, action: HotkeyAction):
-        try:
-            pyautogui.hotkey(*action.keys, interval=action.interval)
-            return {"success": True}
-        except Exception as e:
-            return {"success": False, "error": str(e)}
-
-    async def take_screenshot(self) -> Dict[str, str]:
-        try:
-            screenshot = pyautogui.screenshot()
-            img_byte_arr = io.BytesIO()
-            screenshot.save(img_byte_arr, format='PNG')
-            img_byte_arr = img_byte_arr.getvalue()
-            return {"image": base64.b64encode(img_byte_arr).decode()}
-        except Exception as e:
-            return {"error": str(e)}
-
-# Create a singleton instance
-automation_service = AutomationService()     
--- a/backend/sandbox/docker/browser_api.py
+++ b/backend/sandbox/docker/browser_api.py
--- a/backend/sandbox/docker/docker-compose.yml
+++ b/backend/sandbox/docker/docker-compose.yml
@ -6,7 +6,7 @@ services:
      dockerfile: ${DOCKERFILE:-Dockerfile}
      args:
        TARGETPLATFORM: ${TARGETPLATFORM:-linux/amd64}
-    image: kortixmarko/kortix-suna:0.0.5
+    image: adamcohenhillel/kortix-suna:0.0.13
    ports:
      - "6080:6080"  # noVNC web interface
      - "5901:5901"  # VNC port
--- a/backend/sandbox/docker/supervisord.conf
+++ b/backend/sandbox/docker/supervisord.conf
@ -65,21 +65,6 @@ startretries=5
 startsecs=3
 depends_on=x11vnc

-[program:persistent_browser]
-environment=START_URL="data:text/html,<html><body><h1>Browser Ready</h1></body></html>"
-command=bash -c "mkdir -p /app/data/chrome_data && sleep 8 && $(find /ms-playwright/chromium-*/chrome-linux -name chrome) --user-data-dir=/app/data/chrome_data --window-position=0,0 --window-size=%(ENV_RESOLUTION_WIDTH)s,%(ENV_RESOLUTION_HEIGHT)s --start-maximized --no-sandbox --disable-dev-shm-usage --disable-gpu --disable-software-rasterizer --disable-setuid-sandbox --no-first-run --no-default-browser-check --no-experiments --ignore-certificate-errors --remote-debugging-port=9222 --remote-debugging-address=0.0.0.0 \"$START_URL\""
-autorestart=true
-stdout_logfile=/dev/stdout
-stdout_logfile_maxbytes=0
-stderr_logfile=/dev/stderr
-stderr_logfile_maxbytes=0
-priority=350
-startretries=5
-startsecs=10
-stopsignal=TERM
-stopwaitsecs=15
-depends_on=novnc
-
 [program:http_server]
 command=python /app/server.py
 directory=/app
@ -94,8 +79,8 @@ startsecs=5
 stopsignal=TERM
 stopwaitsecs=10

-[program:api_server]
-command=python /app/api.py
+[program:browser_api]
+command=python /app/browser_api.py
 directory=/app
 autorestart=true
 stdout_logfile=/dev/stdout
--- a/backend/services/llm.py
+++ b/backend/services/llm.py
@ -121,11 +121,12 @@ def prepare_params(
        logger.debug(f"Added {len(tools)} tools to API parameters")

    # # Add Claude-specific headers
-    # if "claude" in model_name.lower() or "anthropic" in model_name.lower():
-    #     params["extra_headers"] = {
-    #         "anthropic-beta": "max-tokens-3-5-sonnet-2024-07-15"
-    #     }
-    #     logger.debug("Added Claude-specific headers")
+    if "claude" in model_name.lower() or "anthropic" in model_name.lower():
+        params["extra_headers"] = {
+            # "anthropic-beta": "max-tokens-3-5-sonnet-2024-07-15"
+            "anthropic-beta": "output-128k-2025-02-19"
+        }
+        logger.debug("Added Claude-specific headers")
    
    # Add OpenRouter-specific parameters
    if model_name.startswith("openrouter/"):
--- a/backend/utils/billing.py
+++ b/backend/utils/billing.py
@ -4,9 +4,9 @@ from services.supabase import DBConnection

 # Define subscription tiers and their monthly hour limits
 SUBSCRIPTION_TIERS = {
-    'price_1RDQbOG6l1KZGqIrgrYzMbnL': {'name': 'free', 'hours': 1},
-    'price_1RC2PYG6l1KZGqIrpbzFB9Lp': {'name': 'base', 'hours': 1},
-    'price_1RDQWqG6l1KZGqIrChli4Ys4': {'name': 'extra', 'hours': 1}
+    'price_1RDQbOG6l1KZGqIrgrYzMbnL': {'name': 'free', 'hours': 100},
+    'price_1RC2PYG6l1KZGqIrpbzFB9Lp': {'name': 'base', 'hours': 100},
+    'price_1RDQWqG6l1KZGqIrChli4Ys4': {'name': 'extra', 'hours': 100}
 }

 async def get_account_subscription(client, account_id: str) -> Optional[Dict]:
--- a/frontend/src/app/dashboard/agents/[threadId]/page.tsx
+++ b/frontend/src/app/dashboard/agents/[threadId]/page.tsx
@ -282,6 +282,12 @@ export default function AgentPage({ params }: AgentPageProps) {
          part.isToolCall = !isUserMessage;
          part.status = part.isClosing ? 'completed' : 'running';
          
+          // Check if this is a browser-related tool and add VNC preview
+          if (part.tagName.includes('browser') && agent?.sandbox?.vnc_preview) {
+            console.log(`[TOOLS] Adding VNC preview from sandbox to browser tool ${part.tagName}`);
+            part.vncPreview = agent.sandbox.vnc_preview + "/vnc_lite.html?password=" + agent.sandbox.pass;
+          }
+          
          // Use ID for deduplication
          if (!seenTagIds.has(part.id)) {
            seenTagIds.add(part.id);
@ -307,6 +313,12 @@ export default function AgentPage({ params }: AgentPageProps) {
        tag.isToolCall = !isUserMessage;
        tag.status = tag.isClosing ? 'completed' : 'running';
        
+        // Check if this is a browser-related tool and add VNC preview
+        if (tag.tagName.includes('browser') && agent?.sandbox?.vnc_preview) {
+          console.log(`[TOOLS] Adding VNC preview from sandbox to browser tool ${tag.tagName}`);
+          tag.vncPreview = agent.sandbox.vnc_preview + "/vnc_lite.html?password=" + agent.sandbox.pass;
+        }
+        
        // Use ID for deduplication
        if (!seenTagIds.has(tag.id)) {
          seenTagIds.add(tag.id);
@ -381,7 +393,7 @@ export default function AgentPage({ params }: AgentPageProps) {
    
    // Update tool calls in the shared context
    setToolCalls(pairedTags);
-  }, [messages, streamContent, setToolCalls]);
+  }, [messages, streamContent, setToolCalls, agent]);
  
  // Scroll to bottom of messages
  const scrollToBottom = useCallback(() => {
@ -752,6 +764,10 @@ export default function AgentPage({ params }: AgentPageProps) {
              <>
                {messages.map((message, index) => {
                  // Skip messages containing "ToolResult("
+                  if (!message || !message?.content || !message?.role) {
+                    return null;
+                  }
+
                  if (message.content.includes("ToolResult(")) {
                    return null;
                  }
@ -927,6 +943,9 @@ export default function AgentPage({ params }: AgentPageProps) {
          <>
            {messages.map((message, index) => {
              // Skip messages containing "ToolResult("
+              if (!message || !message?.content || !message?.role) {
+                return null;
+              }
              if (message.content.includes("ToolResult(")) {
                return null;
              }
--- a/frontend/src/components/billing/PlanComparison.tsx
+++ b/frontend/src/components/billing/PlanComparison.tsx
@ -16,12 +16,12 @@ export const SUBSCRIPTION_PLANS = {
 const PLAN_DETAILS = {
  [SUBSCRIPTION_PLANS.FREE]: {
    name: 'Free',
-    limit: 1,
+    limit: 100,
    price: 0
  },
  [SUBSCRIPTION_PLANS.BASIC]: {
    name: 'Basic',
-    limit: 10,
+    limit: 100,
    price: 10
  },
  [SUBSCRIPTION_PLANS.PRO]: {
--- a/frontend/src/components/chat/tool-components.tsx
+++ b/frontend/src/components/chat/tool-components.tsx
@ -4,7 +4,7 @@ import React from 'react';
 import { ParsedTag, ToolComponentProps } from '@/lib/types/tool-calls';
 import { 
  File, FileText, Terminal, FolderPlus, Folder, Code, Search as SearchIcon, 
-  Bell, Replace, Plus, Minus
+  Bell, Replace, Plus, Minus, Globe, Search
 } from 'lucide-react';
 import { cn } from '@/lib/utils';
 import { diffLines } from 'diff';
@ -458,6 +458,128 @@ export const SearchCodeTool: React.FC<ToolComponentProps> = ({ tag, mode }) => {
  );
 };

+/**
+ * Browser Navigate Tool Component
+ *
+ * Displays the tag's content as the URL. In detailed mode it also embeds
+ * `tag.vncPreview` in an iframe when that field is set.
+ */
+export const BrowserNavigateTool: React.FC<ToolComponentProps> = ({ tag, mode }) => {
+  const url = tag.content || '';
+  const isRunning = tag.status === 'running';
+  const statusLabel = isRunning ? 'Navigating to' : 'Navigated to';
+
+  if (mode === 'compact') {
+    return (
+      <CompactToolDisplay
+        icon={<Globe className="h-4 w-4 mr-2" />}
+        name={statusLabel}
+        input={url}
+        isRunning={isRunning}
+      />
+    );
+  }
+
+  // Indicator shown in the header only while the tool call is still running.
+  const runningBadge = isRunning && (
+    <div className="flex items-center gap-2">
+      <span className="text-amber-500">Running</span>
+      <div className="h-2 w-2 rounded-full bg-amber-500 animate-pulse"></div>
+    </div>
+  );
+
+  // The iframe is laid out at twice the container size and scaled by 0.5
+  // from its top-left corner.
+  const previewFrameStyle: React.CSSProperties = {
+    width: '200%',
+    height: '200%',
+    transform: 'scale(0.5)',
+    transformOrigin: '0 0'
+  };
+
+  return (
+    <div className="border rounded-lg overflow-hidden border-subtle dark:border-white/10">
+      <div className="flex items-center px-2 py-1 text-xs font-medium border-b border-subtle dark:border-white/10 bg-background-secondary dark:bg-background-secondary text-foreground">
+        <Globe className="h-4 w-4 mr-2" />
+        <div className="flex-1">{statusLabel}: {url}</div>
+        {runningBadge}
+      </div>
+      <div className="p-3 bg-card-bg dark:bg-background-secondary text-foreground">
+        <div className="space-y-2">
+          <div className="flex items-center gap-1 text-xs text-muted-foreground mb-1">
+            <Globe className="h-3 w-3" />
+            <span className="font-mono">{url}</span>
+          </div>
+
+          {/* VNC preview, rendered only when the tag carries a preview URL */}
+          {tag.vncPreview && (
+            <div className="mt-2 border border-subtle dark:border-white/10 rounded-md overflow-hidden">
+              <div className="text-xs bg-black text-white p-1">VNC Preview</div>
+              <div className="relative w-full h-[300px] overflow-hidden">
+                <iframe
+                  src={tag.vncPreview}
+                  title="Browser preview"
+                  className="absolute top-0 left-0 border-0"
+                  style={previewFrameStyle}
+                  sandbox="allow-same-origin allow-scripts"
+                />
+              </div>
+            </div>
+          )}
+        </div>
+      </div>
+    </div>
+  );
+};
+
+/**
+ * Web Search Tool Component
+ *
+ * Shows the search query taken from the tag's `query` attribute. In detailed
+ * mode it also lists the results parsed from `tag.result.output`, which is
+ * read as a JSON array of objects with `Title`, `URL` and an optional
+ * `Published Date`.
+ */
+export const WebSearchTool: React.FC<ToolComponentProps> = ({ tag, mode }) => {
+  const query = tag.attributes.query || '';
+  const isRunning = tag.status === 'running';
+  
+  if (mode === 'compact') {
+    return (
+      <CompactToolDisplay
+        icon={<Search className="h-4 w-4 mr-2" />}
+        name={isRunning ? "Web search in progress..." : "Web search complete"}
+        input={query}
+        isRunning={isRunning}
+      />
+    );
+  }
+
+  // JSON.parse throws on malformed input, and this runs during render, so an
+  // unparseable or non-array tool output would otherwise crash the component.
+  // Fall back to an empty list, which renders as "No results found".
+  let results: any[] = [];
+  if (tag.result?.output) {
+    try {
+      const parsed = JSON.parse(tag.result.output);
+      if (Array.isArray(parsed)) {
+        results = parsed;
+      }
+    } catch {
+      results = [];
+    }
+  }
+
+  return (
+    <div className="border rounded-lg overflow-hidden border-subtle dark:border-white/10">
+      <div className="flex items-center px-2 py-1 text-xs font-medium border-b border-subtle dark:border-white/10 bg-background-secondary dark:bg-background-secondary text-foreground">
+        <Search className="h-4 w-4 mr-2" />
+        <div className="flex-1">Web Search: {query}</div>
+        {isRunning && (
+          <div className="flex items-center gap-2">
+            <span className="text-amber-500">Searching</span>
+            <div className="h-2 w-2 rounded-full bg-amber-500 animate-pulse"></div>
+          </div>
+        )}
+      </div>
+      <div className="p-3 bg-card-bg dark:bg-background-secondary text-foreground">
+        {results.length > 0 ? (
+          <div className="space-y-3">
+            {results.map((result: any, index: number) => (
+              <div key={index} className="text-sm">
+                <a href={result.URL} target="_blank" rel="noopener noreferrer" className="font-medium text-blue-600 hover:underline">
+                  {result.Title}
+                </a>
+                <div className="text-xs text-muted-foreground mt-1">
+                  {result.URL}
+                  {result['Published Date'] && (
+                    <span className="ml-2">
+                      ({new Date(result['Published Date']).toLocaleDateString()})
+                    </span>
+                  )}
+                </div>
+              </div>
+            ))}
+          </div>
+        ) : (
+          <div className="text-sm text-muted-foreground">No results found</div>
+        )}
+      </div>
+    </div>
+  );
+};
+
 // Tool component registry
 export const ToolComponentRegistry: Record<string, React.FC<ToolComponentProps>> = {
  'create-file': CreateFileTool,
@ -471,10 +593,28 @@ export const ToolComponentRegistry: Record<string, React.FC<ToolComponentProps>>
  'ask': NotifyTool,  // Handle ask similar to notify for now
  'complete': NotifyTool, // Handle complete similar to notify for now
  'full-file-rewrite': FullFileRewriteTool,
+  'browser-navigate-to': BrowserNavigateTool,
+  'browser-click-element': BrowserNavigateTool,
+  'browser-input-text': BrowserNavigateTool,
+  'browser-go-back': BrowserNavigateTool,
+  'browser-wait': BrowserNavigateTool,
+  'browser-scroll-down': BrowserNavigateTool,
+  'browser-scroll-up': BrowserNavigateTool,
+  'browser-scroll-to-text': BrowserNavigateTool,
+  'browser-switch-tab': BrowserNavigateTool,
+  'browser-close-tab': BrowserNavigateTool,
+  'browser-get-dropdown-options': BrowserNavigateTool,
+  'browser-select-dropdown-option': BrowserNavigateTool,
+  'browser-drag-drop': BrowserNavigateTool,
+  'web-search': WebSearchTool,
 };

 // Helper function to get the appropriate component for a tag
 export function getComponentForTag(tag: ParsedTag): React.FC<ToolComponentProps> {
+  console.log("getComponentForTag", tag);
+  if (!tag || !tag?.tagName) {
+    console.warn(`No tag name for tag: ${tag}`);
+  }
  if (!ToolComponentRegistry[tag.tagName]) {
    console.warn(`No component registered for tag type: ${tag.tagName}`);
  }
--- a/frontend/src/hooks/use-tools-panel.tsx
+++ b/frontend/src/hooks/use-tools-panel.tsx
@ -175,29 +175,3 @@ export function useToolsPanel() {
    prevTool,
  };
 }
-
-// Helper function to get a friendly title for a tool call
-function getToolTitle(tag: ParsedTag): string {
-  switch (tag.tagName) {
-    case 'create-file':
-      return `Creating file: ${tag.attributes.file_path || ''}`;
-    case 'read-file':
-      return `Reading file: ${tag.attributes.file_path || ''}`;
-    case 'execute-command':
-      return `Executing: ${tag.attributes.command || ''}`;
-    case 'create-directory':
-      return `Creating directory: ${tag.attributes.path || ''}`;
-    case 'list-directory':
-      return `Listing directory: ${tag.attributes.path || ''}`;
-    case 'search-code':
-      return `Searching code: ${tag.attributes.query || ''}`;
-    case 'notify':
-      return `Notification: ${tag.attributes.message || ''}`;
-    case 'str-replace':
-      return `String replace: ${tag.attributes.pattern || ''}`;
-    case 'full-file-rewrite':
-      return `Full file rewrite: ${tag.attributes.file_path || ''}`;
-    default:
-      return `${tag.tagName} operation`;
-  }
-} 
--- a/frontend/src/lib/api.ts
+++ b/frontend/src/lib/api.ts
@ -80,8 +80,11 @@ export type Project = {
  description: string;
  account_id: string;
  created_at: string;
-  sandbox_id?: string;
-  sandbox_pass?: string;
+  sandbox: {
+    vnc_preview?: string;
+    id?: string;
+    pass?: string;
+  };
 }

 export type Thread = {
@ -214,7 +217,8 @@ export const createProject = async (
    name: data.name,
    description: data.description || '',
    account_id: data.account_id,
-    created_at: data.created_at
+    created_at: data.created_at,
+    sandbox: { id: "", pass: "", vnc_preview: "" }
  };
 };

--- a/frontend/src/lib/types/tool-calls.ts
+++ b/frontend/src/lib/types/tool-calls.ts
@ -13,6 +13,9 @@ export interface ParsedTag {
  isToolCall?: boolean; // Whether this is a tool call (vs a result)
  isPaired?: boolean; // Whether this tag has been paired with its call/result
  status?: 'running' | 'completed' | 'error'; // Status of the tool call
+  
+  // VNC preview for browser-related tools
+  vncPreview?: string; // URL of the sandbox's VNC viewer page (used as an iframe src), not an image
 }

 // Display mode for tool components
@ -37,7 +40,21 @@ export const SUPPORTED_XML_TAGS = [
  'list-directory',
  'search-code',
  'complete',
-  'full-file-rewrite'
+  'full-file-rewrite',
+  'browser-navigate-to',
+  'browser-click-element',
+  'browser-input-text',
+  'browser-go-back',
+  'browser-wait',
+  'browser-scroll-down',
+  'browser-scroll-up',
+  'browser-scroll-to-text',
+  'browser-switch-tab',
+  'browser-close-tab',
+  'browser-get-dropdown-options',
+  'browser-select-dropdown-option',
+  'browser-drag-drop',
+  'web-search'
 ];

 // Tool status labels