From 55b4d26348fedbc2ca9aa5bddc4380a4dff81a42 Mon Sep 17 00:00:00 2001 From: Adam Cohen Hillel Date: Tue, 8 Apr 2025 21:09:45 +0100 Subject: [PATCH] sb tempo --- backend/.env.example | 8 +- backend/agent/run.py | 18 +- backend/agent/tools/sb_browse_tool.py | 91 +++++++ backend/agent/tools/sb_shell_tool.py | 56 +++++ backend/agent/tools/sb_website_tool.py | 56 +++++ backend/agent/tools/utils/daytona_sandbox.py | 235 +++++++++++++++++++ backend/docker/Dockerfile | 2 +- 7 files changed, 459 insertions(+), 7 deletions(-) create mode 100644 backend/agent/tools/sb_browse_tool.py create mode 100644 backend/agent/tools/sb_shell_tool.py create mode 100644 backend/agent/tools/sb_website_tool.py create mode 100644 backend/agent/tools/utils/daytona_sandbox.py diff --git a/backend/.env.example b/backend/.env.example index fbf80f90..e894d093 100644 --- a/backend/.env.example +++ b/backend/.env.example @@ -11,7 +11,13 @@ REDIS_PORT= REDIS_PASSWORD= REDIS_SSL= - +# AWS Bedrock: AWS_ACCESS_KEY_ID= AWS_SECRET_ACCESS_KEY= AWS_REGION_NAME= + +# Sandbox container provider: + +DAYTONA_API_KEY= +DAYTONA_SERVER_URL= +DAYTONA_TARGET= \ No newline at end of file diff --git a/backend/agent/run.py b/backend/agent/run.py index cd00ce13..3996480a 100644 --- a/backend/agent/run.py +++ b/backend/agent/run.py @@ -3,12 +3,14 @@ import json import uuid from agentpress.thread_manager import ThreadManager from agent.tools.files_tool import FilesTool +from agent.tools.sb_browse_tool import SandboxTool from agent.tools.terminal_tool import TerminalTool # from agent.tools.search_tool import CodeSearchTool from typing import Optional from agent.prompt import get_system_prompt from agentpress.response_processor import ProcessorConfig from dotenv import load_dotenv +from agent.tools.utils.daytona_sandbox import create_sandbox # Load environment variables load_dotenv() @@ -19,20 +21,26 @@ async def run_agent(thread_id: str, stream: bool = True, thread_manager: Optiona if not thread_manager: thread_manager = 
ThreadManager() + if True: # todo: change to if not sandbox running + sandbox = create_sandbox(TEMP_PASSWORD) + sandbox_id = sandbox.id + sandbox_password = "vvv" + print("Adding tools to thread manager...") - thread_manager.add_tool(FilesTool) - thread_manager.add_tool(TerminalTool) + # thread_manager.add_tool(FilesTool) + # thread_manager.add_tool(TerminalTool) # thread_manager.add_tool(CodeSearchTool) - + thread_manager.add_tool(SandboxTool, sandbox_id=sandbox_id, password=sandbox_password) + system_message = { "role": "system", "content": get_system_prompt() } - model_name = "anthropic/claude-3-5-sonnet-latest" + # model_name = "anthropic/claude-3-5-sonnet-latest" #anthropic/claude-3-7-sonnet-latest - #openai/gpt-4o + model_name = "openai/gpt-4o" #groq/deepseek-r1-distill-llama-70b #bedrock/anthropic.claude-3-7-sonnet-20250219-v1:0 diff --git a/backend/agent/tools/sb_browse_tool.py b/backend/agent/tools/sb_browse_tool.py new file mode 100644 index 00000000..d3726e02 --- /dev/null +++ b/backend/agent/tools/sb_browse_tool.py @@ -0,0 +1,91 @@ +import traceback +import requests + +from agentpress.tool import ToolResult, openapi_schema, xml_schema +from agent.tools.utils.daytona_sandbox import SandboxToolsBase +from utils.logger import logger + + +# TODO: might want to be more granular with the tool names: +# browser_view - View content of the current browser page. Use for checking the latest state of previously opened pages. +# browser_navigate - Navigate browser to specified URL. Use when accessing new pages is needed. +# browser_restart - Restart browser and navigate to specified URL. Use when browser state needs to be reset. +# browser_click - Click on elements in the current browser page. Use when clicking page elements is needed. +# browser_input - Overwrite text in editable elements on the current browser page. Use when filling content in input fields. +# browser_move_mouse - Move cursor to specified position on the current browser page.
Use when simulating user mouse movement. +# browser_press_key - Simulate key press in the current browser page. Use when specific keyboard operations are needed. +# browser_select_option - Select specified option from dropdown list element in the current browser page. Use when selecting dropdown menu options. +# browser_scroll_up - Scroll up the current browser page. Use when viewing content above or returning to page top. +# browser_scroll_down - Scroll down the current browser page. Use when viewing content below or jumping to page bottom. +# browser_console_exec - Execute JavaScript code in browser console. Use when custom scripts need to be executed. +# browser_console_view - View browser console output. Use when checking JavaScript logs or debugging page errors. + + +class SandboxBrowseTool(SandboxToolsBase): + """Tool for executing tasks in a Daytona sandbox with browser-use capabilities.""" + + def __init__(self, sandbox_id: str, password: str): + super().__init__(sandbox_id, password) + + @openapi_schema({ + "type": "function", + "function": { + "name": "execute_browser_action", + "description": "Execute a simple browser action in the sandbox environment based on current state", + "parameters": { + "type": "object", + "properties": { + "task_description": { + "type": "string", + "description": "A simple action to do on the browser based on current state" + } + }, + "required": ["task_description"] + } + } + }) + @xml_schema( + tag_name="execute-browser-action", + mappings=[ + {"param_name": "task_description", "node_type": "content", "path": "."} + ], + example=''' + + a simple action to do on the browser based on current state + + ''' + ) + async def execute_browser_action(self, task_description: str) -> ToolResult: + """Execute a browser task in the sandbox environment using browser-use + + Args: + task_description (str): The task to execute + + Returns: + dict: Result of the execution + """ + print(f"\033[95mExecuting browser action: 
{task_description}\033[0m") + try: + + + logger.info(f"Making API call to {self.api_url}/run-task with task: {task_description}") + + # Make the API call to our FastAPI endpoint + response = requests.post( + f"{self.api_url}/run-task", + json={"task_description": task_description}, + timeout=None + ) + + if response.status_code == 200: + logger.info("API call completed successfully") + print(response.json()) + return self.success_response(response.json()) + else: + logger.error(f"API call failed with status code {response.status_code}: {response.text}") + return self.fail_response(f"API call failed with status code {response.status_code}: {response.text}") + + except Exception as e: + logger.error(f"Error executing browser action: {e}") + print(traceback.format_exc()) + return self.fail_response(f"Error executing browser action: {e}") diff --git a/backend/agent/tools/sb_shell_tool.py b/backend/agent/tools/sb_shell_tool.py new file mode 100644 index 00000000..eff45684 --- /dev/null +++ b/backend/agent/tools/sb_shell_tool.py @@ -0,0 +1,56 @@ +from agentpress.tool import ToolResult, openapi_schema, xml_schema +from agent.tools.utils.daytona_sandbox import SandboxToolsBase + + +class SandboxShellTool(SandboxToolsBase): + """Tool for executing shell commands in a Daytona sandbox.""" + + def __init__(self, sandbox_id: str, password: str): + super().__init__(sandbox_id, password) + + + @openapi_schema({ + "type": "function", + "function": { + "name": "execute_command", + "description": "Execute a shell command in the workspace directory", + "parameters": { + "type": "object", + "properties": { + "command": { + "type": "string", + "description": "The shell command to execute" + } + }, + "required": ["command"] + } + } + }) + @xml_schema( + tag_name="execute-command", + mappings=[ + {"param_name": "command", "node_type": "content", "path": "."}, + ], + example=''' + + npm install package-name + + ''' + ) + async def execute_command(self, command: str,
folder: str = None) -> ToolResult: + try: + folder = folder or self.sandbox.get_user_root_dir() + response = self.sandbox.process.exec(command, cwd=folder, timeout=60) + + if response.exit_code == 0: + return self.success_response({ + "output": response.result, + "error": "", + "exit_code": response.exit_code, + "cwd": folder + }) + else: + return self.fail_response(f"Command failed with exit code {response.exit_code}: {response.result}") + + except Exception as e: + return self.fail_response(f"Error executing command: {str(e)}") diff --git a/backend/agent/tools/sb_website_tool.py b/backend/agent/tools/sb_website_tool.py new file mode 100644 index 00000000..dd05bf5b --- /dev/null +++ b/backend/agent/tools/sb_website_tool.py @@ -0,0 +1,56 @@ +from agentpress.tool import ToolResult, openapi_schema, xml_schema +from agent.tools.utils.daytona_sandbox import SandboxToolsBase + + +class SandboxWebsiteTool(SandboxToolsBase): + """Tool for executing tasks in a Daytona sandbox with browser-use capabilities.""" + + def __init__(self, sandbox_id: str, password: str): + super().__init__(sandbox_id, password) + + + @openapi_schema({ + "type": "function", + "function": { + "name": "execute_command", + "description": "Execute a shell command in the workspace directory", + "parameters": { + "type": "object", + "properties": { + "command": { + "type": "string", + "description": "The shell command to execute" + } + }, + "required": ["command"] + } + } + }) + @xml_schema( + tag_name="execute-command", + mappings=[ + {"param_name": "command", "node_type": "content", "path": "."}, + ], + example=''' + + npm install package-name + + ''' + ) + async def execute_command(self, command: str, folder: str = None) -> ToolResult: + try: + folder = folder or self.sandbox.get_user_root_dir() + response = self.sandbox.process.exec(command, cwd=folder, timeout=60) + + if response.exit_code == 0: + return self.success_response({ + "output": response.result, + "error": "", + "exit_code": 
response.exit_code, + "cwd": folder + }) + else: + return self.fail_response(f"Command failed with exit code {response.exit_code}: {response.result}") + + except Exception as e: + return self.fail_response(f"Error executing command: {str(e)}") diff --git a/backend/agent/tools/utils/daytona_sandbox.py b/backend/agent/tools/utils/daytona_sandbox.py new file mode 100644 index 00000000..1ed36b46 --- /dev/null +++ b/backend/agent/tools/utils/daytona_sandbox.py @@ -0,0 +1,235 @@ +import os +import requests +from time import sleep + +from daytona_sdk import Daytona, DaytonaConfig, CreateSandboxParams, SessionExecuteRequest + +from agentpress.tool import Tool +from utils.logger import logger + +config = DaytonaConfig( + api_key=os.getenv("DAYTONA_API_KEY"), + server_url=os.getenv("DAYTONA_SERVER_URL"), + target=os.getenv("DAYTONA_TARGET") +) +daytona = Daytona(config) + + +sandbox_api = b''' +import traceback +from fastapi import FastAPI, HTTPException +from pydantic import BaseModel +from browser_use import Agent, Browser +from browser_use.browser.context import BrowserContextConfig, BrowserContextWindowSize +from langchain_openai import ChatOpenAI +import uvicorn +from contextlib import asynccontextmanager +import logging +import logging.handlers +import os + +# Configure logging +log_dir = "/var/log/kortix" +os.makedirs(log_dir, exist_ok=True) +log_file = os.path.join(log_dir, "kortix_api.log") + +logger = logging.getLogger("kortix_api") +logger.setLevel(logging.INFO) + +# Create rotating file handler +file_handler = logging.handlers.RotatingFileHandler( + log_file, + maxBytes=10485760, # 10MB + backupCount=5 +) +file_handler.setFormatter( + logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') +) +logger.addHandler(file_handler) + +@asynccontextmanager +async def lifespan(app: FastAPI): + # Initialize browser on startup + global browser + try: + browser = Browser() + logger.info("Browser initialized successfully") + except Exception as e: + 
logger.error(f"Error initializing browser: {str(e)}") + logger.error(traceback.format_exc()) + yield + # Clean up resources at shutdown if needed + +app = FastAPI(lifespan=lifespan) + +# Global variables to maintain browser state +browser = None +browser_context = None +agent = None + +class TaskRequest(BaseModel): + task_description: str + +@app.post("/run-task") +async def run_task(request: TaskRequest): + global browser, browser_context, agent + + if not browser: + try: + browser = Browser() + except Exception as e: + error_msg = f"Failed to initialize browser: {str(e)}" + logger.error(error_msg) + raise HTTPException(status_code=500, detail=error_msg) + + try: + # Create a browser context if it doesn't exist + if not browser_context: + browser_context = await browser.new_context( + config=BrowserContextConfig( + browser_window_size=BrowserContextWindowSize( + width=1280, height=800 + ), + ) + ) + logger.info("Created new browser context") + + # Create a new agent for each task + agent = Agent( + task=request.task_description, + llm=ChatOpenAI(model="gpt-4o"), + browser=browser, + browser_context=browser_context + ) + logger.info(f"Starting task: {request.task_description}") + + result = await agent.run() + + # Format the history for response + history = [] + for h in result.history: + logger.debug(f"Task history entry: {h}") + history.append(str(h)) + + logger.info("Task completed successfully") + return { + "status": "success", + "history": history + } + + except Exception as e: + error_traceback = traceback.format_exc() + logger.error(f"Error during task execution: {str(e)}") + logger.error(error_traceback) + raise HTTPException( + status_code=500, + detail={ + "status": "error", + "error": str(e), + "traceback": error_traceback + } + ) + +@app.get("/health") +async def health_check(): + status = { + "status": "healthy", + "browser_initialized": browser is not None, + "context_initialized": browser_context is not None + } + logger.debug(f"Health check: 
{status}") + return status + +if __name__ == "__main__": + logger.info("Starting Kortix API server") + uvicorn.run(app, host="0.0.0.0", port=8000) +''' + +def create_sandbox(password: str): + sandbox = daytona.create(CreateSandboxParams( + image="adamcohenhillel/kortix-browser-use:0.0.1", + env_vars={ + "CHROME_PERSISTENT_SESSION": "true", + "RESOLUTION": "1920x1080x24", + "RESOLUTION_WIDTH": "1920", + "RESOLUTION_HEIGHT": "1080", + "VNC_PASSWORD": password, + "OPENAI_ENDPOINT": "https://api.openai.com/v1", + "OPENAI_API_KEY": os.getenv("OPENAI_API_KEY"), + "ANTHROPIC_API_KEY": "", + "ANTHROPIC_ENDPOINT": "https://api.anthropic.com", + "GOOGLE_API_KEY": "", + "AZURE_OPENAI_ENDPOINT": "", + "AZURE_OPENAI_API_KEY": "", + "AZURE_OPENAI_API_VERSION": "2025-01-01-preview", + "DEEPSEEK_ENDPOINT": "https://api.deepseek.com", + "DEEPSEEK_API_KEY": "", + "OLLAMA_ENDPOINT": "http://localhost:11434", + "ANONYMIZED_TELEMETRY": "false", + "BROWSER_USE_LOGGING_LEVEL": "info", + "CHROME_PATH": "", + "CHROME_USER_DATA": "", + "CHROME_DEBUGGING_PORT": "9222", + "CHROME_DEBUGGING_HOST": "localhost", + "CHROME_CDP": "" + }, + ports=[ + 7788, # Gradio default port + 6080, # noVNC web interface + 5901, # VNC port + 9222, # Chrome remote debugging port + 8000, # FastAPI port + 8080 # HTTP website port + ] + )) + sandbox.fs.upload_file(sandbox.get_user_root_dir() + "/app.py", sandbox_api) + sandbox.process.create_session('kortix_browser_use_api') + rsp = sandbox.process.execute_session_command('kortix_browser_use_api', SessionExecuteRequest( + command="python " + sandbox.get_user_root_dir() + "/app.py", + var_async=True + )) + + times = 0 + success = False + api_url = sandbox.get_preview_link(8000) + while times < 10: + times += 1 + logger.info(f"Waiting for API to be ready...") + # Make the API call to our FastAPI endpoint + response = requests.get(f"{api_url}/health") + if response.status_code == 200: + logger.info(f"API call completed successfully") + success = True + break + else: + 
sleep(1) + + if not success: + raise Exception("API call failed") + + logger.info(f"Executed command {rsp}") + logger.info(f"Created kortix_browser_use_api session `kortix_browser_use_api`") + return sandbox + + +class SandboxToolsBase(Tool): + """Tool for executing tasks in a Daytona sandbox with browser-use capabilities.""" + + def __init__(self, sandbox_id: str, password: str): + super().__init__() + self.sandbox = None + self.daytona = daytona + + self.sandbox_id = sandbox_id + try: + self.sandbox = self.daytona.get_current_sandbox(self.sandbox_id) + except Exception as e: + logger.error(f"Error getting sandbox: {e}") + raise e + + self.api_url = self.sandbox.get_preview_link(8000) + + print("\033[95m***") + print(self.sandbox.get_preview_link(6080)) + print("***\033[0m") + \ No newline at end of file diff --git a/backend/docker/Dockerfile b/backend/docker/Dockerfile index 94a575df..ebbc8a4c 100644 --- a/backend/docker/Dockerfile +++ b/backend/docker/Dockerfile @@ -81,6 +81,6 @@ ENV RESOLUTION_HEIGHT=1080 RUN mkdir -p /var/log/supervisor COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf -EXPOSE 7788 6080 5901 +EXPOSE 7788 6080 5901 8000 8080 CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]