sb tempo

2025-04-08 21:09:45 +01:00 · 2025-04-08 21:09:45 +01:00 · 55b4d26348
parent 6da9e5dd8f
commit 55b4d26348
7 changed files with 459 additions and 7 deletions
--- a/backend/.env.example
+++ b/backend/.env.example
@ -11,7 +11,13 @@ REDIS_PORT=
 REDIS_PASSWORD=
 REDIS_SSL=

-
+# AWS Bedrock:
 AWS_ACCESS_KEY_ID=
 AWS_SECRET_ACCESS_KEY=
 AWS_REGION_NAME=
+
+# Sandbox container provider:
+
+DAYTONA_API_KEY=
+DAYTONA_SERVER_URL=
+DAYTONA_TARGET=
--- a/backend/agent/run.py
+++ b/backend/agent/run.py
@ -3,12 +3,14 @@ import json
 import uuid
 from agentpress.thread_manager import ThreadManager
 from agent.tools.files_tool import FilesTool
+from agent.tools.sb_browse_tool import SandboxTool
 from agent.tools.terminal_tool import TerminalTool
 # from agent.tools.search_tool import CodeSearchTool
 from typing import Optional
 from agent.prompt import get_system_prompt
 from agentpress.response_processor import ProcessorConfig
 from dotenv import load_dotenv
+from agent.tools.utils.daytona_sandbox import create_sandbox

 # Load environment variables
 load_dotenv()
@ -19,20 +21,26 @@ async def run_agent(thread_id: str, stream: bool = True, thread_manager: Optiona
    if not thread_manager:
        thread_manager = ThreadManager()
    
+    if True:  # TODO: change to "if not sandbox running"
+        sandbox = create_sandbox(TEMP_PASSWORD)
+        sandbox_id = sandbox.id
+        sandbox_password = "vvv"
+
    print("Adding tools to thread manager...")
-    thread_manager.add_tool(FilesTool)
-    thread_manager.add_tool(TerminalTool)
+    # thread_manager.add_tool(FilesTool)
+    # thread_manager.add_tool(TerminalTool)
    # thread_manager.add_tool(CodeSearchTool)
-    
+    thread_manager.add_tool(SandboxTool, sandbox_id=sandbox_id, password=sandbox_password)
+
    system_message = {
        "role": "system",
        "content": get_system_prompt()
    }

-    model_name = "anthropic/claude-3-5-sonnet-latest" 
+    # model_name = "anthropic/claude-3-5-sonnet-latest" 
    
    #anthropic/claude-3-7-sonnet-latest
-    #openai/gpt-4o
+    model_name = "openai/gpt-4o"
    #groq/deepseek-r1-distill-llama-70b
    #bedrock/anthropic.claude-3-7-sonnet-20250219-v1:0

--- a/backend/agent/tools/sb_browse_tool.py
+++ b/backend/agent/tools/sb_browse_tool.py
@ -0,0 +1,91 @@
+import traceback
+import requests
+
+from agentpress.tool import ToolResult, openapi_schema, xml_schema
+from agent.tools.utils.daytona_sandbox import SandboxToolsBase
+from utils.logger import logger
+
+
+# TODO: might want to be more granular with the tool names:
+# browser_view - View content of the current browser page. Use for checking the latest state of previously opened pages.
+# browser_navigate - Navigate browser to specified URL. Use when accessing new pages is needed.
+# browser_restart - Restart browser and navigate to specified URL. Use when browser state needs to be reset.
+# browser_click - Click on elements in the current browser page. Use when clicking page elements is needed.
+# browser_input - Overwrite text in editable elements on the current browser page. Use when filling content in input fields.
+# browser_move_mouse - Move cursor to specified position on the current browser page. Use when simulating user mouse movement.
+# browser_press_key - Simulate key press in the current browser page. Use when specific keyboard operations are needed.
+# browser_select_option - Select specified option from dropdown list element in the current browser page. Use when selecting dropdown menu options.
+# browser_scroll_up - Scroll up the current browser page. Use when viewing content above or returning to page top.
+# browser_scroll_down - Scroll down the current browser page. Use when viewing content below or jumping to page bottom.
+# browser_console_exec - Execute JavaScript code in browser console. Use when custom scripts need to be executed.
+# browser_console_view - View browser console output. Use when checking JavaScript logs or debugging page errors.
+
+
+class SandboxBrowseTool(SandboxToolsBase):
+    """Browser automation tool backed by the API server running in a Daytona sandbox.
+
+    Each call forwards a plain-language task description to the ``/run-task``
+    endpoint under ``self.api_url`` (set by ``SandboxToolsBase``) and wraps the
+    endpoint's reply in a ``ToolResult``.
+    """
+
+    def __init__(self, sandbox_id: str, password: str):
+        super().__init__(sandbox_id, password)
+
+    @openapi_schema({
+        "type": "function",
+        "function": {
+            "name": "execute_browser_action",
+            "description": "Execute a simple browser action in the sandbox environment based on current state",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "task_description": {
+                        "type": "string",
+                        "description": "A simple action to do on the browser based on current state"
+                    }
+                },
+                "required": ["task_description"]
+            }
+        }
+    })
+    @xml_schema(
+        tag_name="execute-browser-action",
+        mappings=[
+            {"param_name": "task_description", "node_type": "content", "path": "."}
+        ],
+        example='''
+        <execute-browser-action>
+        a simple action to do on the browser based on current state
+        </execute-browser-action>
+        '''
+    )
+    async def execute_browser_action(self, task_description: str) -> ToolResult:
+        """Run one browser task in the sandbox and report its outcome.
+
+        Args:
+            task_description (str): The action to perform, in plain language.
+
+        Returns:
+            ToolResult: A success result wrapping the endpoint's JSON payload
+            when it answers HTTP 200; otherwise a failure result describing
+            the HTTP status or the exception that was raised.
+        """
+        # Imported locally so this method does not depend on file-level imports.
+        import asyncio
+        import functools
+
+        print(f"\033[95mExecuting browser action: {task_description}\033[0m")
+        endpoint = f"{self.api_url}/run-task"
+        try:
+            logger.info(f"Making API call to {endpoint} with task: {task_description}")
+
+            # `requests` is synchronous; calling it directly in this coroutine
+            # would block the event loop for the whole browser task, so the
+            # call runs in the default executor instead.
+            # The connect timeout is bounded; the read timeout stays unlimited
+            # because a browser task may legitimately run for a long time.
+            post_task = functools.partial(
+                requests.post,
+                endpoint,
+                json={"task_description": task_description},
+                timeout=(30, None),
+            )
+            response = await asyncio.get_running_loop().run_in_executor(None, post_task)
+
+            if response.status_code != 200:
+                failure = f"API call failed with status code {response.status_code}: {response.text}"
+                logger.error(failure)
+                return self.fail_response(failure)
+
+            logger.info("API call completed successfully")
+            # Decode the body once and reuse it for the console echo and the result.
+            payload = response.json()
+            print(payload)
+            return self.success_response(payload)
+
+        except Exception as e:
+            logger.error(f"Error executing browser action: {e}")
+            print(traceback.format_exc())
+            return self.fail_response(f"Error executing browser action: {e}")
+
+
+# agent/run.py imports this tool under the name `SandboxTool`; keep that import working.
+SandboxTool = SandboxBrowseTool
--- a/backend/agent/tools/sb_shell_tool.py
+++ b/backend/agent/tools/sb_shell_tool.py
@ -0,0 +1,56 @@
+from agentpress.tool import ToolResult, openapi_schema, xml_schema
+from agent.tools.utils.daytona_sandbox import SandboxToolsBase
+
+
+class SandboxShellTool(SandboxToolsBase):
+    """Tool that runs shell commands inside a Daytona sandbox."""
+
+    def __init__(self, sandbox_id: str, password: str):
+        super().__init__(sandbox_id, password)
+
+    @openapi_schema({
+        "type": "function",
+        "function": {
+            "name": "execute_command",
+            "description": "Execute a shell command in the workspace directory",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "command": {
+                        "type": "string",
+                        "description": "The shell command to execute"
+                    }
+                },
+                "required": ["command"]
+            }
+        }
+    })
+    @xml_schema(
+        tag_name="execute-command",
+        mappings=[
+            {"param_name": "command", "node_type": "content", "path": "."},
+        ],
+        example='''
+        <execute-command>
+        npm install package-name
+        </execute-command>
+        '''
+    )
+    async def execute_command(self, command: str, folder: str = None) -> ToolResult:
+        """Run ``command`` in the sandbox and report what it produced.
+
+        Args:
+            command: Shell command line to execute.
+            folder: Working directory for the command. When falsy, the
+                sandbox's user root directory is used instead.
+
+        Returns:
+            ToolResult: On exit code 0, a success result carrying ``output``,
+            ``error`` (always empty), ``exit_code`` and ``cwd``. On any other
+            exit code, or if an exception is raised, a failure result with a
+            descriptive message.
+        """
+        try:
+            workdir = folder if folder else self.sandbox.get_user_root_dir()
+            outcome = self.sandbox.process.exec(command, cwd=workdir, timeout=60)
+            status = outcome.exit_code
+
+            # Guard clause: anything but a zero exit status is a failure.
+            if status != 0:
+                return self.fail_response(f"Command failed with exit code {status}: {outcome.result}")
+
+            return self.success_response({
+                "output": outcome.result,
+                "error": "",
+                "exit_code": status,
+                "cwd": workdir
+            })
+        except Exception as exc:
+            return self.fail_response(f"Error executing command: {exc}")
--- a/backend/agent/tools/sb_website_tool.py
+++ b/backend/agent/tools/sb_website_tool.py
@ -0,0 +1,56 @@
+from agentpress.tool import ToolResult, openapi_schema, xml_schema
+from agent.tools.utils.daytona_sandbox import SandboxToolsBase
+
+
+class SandboxWebsiteTool(SandboxToolsBase):
+    """Tool that runs shell commands inside a Daytona sandbox.
+
+    NOTE(review): this class currently exposes the same ``execute_command`` /
+    ``execute-command`` tool as ``SandboxShellTool`` in sb_shell_tool.py;
+    presumably it is a placeholder for website-specific actions. Confirm
+    before registering both tools on the same thread manager.
+    """
+
+    def __init__(self, sandbox_id: str, password: str):
+        super().__init__(sandbox_id, password)
+
+    @openapi_schema({
+        "type": "function",
+        "function": {
+            "name": "execute_command",
+            "description": "Execute a shell command in the workspace directory",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "command": {
+                        "type": "string",
+                        "description": "The shell command to execute"
+                    }
+                },
+                "required": ["command"]
+            }
+        }
+    })
+    @xml_schema(
+        tag_name="execute-command",
+        mappings=[
+            {"param_name": "command", "node_type": "content", "path": "."},
+        ],
+        example='''
+        <execute-command>
+        npm install package-name
+        </execute-command>
+        '''
+    )
+    async def execute_command(self, command: str, folder: str = None) -> ToolResult:
+        """Execute ``command`` in the sandbox and wrap the outcome in a ToolResult.
+
+        Args:
+            command: Shell command line to execute.
+            folder: Directory to run in; falls back to the sandbox's user root
+                directory when not given.
+
+        Returns:
+            ToolResult: Success (with ``output``, ``error``, ``exit_code``,
+            ``cwd``) when the command exits with 0; failure otherwise,
+            including when the call itself raises.
+        """
+        try:
+            run_dir = folder or self.sandbox.get_user_root_dir()
+            execution = self.sandbox.process.exec(command, cwd=run_dir, timeout=60)
+
+            succeeded = execution.exit_code == 0
+            if not succeeded:
+                message = f"Command failed with exit code {execution.exit_code}: {execution.result}"
+                return self.fail_response(message)
+
+            details = {
+                "output": execution.result,
+                "error": "",
+                "exit_code": execution.exit_code,
+                "cwd": run_dir
+            }
+            return self.success_response(details)
+        except Exception as err:
+            return self.fail_response(f"Error executing command: {err}")
--- a/backend/agent/tools/utils/daytona_sandbox.py
+++ b/backend/agent/tools/utils/daytona_sandbox.py
@ -0,0 +1,235 @@
+import os
+import requests
+from time import sleep
+
+from daytona_sdk import Daytona, DaytonaConfig, CreateSandboxParams, SessionExecuteRequest
+
+from agentpress.tool import Tool
+from utils.logger import logger
+
+# Daytona client configuration, read from environment variables at import time.
+config = DaytonaConfig(
+    api_key=os.getenv("DAYTONA_API_KEY"),
+    server_url=os.getenv("DAYTONA_SERVER_URL"),
+    target=os.getenv("DAYTONA_TARGET")
+)
+# Module-level client used by create_sandbox() and SandboxToolsBase.
+daytona = Daytona(config)
+
+
+# Source code of the FastAPI server that create_sandbox() uploads into the
+# sandbox as app.py and starts there. It serves POST /run-task and
+# GET /health on port 8000. The literal is shipped verbatim, so any edit
+# inside it changes what runs in the sandbox.
+sandbox_api = b'''
+import traceback
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from browser_use import Agent, Browser
+from browser_use.browser.context import BrowserContextConfig, BrowserContextWindowSize
+from langchain_openai import ChatOpenAI
+import uvicorn
+from contextlib import asynccontextmanager
+import logging
+import logging.handlers
+import os
+
+# Configure logging
+log_dir = "/var/log/kortix"
+os.makedirs(log_dir, exist_ok=True)
+log_file = os.path.join(log_dir, "kortix_api.log")
+
+logger = logging.getLogger("kortix_api")
+logger.setLevel(logging.INFO)
+
+# Create rotating file handler
+file_handler = logging.handlers.RotatingFileHandler(
+    log_file,
+    maxBytes=10485760,  # 10MB
+    backupCount=5
+)
+file_handler.setFormatter(
+    logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+)
+logger.addHandler(file_handler)
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    # Initialize browser on startup
+    global browser
+    try:
+        browser = Browser()
+        logger.info("Browser initialized successfully")
+    except Exception as e:
+        logger.error(f"Error initializing browser: {str(e)}")
+        logger.error(traceback.format_exc())
+    yield
+    # Clean up resources at shutdown if needed
+
+app = FastAPI(lifespan=lifespan)
+
+# Global variables to maintain browser state
+browser = None
+browser_context = None
+agent = None
+
+class TaskRequest(BaseModel):
+    task_description: str
+
+@app.post("/run-task")
+async def run_task(request: TaskRequest):
+    global browser, browser_context, agent
+    
+    if not browser:
+        try:
+            browser = Browser()
+        except Exception as e:
+            error_msg = f"Failed to initialize browser: {str(e)}"
+            logger.error(error_msg)
+            raise HTTPException(status_code=500, detail=error_msg)
+    
+    try:
+        # Create a browser context if it doesn't exist
+        if not browser_context:
+            browser_context = await browser.new_context(
+                config=BrowserContextConfig(
+                    browser_window_size=BrowserContextWindowSize(
+                        width=1280, height=800
+                    ),
+                )
+            )
+            logger.info("Created new browser context")
+        
+        # Create a new agent for each task
+        agent = Agent(
+            task=request.task_description,
+            llm=ChatOpenAI(model="gpt-4o"),
+            browser=browser,
+            browser_context=browser_context
+        )
+        logger.info(f"Starting task: {request.task_description}")
+        
+        result = await agent.run()
+        
+        # Format the history for response
+        history = []
+        for h in result.history:
+            logger.debug(f"Task history entry: {h}")
+            history.append(str(h))
+            
+        logger.info("Task completed successfully")
+        return {
+            "status": "success", 
+            "history": history
+        }
+        
+    except Exception as e:
+        error_traceback = traceback.format_exc()
+        logger.error(f"Error during task execution: {str(e)}")
+        logger.error(error_traceback)
+        raise HTTPException(
+            status_code=500, 
+            detail={
+                "status": "error", 
+                "error": str(e), 
+                "traceback": error_traceback
+            }
+        )
+
+@app.get("/health")
+async def health_check():
+    status = {
+        "status": "healthy",
+        "browser_initialized": browser is not None,
+        "context_initialized": browser_context is not None
+    }
+    logger.debug(f"Health check: {status}")
+    return status
+
+if __name__ == "__main__":
+    logger.info("Starting Kortix API server")
+    uvicorn.run(app, host="0.0.0.0", port=8000)
+'''
+
+def create_sandbox(password: str, max_attempts: int = 10, retry_delay: float = 1.0):
+    """Create a Daytona sandbox and start the browser-use API server inside it.
+
+    The sandbox is created from the kortix-browser-use image, ``sandbox_api``
+    is uploaded as ``app.py`` into the sandbox user's root directory and
+    launched asynchronously in a dedicated session, and the server's
+    ``/health`` endpoint is then polled until it answers HTTP 200.
+
+    Args:
+        password: Value passed to the container as ``VNC_PASSWORD``.
+        max_attempts: How many times to probe ``/health`` before giving up.
+        retry_delay: Seconds to sleep between unsuccessful probes.
+
+    Returns:
+        The sandbox object returned by ``daytona.create``.
+
+    Raises:
+        Exception: If the API never reports healthy within ``max_attempts``
+            probes.
+    """
+    sandbox = daytona.create(CreateSandboxParams(
+        image="adamcohenhillel/kortix-browser-use:0.0.1",
+        env_vars={
+            "CHROME_PERSISTENT_SESSION": "true",
+            "RESOLUTION": "1920x1080x24",
+            "RESOLUTION_WIDTH": "1920",
+            "RESOLUTION_HEIGHT": "1080",
+            "VNC_PASSWORD": password,
+            "OPENAI_ENDPOINT": "https://api.openai.com/v1",
+            "OPENAI_API_KEY": os.getenv("OPENAI_API_KEY"),
+            "ANTHROPIC_API_KEY": "",
+            "ANTHROPIC_ENDPOINT": "https://api.anthropic.com",
+            "GOOGLE_API_KEY": "",
+            "AZURE_OPENAI_ENDPOINT": "",
+            "AZURE_OPENAI_API_KEY": "",
+            "AZURE_OPENAI_API_VERSION": "2025-01-01-preview",
+            "DEEPSEEK_ENDPOINT": "https://api.deepseek.com",
+            "DEEPSEEK_API_KEY": "",
+            "OLLAMA_ENDPOINT": "http://localhost:11434",
+            "ANONYMIZED_TELEMETRY": "false",
+            "BROWSER_USE_LOGGING_LEVEL": "info",
+            "CHROME_PATH": "",
+            "CHROME_USER_DATA": "",
+            "CHROME_DEBUGGING_PORT": "9222",
+            "CHROME_DEBUGGING_HOST": "localhost",
+            "CHROME_CDP": ""
+        },
+        ports=[
+            7788,  # Gradio default port
+            6080,  # noVNC web interface
+            5901,  # VNC port
+            9222,  # Chrome remote debugging port
+            8000,  # FastAPI port
+            8080   # HTTP website port
+        ]
+    ))
+
+    # Upload the API server source and launch it without waiting for it to exit.
+    session_id = 'kortix_browser_use_api'
+    app_path = sandbox.get_user_root_dir() + "/app.py"
+    sandbox.fs.upload_file(app_path, sandbox_api)
+    sandbox.process.create_session(session_id)
+    rsp = sandbox.process.execute_session_command(session_id, SessionExecuteRequest(
+        command="python " + app_path,
+        var_async=True
+    ))
+    logger.info(f"Executed command {rsp}")
+    logger.info(f"Created kortix_browser_use_api session `{session_id}`")
+
+    api_url = sandbox.get_preview_link(8000)
+    for attempt in range(1, max_attempts + 1):
+        logger.info(f"Waiting for API to be ready... (attempt {attempt}/{max_attempts})")
+        try:
+            # A per-probe timeout keeps one stuck request from stalling the loop.
+            response = requests.get(f"{api_url}/health", timeout=5)
+        except requests.RequestException as e:
+            # While the server is still starting the request itself can fail;
+            # that means "not ready yet", not a fatal error.
+            logger.info(f"API not reachable yet: {e}")
+        else:
+            if response.status_code == 200:
+                logger.info("API call completed successfully")
+                return sandbox
+
+        # No point in sleeping after the final probe.
+        if attempt < max_attempts:
+            sleep(retry_delay)
+
+    raise Exception(
+        f"API call failed: sandbox API at {api_url} was not healthy after {max_attempts} attempts"
+    )
+
+
+class SandboxToolsBase(Tool):
+    """Shared base for tools that operate on an existing Daytona sandbox.
+
+    Construction looks the sandbox up by id, stores it on ``self.sandbox``,
+    and records the preview link for port 8000 as ``self.api_url``.
+    """
+
+    def __init__(self, sandbox_id: str, password: str):
+        # NOTE(review): `password` is accepted but never read in this
+        # constructor; confirm whether it should be stored or dropped.
+        super().__init__()
+        self.sandbox = None
+        self.daytona = daytona
+        self.sandbox_id = sandbox_id
+
+        try:
+            located = self.daytona.get_current_sandbox(self.sandbox_id)
+        except Exception as lookup_error:
+            logger.error(f"Error getting sandbox: {lookup_error}")
+            raise lookup_error
+        self.sandbox = located
+
+        self.api_url = located.get_preview_link(8000)
+
+        # Echo the preview link for port 6080 to the console, wrapped in ANSI
+        # colour codes so it stands out.
+        print("\033[95m***")
+        print(located.get_preview_link(6080))
+        print("***\033[0m")
+  
--- a/backend/docker/Dockerfile
+++ b/backend/docker/Dockerfile
@ -81,6 +81,6 @@ ENV RESOLUTION_HEIGHT=1080
 RUN mkdir -p /var/log/supervisor
 COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf

-EXPOSE 7788 6080 5901
+EXPOSE 7788 6080 5901 8000 8080

 CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]