2025-07-07 02:07:26 +08:00
import asyncio
2025-05-10 11:46:48 +08:00
from typing import Optional , Dict , Any
import time
2025-07-07 02:07:26 +08:00
import asyncio
2025-04-11 23:56:26 +08:00
from uuid import uuid4
2025-04-09 04:09:45 +08:00
from agentpress . tool import ToolResult , openapi_schema , xml_schema
2025-05-10 10:26:42 +08:00
from sandbox . tool_base import SandboxToolsBase
2025-04-23 17:53:38 +08:00
from agentpress . thread_manager import ThreadManager
2025-04-09 04:09:45 +08:00
class SandboxShellTool(SandboxToolsBase):
    """Tool for executing tasks in a Daytona sandbox with browser-use capabilities.

    Uses sessions for maintaining state between commands and provides
    comprehensive process management.

    User commands are typed into named tmux sessions (via ``tmux send-keys``)
    so they can keep running after the tool call returns. The tmux control
    commands themselves are run through a dedicated sandbox process session
    named ``raw_commands`` (see ``_execute_raw_command``).
    """

    def __init__(self, project_id: str, thread_manager: ThreadManager):
        super().__init__(project_id, thread_manager)
        self._sessions: Dict[str, str] = {}  # Maps session names to session IDs
        self.workspace_path = "/workspace"  # Ensure we're always operating in /workspace

    @staticmethod
    def _shell_quote(value: str) -> str:
        """Return ``value`` quoted so a POSIX shell treats it as one literal word."""
        # Function-scope import (this file already imports daytona_sdk that way),
        # so the class does not depend on a new module-level import.
        import shlex

        return shlex.quote(str(value))

    async def _ensure_session(self, session_name: str = "default") -> str:
        """Ensure a session exists and return its ID.

        Raises:
            RuntimeError: if the sandbox or the session could not be created;
                the underlying exception is chained as ``__cause__``.
        """
        if session_name not in self._sessions:
            session_id = str(uuid4())
            try:
                await self._ensure_sandbox()  # Ensure sandbox is initialized
                await self.sandbox.process.create_session(session_id)
                self._sessions[session_name] = session_id
            except Exception as e:
                raise RuntimeError(f"Failed to create session: {str(e)}") from e
        return self._sessions[session_name]

    async def _cleanup_session(self, session_name: str):
        """Clean up a session if it exists.

        Best-effort: a failure is reported with a warning and the session
        stays registered in ``self._sessions``.
        """
        if session_name in self._sessions:
            try:
                await self._ensure_sandbox()  # Ensure sandbox is initialized
                await self.sandbox.process.delete_session(self._sessions[session_name])
                del self._sessions[session_name]
            except Exception as e:
                print(f"Warning: Failed to cleanup session {session_name}: {str(e)}")

    @openapi_schema({
        "type": "function",
        "function": {
            "name": "execute_command",
            "description": "Execute a shell command in the workspace directory. IMPORTANT: Commands are non-blocking by default and run in a tmux session. This is ideal for long-running operations like starting servers or build processes. Uses sessions to maintain state between commands. This tool is essential for running CLI tools, installing packages, and managing system operations.",
            "parameters": {
                "type": "object",
                "properties": {
                    "command": {
                        "type": "string",
                        "description": "The shell command to execute. Use this for running CLI tools, installing packages, or system operations. Commands can be chained using &&, ||, and | operators."
                    },
                    "folder": {
                        "type": "string",
                        "description": "Optional relative path to a subdirectory of /workspace where the command should be executed. Example: 'data/pdfs'"
                    },
                    "session_name": {
                        "type": "string",
                        "description": "Optional name of the tmux session to use. Use named sessions for related commands that need to maintain state. Defaults to a random session name.",
                    },
                    "blocking": {
                        "type": "boolean",
                        "description": "Whether to wait for the command to complete. Defaults to false for non-blocking execution.",
                        "default": False
                    },
                    "timeout": {
                        "type": "integer",
                        "description": "Optional timeout in seconds for blocking commands. Defaults to 60. Ignored for non-blocking commands.",
                        "default": 60
                    }
                },
                "required": ["command"]
            }
        }
    })
    @xml_schema(
        tag_name="execute-command",
        mappings=[
            {"param_name": "command", "node_type": "content", "path": "."},
            {"param_name": "folder", "node_type": "attribute", "path": ".", "required": False},
            {"param_name": "session_name", "node_type": "attribute", "path": ".", "required": False},
            {"param_name": "blocking", "node_type": "attribute", "path": ".", "required": False},
            {"param_name": "timeout", "node_type": "attribute", "path": ".", "required": False}
        ],
        example='''
        <function_calls>
        <invoke name="execute_command">
        <parameter name="command">npm run dev</parameter>
        <parameter name="session_name">dev_server</parameter>
        </invoke>
        </function_calls>

        <!-- Example 2: Running in Specific Directory -->
        <function_calls>
        <invoke name="execute_command">
        <parameter name="command">npm run build</parameter>
        <parameter name="folder">frontend</parameter>
        <parameter name="session_name">build_process</parameter>
        </invoke>
        </function_calls>

        <!-- Example 3: Blocking command (wait for completion) -->
        <function_calls>
        <invoke name="execute_command">
        <parameter name="command">npm install</parameter>
        <parameter name="blocking">true</parameter>
        <parameter name="timeout">300</parameter>
        </invoke>
        </function_calls>
        '''
    )
    async def execute_command(
        self,
        command: str,
        folder: Optional[str] = None,
        session_name: Optional[str] = None,
        blocking: bool = False,
        timeout: int = 60
    ) -> ToolResult:
        """Run ``command`` inside a tmux session in the sandbox.

        Args:
            command: Shell command line typed into the tmux session.
            folder: Optional path relative to ``self.workspace_path`` to ``cd``
                into first; leading/trailing slashes are stripped.
            session_name: tmux session to use (created if missing). A random
                ``session_<8 hex chars>`` name is generated when omitted.
            blocking: When true, poll the pane until a completion marker
                appears, the session ends, or ``timeout`` elapses; the session
                is then killed. When false, return right after sending keys.
            timeout: Maximum seconds to wait in blocking mode.

        Returns:
            A success response with ``session_name``, ``cwd`` and ``completed``
            (plus ``output`` in blocking mode or ``message`` otherwise), or a
            fail response describing the error. On error the tmux session is
            killed on a best-effort basis.
        """
        try:
            # Ensure sandbox is initialized
            await self._ensure_sandbox()

            # Set up working directory
            cwd = self.workspace_path
            if folder:
                folder = folder.strip('/')
                cwd = f"{self.workspace_path}/{folder}"

            # Generate a session name if not provided
            if not session_name:
                session_name = f"session_{str(uuid4())[:8]}"

            # Quote once; every tmux invocation below reuses the safe form.
            target = self._shell_quote(session_name)

            # Check if tmux session already exists
            check_session = await self._execute_raw_command(
                f"tmux has-session -t {target} 2>/dev/null || echo 'not_exists'"
            )
            session_exists = "not_exists" not in check_session.get("output", "")
            if not session_exists:
                # Create a new tmux session
                await self._execute_raw_command(f"tmux new-session -d -s {target}")

            # The text typed into tmux: cd first, then the user's command. The
            # directory is quoted for the shell running *inside* tmux.
            typed_prefix = f"cd {self._shell_quote(cwd)} && "

            if blocking:
                # Add a unique marker to detect command completion
                marker = f"COMMAND_DONE_{str(uuid4())[:8]}"
                typed_line = f"{typed_prefix}{command} ; echo {marker}"

                # Quote the whole line for the outer shell so `$`, backticks,
                # quotes and backslashes reach the tmux shell untouched.
                await self._execute_raw_command(
                    f"tmux send-keys -t {target} {self._shell_quote(typed_line)} Enter"
                )

                # Monotonic clock: immune to wall-clock adjustments.
                start_time = time.monotonic()
                final_output = ""
                completed = False

                while (time.monotonic() - start_time) < timeout:
                    # Wait a shorter interval for more responsive checking
                    await asyncio.sleep(0.5)

                    # Check if session still exists (command might have exited)
                    check_result = await self._execute_raw_command(
                        f"tmux has-session -t {target} 2>/dev/null || echo 'ended'"
                    )
                    if "ended" in check_result.get("output", ""):
                        completed = True
                        break

                    # Get current output and check for our completion marker
                    output_result = await self._execute_raw_command(
                        f"tmux capture-pane -t {target} -p -S - -E -"
                    )
                    current_output = output_result.get("output", "")

                    if self._is_command_completed(current_output, marker):
                        final_output = current_output
                        completed = True
                        break

                # If we didn't get the marker, capture whatever output we have
                if not final_output:
                    output_result = await self._execute_raw_command(
                        f"tmux capture-pane -t {target} -p -S - -E -"
                    )
                    final_output = output_result.get("output", "")

                # Kill the session after capture
                await self._execute_raw_command(f"tmux kill-session -t {target}")

                return self.success_response({
                    "output": final_output,
                    "session_name": session_name,
                    "cwd": cwd,
                    # False when the timeout elapsed before the marker was seen.
                    "completed": completed
                })
            else:
                # Send command to tmux session for non-blocking execution
                typed_line = f"{typed_prefix}{command}"
                await self._execute_raw_command(
                    f"tmux send-keys -t {target} {self._shell_quote(typed_line)} Enter"
                )

                # For non-blocking, just return immediately
                return self.success_response({
                    "session_name": session_name,
                    "cwd": cwd,
                    "message": f"Command sent to tmux session '{session_name}'. Use check_command_output to view results.",
                    "completed": False
                })

        except Exception as e:
            # Attempt to clean up session in case of error
            if session_name:
                try:
                    await self._execute_raw_command(
                        f"tmux kill-session -t {self._shell_quote(session_name)}"
                    )
                except Exception:
                    # Best-effort cleanup; the original error is what we report.
                    pass
            return self.fail_response(f"Error executing command: {str(e)}")

    async def _execute_raw_command(self, command: str) -> Dict[str, Any]:
        """Execute a raw command directly in the sandbox.

        The command runs synchronously in the shared ``raw_commands`` process
        session (created on first use) with ``self.workspace_path`` as cwd.

        Returns:
            A dict with ``output`` (the session command logs) and
            ``exit_code`` (taken from the execute response).
        """
        # Ensure session exists for raw commands
        session_id = await self._ensure_session("raw_commands")

        # Execute command in session
        from daytona_sdk import SessionExecuteRequest
        req = SessionExecuteRequest(
            command=command,
            var_async=False,
            cwd=self.workspace_path
        )

        response = await self.sandbox.process.execute_session_command(
            session_id=session_id,
            req=req,
            timeout=30  # Short timeout for utility commands
        )

        logs = await self.sandbox.process.get_session_command_logs(
            session_id=session_id,
            command_id=response.cmd_id
        )

        return {
            "output": logs,
            "exit_code": response.exit_code
        }

    @openapi_schema({
        "type": "function",
        "function": {
            "name": "check_command_output",
            "description": "Check the output of a previously executed command in a tmux session. Use this to monitor the progress or results of non-blocking commands.",
            "parameters": {
                "type": "object",
                "properties": {
                    "session_name": {
                        "type": "string",
                        "description": "The name of the tmux session to check."
                    },
                    "kill_session": {
                        "type": "boolean",
                        "description": "Whether to terminate the tmux session after checking. Set to true when you're done with the command.",
                        "default": False
                    }
                },
                "required": ["session_name"]
            }
        }
    })
    @xml_schema(
        tag_name="check-command-output",
        mappings=[
            {"param_name": "session_name", "node_type": "attribute", "path": ".", "required": True},
            {"param_name": "kill_session", "node_type": "attribute", "path": ".", "required": False}
        ],
        example='''
        <function_calls>
        <invoke name="check_command_output">
        <parameter name="session_name">dev_server</parameter>
        </invoke>
        </function_calls>

        <!-- Example 2: Check final output and kill session -->
        <function_calls>
        <invoke name="check_command_output">
        <parameter name="session_name">build_process</parameter>
        <parameter name="kill_session">true</parameter>
        </invoke>
        </function_calls>
        '''
    )
    async def check_command_output(
        self,
        session_name: str,
        kill_session: bool = False
    ) -> ToolResult:
        """Return the captured pane contents of a tmux session.

        Args:
            session_name: Name of the tmux session to inspect.
            kill_session: When true, kill the session after capturing output.

        Returns:
            A success response with ``output``, ``session_name`` and ``status``,
            or a fail response if the session does not exist or an error occurs.
        """
        try:
            # Ensure sandbox is initialized
            await self._ensure_sandbox()

            target = self._shell_quote(session_name)

            # Check if session exists
            check_result = await self._execute_raw_command(
                f"tmux has-session -t {target} 2>/dev/null || echo 'not_exists'"
            )
            if "not_exists" in check_result.get("output", ""):
                return self.fail_response(f"Tmux session '{session_name}' does not exist.")

            # Get output from tmux pane
            output_result = await self._execute_raw_command(
                f"tmux capture-pane -t {target} -p -S - -E -"
            )
            output = output_result.get("output", "")

            # Kill session if requested
            if kill_session:
                await self._execute_raw_command(f"tmux kill-session -t {target}")
                termination_status = "Session terminated."
            else:
                termination_status = "Session still running."

            return self.success_response({
                "output": output,
                "session_name": session_name,
                "status": termination_status
            })

        except Exception as e:
            return self.fail_response(f"Error checking command output: {str(e)}")

    @openapi_schema({
        "type": "function",
        "function": {
            "name": "terminate_command",
            "description": "Terminate a running command by killing its tmux session.",
            "parameters": {
                "type": "object",
                "properties": {
                    "session_name": {
                        "type": "string",
                        "description": "The name of the tmux session to terminate."
                    }
                },
                "required": ["session_name"]
            }
        }
    })
    @xml_schema(
        tag_name="terminate-command",
        mappings=[
            {"param_name": "session_name", "node_type": "attribute", "path": ".", "required": True}
        ],
        example='''
        <function_calls>
        <invoke name="terminate_command">
        <parameter name="session_name">dev_server</parameter>
        </invoke>
        </function_calls>
        '''
    )
    async def terminate_command(
        self,
        session_name: str
    ) -> ToolResult:
        """Kill the named tmux session.

        Returns:
            A success response with a confirmation ``message``, or a fail
            response if the session does not exist or an error occurs.
        """
        try:
            # Ensure sandbox is initialized
            await self._ensure_sandbox()

            target = self._shell_quote(session_name)

            # Check if session exists
            check_result = await self._execute_raw_command(
                f"tmux has-session -t {target} 2>/dev/null || echo 'not_exists'"
            )
            if "not_exists" in check_result.get("output", ""):
                return self.fail_response(f"Tmux session '{session_name}' does not exist.")

            # Kill the session
            await self._execute_raw_command(f"tmux kill-session -t {target}")

            return self.success_response({
                "message": f"Tmux session '{session_name}' terminated successfully."
            })

        except Exception as e:
            return self.fail_response(f"Error terminating command: {str(e)}")

    @openapi_schema({
        "type": "function",
        "function": {
            "name": "list_commands",
            "description": "List all running tmux sessions and their status.",
            "parameters": {
                "type": "object",
                "properties": {}
            }
        }
    })
    @xml_schema(
        tag_name="list-commands",
        mappings=[],
        example='''
        <function_calls>
        <invoke name="list_commands">
        </invoke>
        </function_calls>
        '''
    )
    async def list_commands(self) -> ToolResult:
        """List the names of all tmux sessions reported by ``tmux list-sessions``.

        Returns:
            A success response with ``message`` and ``sessions`` (a list of
            names, empty when there are none), or a fail response on error.
        """
        try:
            # Ensure sandbox is initialized
            await self._ensure_sandbox()

            # List all tmux sessions
            result = await self._execute_raw_command("tmux list-sessions 2>/dev/null || echo 'No sessions'")
            output = result.get("output", "")

            if "No sessions" in output or not output.strip():
                return self.success_response({
                    "message": "No active tmux sessions found.",
                    "sessions": []
                })

            # Each non-blank line is "<name>: <details>"; keep the part before
            # the first colon.
            sessions = [
                line.partition(':')[0].strip()
                for line in output.split('\n')
                if line.strip()
            ]

            return self.success_response({
                "message": f"Found {len(sessions)} active sessions.",
                "sessions": sessions
            })

        except Exception as e:
            return self.fail_response(f"Error listing commands: {str(e)}")

    def _is_command_completed(self, current_output: str, marker: str) -> bool:
        """Check whether the completion marker was printed by the finished command.

        The marker also appears in the echoed command line itself
        (``...; echo <marker>``), so a match only counts when the text right
        before it (newlines ignored) is not ``"echo "``.

        Args:
            current_output: Current pane contents.
            marker: Completion marker to look for.

        Returns:
            True if the command has completed, False otherwise.
        """
        if not current_output or not marker:
            return False

        # Prefer the last contiguous occurrence, which avoids being confused by
        # a terminal prompt printed after the marker.
        marker_pos = current_output.rfind(marker)
        if marker_pos != -1:
            preceding = current_output[:marker_pos]
        else:
            # No contiguous match: accept a marker broken across lines, but
            # only when it sits at the very end of the output.
            flattened = current_output.replace('\n', '')
            if not flattened.endswith(marker):
                return False
            preceding = flattened[:-len(marker)]

        # Preceded by "echo " means we matched the typed command line, i.e. the
        # command has only just been started.
        return not preceding.replace('\n', '').endswith("echo ")

    async def cleanup(self):
        """Kill the tmux server and clean up all sessions.

        The tmux server is killed first because doing so goes through
        ``_execute_raw_command``, which (re)creates the ``raw_commands``
        session; deleting sessions afterwards ensures none is left behind.
        """
        # Clean up any tmux sessions (best-effort)
        try:
            await self._ensure_sandbox()
            await self._execute_raw_command("tmux kill-server 2>/dev/null || true")
        except Exception as e:
            print(f"Warning: Failed to kill tmux server: {str(e)}")

        for session_name in list(self._sessions.keys()):
            await self._cleanup_session(session_name)