This commit is contained in:
marko-kraemer 2025-04-25 13:23:05 +01:00
parent 2cf777cc4c
commit 4339f6b56b
3 changed files with 34 additions and 21 deletions

View File

@ -227,7 +227,7 @@ async def _cleanup_redis_response_list(agent_run_id: str):
logger.warning(f"Failed to set TTL on response list {response_list_key}: {str(e)}") logger.warning(f"Failed to set TTL on response list {response_list_key}: {str(e)}")
async def restore_running_agent_runs(): async def restore_running_agent_runs():
"""Mark agent runs that were still 'running' in the database as failed.""" """Mark agent runs that were still 'running' in the database as failed and clean up Redis resources."""
logger.info("Restoring running agent runs after server restart") logger.info("Restoring running agent runs after server restart")
client = await db.client client = await db.client
running_agent_runs = await client.table('agent_runs').select('id').eq("status", "running").execute() running_agent_runs = await client.table('agent_runs').select('id').eq("status", "running").execute()
@ -235,6 +235,27 @@ async def restore_running_agent_runs():
for run in running_agent_runs.data: for run in running_agent_runs.data:
agent_run_id = run['id'] agent_run_id = run['id']
logger.warning(f"Found running agent run {agent_run_id} from before server restart") logger.warning(f"Found running agent run {agent_run_id} from before server restart")
# Clean up Redis resources for this run
try:
# Clean up active run key
active_run_key = f"active_run:{instance_id}:{agent_run_id}"
await redis.delete(active_run_key)
# Clean up response list
response_list_key = f"agent_run:{agent_run_id}:responses"
await redis.delete(response_list_key)
# Clean up control channels
control_channel = f"agent_run:{agent_run_id}:control"
instance_control_channel = f"agent_run:{agent_run_id}:control:{instance_id}"
await redis.delete(control_channel)
await redis.delete(instance_control_channel)
logger.info(f"Cleaned up Redis resources for agent run {agent_run_id}")
except Exception as e:
logger.error(f"Error cleaning up Redis resources for agent run {agent_run_id}: {e}")
# Call stop_agent_run to handle status update and cleanup # Call stop_agent_run to handle status update and cleanup
await stop_agent_run(agent_run_id, error_message="Server restarted while agent was running") await stop_agent_run(agent_run_id, error_message="Server restarted while agent was running")

View File

@ -58,8 +58,8 @@ class SandboxShellTool(SandboxToolsBase):
}, },
"timeout": { "timeout": {
"type": "integer", "type": "integer",
"description": "Optional timeout in seconds. Increase for long-running commands. Defaults to 180. For commands that might exceed this timeout, use background execution with & operator instead.", "description": "Optional timeout in seconds. Increase for long-running commands. Defaults to 60. For commands that might exceed this timeout, use background execution with & operator instead.",
"default": 180 "default": 60
} }
}, },
"required": ["command"] "required": ["command"]
@ -102,39 +102,29 @@ class SandboxShellTool(SandboxToolsBase):
pdftotext input.pdf -layout 2>&1 || echo "Error processing PDF" && ls -la output.txt pdftotext input.pdf -layout 2>&1 || echo "Error processing PDF" && ls -la output.txt
</execute-command> </execute-command>
<!-- Example 6: Command with custom timeout (3 minutes) -->
<execute-command timeout="180">
python long_running_script.py
</execute-command>
<!-- Example 7: Command with custom timeout and folder -->
<execute-command folder="scripts" timeout="180">
python data_processing.py
</execute-command>
<!-- NON-BLOCKING COMMANDS: Use these for long-running operations to prevent timeouts --> <!-- NON-BLOCKING COMMANDS: Use these for long-running operations to prevent timeouts -->
<!-- Example 8: Basic non-blocking command with & operator --> <!-- Example 6: Basic non-blocking command with & operator -->
<execute-command> <execute-command>
python scraper.py --large-dataset > scraper_output.log 2>&1 & python scraper.py --large-dataset > scraper_output.log 2>&1 &
</execute-command> </execute-command>
<!-- Example 9: Run a process with nohup for immunity to hangups --> <!-- Example 7: Run a process with nohup for immunity to hangups -->
<execute-command> <execute-command>
nohup python processor.py --heavy-computation > processor.log 2>&1 & nohup python processor.py --heavy-computation > processor.log 2>&1 &
</execute-command> </execute-command>
<!-- Example 10: Starting a background process and storing its PID --> <!-- Example 8: Starting a background process and storing its PID -->
<execute-command> <execute-command>
python long_task.py & echo $! > task.pid python long_task.py & echo $! > task.pid
</execute-command> </execute-command>
<!-- Example 11: Checking if a process is still running --> <!-- Example 9: Checking if a process is still running -->
<execute-command> <execute-command>
ps -p $(cat task.pid) ps -p $(cat task.pid)
</execute-command> </execute-command>
<!-- Example 12: Killing a background process --> <!-- Example 10: Killing a background process -->
<execute-command> <execute-command>
kill $(cat task.pid) kill $(cat task.pid)
</execute-command> </execute-command>
@ -145,7 +135,7 @@ class SandboxShellTool(SandboxToolsBase):
command: str, command: str,
folder: Optional[str] = None, folder: Optional[str] = None,
session_name: str = "default", session_name: str = "default",
timeout: int = 180 timeout: int = 60
) -> ToolResult: ) -> ToolResult:
try: try:
# Ensure sandbox is initialized # Ensure sandbox is initialized
@ -201,4 +191,4 @@ class SandboxShellTool(SandboxToolsBase):
async def cleanup(self): async def cleanup(self):
"""Clean up all sessions.""" """Clean up all sessions."""
for session_name in list(self._sessions.keys()): for session_name in list(self._sessions.keys()):
await self._cleanup_session(session_name) await self._cleanup_session(session_name)

View File

@ -23,7 +23,7 @@ load_dotenv()
# Initialize managers # Initialize managers
db = DBConnection() db = DBConnection()
thread_manager = None thread_manager = None
instance_id = str(uuid.uuid4())[:8] # Generate instance ID at module load time instance_id = "single"
# Rate limiter state # Rate limiter state
ip_tracker = OrderedDict() ip_tracker = OrderedDict()
@ -70,7 +70,9 @@ async def lifespan(app: FastAPI):
# Clean up Redis connection # Clean up Redis connection
try: try:
logger.info("Closing Redis connection")
await redis.close() await redis.close()
logger.info("Redis connection closed successfully")
except Exception as e: except Exception as e:
logger.error(f"Error closing Redis connection: {e}") logger.error(f"Error closing Redis connection: {e}")