This commit is contained in:
marko-kraemer 2025-04-25 13:23:05 +01:00
parent 2cf777cc4c
commit 4339f6b56b
3 changed files with 34 additions and 21 deletions

View File

@ -227,7 +227,7 @@ async def _cleanup_redis_response_list(agent_run_id: str):
logger.warning(f"Failed to set TTL on response list {response_list_key}: {str(e)}")
async def restore_running_agent_runs():
"""Mark agent runs that were still 'running' in the database as failed."""
"""Mark agent runs that were still 'running' in the database as failed and clean up Redis resources."""
logger.info("Restoring running agent runs after server restart")
client = await db.client
running_agent_runs = await client.table('agent_runs').select('id').eq("status", "running").execute()
@ -235,6 +235,27 @@ async def restore_running_agent_runs():
for run in running_agent_runs.data:
agent_run_id = run['id']
logger.warning(f"Found running agent run {agent_run_id} from before server restart")
# Clean up Redis resources for this run
try:
# Clean up active run key
active_run_key = f"active_run:{instance_id}:{agent_run_id}"
await redis.delete(active_run_key)
# Clean up response list
response_list_key = f"agent_run:{agent_run_id}:responses"
await redis.delete(response_list_key)
# Clean up control channels
control_channel = f"agent_run:{agent_run_id}:control"
instance_control_channel = f"agent_run:{agent_run_id}:control:{instance_id}"
await redis.delete(control_channel)
await redis.delete(instance_control_channel)
logger.info(f"Cleaned up Redis resources for agent run {agent_run_id}")
except Exception as e:
logger.error(f"Error cleaning up Redis resources for agent run {agent_run_id}: {e}")
# Call stop_agent_run to handle status update and cleanup
await stop_agent_run(agent_run_id, error_message="Server restarted while agent was running")

View File

@ -58,8 +58,8 @@ class SandboxShellTool(SandboxToolsBase):
},
"timeout": {
"type": "integer",
"description": "Optional timeout in seconds. Increase for long-running commands. Defaults to 180. For commands that might exceed this timeout, use background execution with & operator instead.",
"default": 180
"description": "Optional timeout in seconds. Increase for long-running commands. Defaults to 60. For commands that might exceed this timeout, use background execution with & operator instead.",
"default": 60
}
},
"required": ["command"]
@ -102,39 +102,29 @@ class SandboxShellTool(SandboxToolsBase):
pdftotext input.pdf -layout 2>&1 || echo "Error processing PDF" && ls -la output.txt
</execute-command>
<!-- Example 6: Command with custom timeout (3 minutes) -->
<execute-command timeout="180">
python long_running_script.py
</execute-command>
<!-- Example 7: Command with custom timeout and folder -->
<execute-command folder="scripts" timeout="180">
python data_processing.py
</execute-command>
<!-- NON-BLOCKING COMMANDS: Use these for long-running operations to prevent timeouts -->
<!-- Example 8: Basic non-blocking command with & operator -->
<!-- Example 6: Basic non-blocking command with & operator -->
<execute-command>
python scraper.py --large-dataset > scraper_output.log 2>&1 &
</execute-command>
<!-- Example 9: Run a process with nohup for immunity to hangups -->
<!-- Example 7: Run a process with nohup for immunity to hangups -->
<execute-command>
nohup python processor.py --heavy-computation > processor.log 2>&1 &
</execute-command>
<!-- Example 10: Starting a background process and storing its PID -->
<!-- Example 8: Starting a background process and storing its PID -->
<execute-command>
python long_task.py & echo $! > task.pid
</execute-command>
<!-- Example 11: Checking if a process is still running -->
<!-- Example 9: Checking if a process is still running -->
<execute-command>
ps -p $(cat task.pid)
</execute-command>
<!-- Example 12: Killing a background process -->
<!-- Example 10: Killing a background process -->
<execute-command>
kill $(cat task.pid)
</execute-command>
@ -145,7 +135,7 @@ class SandboxShellTool(SandboxToolsBase):
command: str,
folder: Optional[str] = None,
session_name: str = "default",
timeout: int = 180
timeout: int = 60
) -> ToolResult:
try:
# Ensure sandbox is initialized

View File

@ -23,7 +23,7 @@ load_dotenv()
# Initialize managers
db = DBConnection()
thread_manager = None
instance_id = str(uuid.uuid4())[:8] # Generate instance ID at module load time
instance_id = "single"
# Rate limiter state
ip_tracker = OrderedDict()
@ -70,7 +70,9 @@ async def lifespan(app: FastAPI):
# Clean up Redis connection
try:
logger.info("Closing Redis connection")
await redis.close()
logger.info("Redis connection closed successfully")
except Exception as e:
logger.error(f"Error closing Redis connection: {e}")