mirror of https://github.com/kortix-ai/suna.git
422 lines
16 KiB
Python
422 lines
16 KiB
Python
from fastapi import APIRouter, HTTPException, Depends
|
|
from fastapi.responses import StreamingResponse
|
|
import asyncio
|
|
import json
|
|
import traceback
|
|
from datetime import datetime, timezone
|
|
import uuid
|
|
|
|
from agentpress.framework.thread_manager import ThreadManager
|
|
from agentpress.framework.state_manager import StateManager
|
|
from agentpress.framework.db_connection import DBConnection
|
|
from agentpress.framework import redis_manager
|
|
from agentpress.agent.run import run_agent
|
|
from agentpress.auth.auth_utils import get_current_user_id
|
|
|
|
# Shared module state.
# `router` collects the agent endpoints defined in this module. The remaining
# names are placeholders that stay None until initialize() assigns them.
router = APIRouter()
thread_manager = state_manager = store_id = db = instance_id = None
|
|
|
|
def initialize(
    _thread_manager: ThreadManager,
    _state_manager: StateManager,
    _store_id: str,
    _db: DBConnection
):
    """Store the resources handed over by the main API in module globals.

    Also assigns this process a fresh ``instance_id``: the first 8 characters
    of a random UUID4.

    Args:
        _thread_manager: Thread manager later passed to ``run_agent``.
        _state_manager: State manager later passed to ``run_agent``.
        _store_id: Store id later passed to ``run_agent``.
        _db: Database connection used by every endpoint in this module.
    """
    global thread_manager, state_manager, store_id, db, instance_id

    thread_manager, state_manager = _thread_manager, _state_manager
    store_id, db = _store_id, _db

    # Short random tag; it is embedded in this instance's active_run:* keys.
    instance_id = uuid.uuid4().hex[:8]

    # Note: Redis will be initialized in the lifespan function in api.py
|
|
|
|
async def cleanup():
    """Stop this instance's running agents and close Redis on shutdown.

    Every key matching ``active_run:{instance_id}:*`` is resolved to an agent
    run id and passed to ``stop_agent_run``. The Redis connection is closed
    afterwards, even if stopping one of the runs raises.
    """
    redis_client = await redis_manager.get_client()

    try:
        # The pattern contains this process's instance_id, so only keys
        # registered by this instance are matched.
        running_keys = await redis_client.keys(f"active_run:{instance_id}:*")

        for key in running_keys:
            # Keys may arrive as bytes or str depending on client
            # configuration; the streaming code in this module accepts both,
            # so do the same here instead of failing on bytes.split(str).
            if isinstance(key, bytes):
                key = key.decode("utf-8")

            # Key layout: active_run:<instance_id>:<agent_run_id>
            agent_run_id = key.rsplit(":", 1)[-1]
            await stop_agent_run(agent_run_id)
    finally:
        # Always release the connection, whatever happened above.
        await redis_manager.close()
|
|
|
|
async def stop_agent_run(agent_run_id: str):
    """Record an agent run as stopped, then signal its worker to halt.

    The ``agent_runs`` row is updated first (status "stopped" plus a UTC
    completion timestamp); afterwards the string "STOP" is published on the
    run's ``agent_run:{agent_run_id}:control`` Redis channel.

    Args:
        agent_run_id: Id of the ``agent_runs`` row to stop.
    """
    client = await db.client
    redis_client = await redis_manager.get_client()

    runs_table = client.table('agent_runs')
    stopped_fields = {
        "status": "stopped",
        "completed_at": datetime.now(timezone.utc).isoformat()
    }
    await runs_table.update(stopped_fields).eq("id", agent_run_id).execute()

    # The payload is a plain string; listeners compare it against "STOP".
    control_channel = f"agent_run:{agent_run_id}:control"
    await redis_client.publish(control_channel, "STOP")
|
|
|
|
async def restore_running_agent_runs():
    """Mark every agent run still listed as running in the database as failed.

    Despite the name, nothing is resumed: each ``agent_runs`` row whose status
    is "running" is updated, one row at a time, to status "failed" with a
    server-restart error message and a UTC completion timestamp.
    """
    client = await db.client
    stale_runs = await client.table('agent_runs').select('*').eq("status", "running").execute()

    for stale_run in stale_runs.data:
        runs_table = client.table('agent_runs')
        failure_fields = {
            "status": "failed",
            "error": "Server restarted while agent was running",
            "completed_at": datetime.now(timezone.utc).isoformat()
        }
        await runs_table.update(failure_fields).eq("id", stale_run['id']).execute()
|
|
|
|
# Strong references to in-flight background tasks. The event loop only keeps
# weak references to tasks, so a task nobody else references can be
# garbage-collected before it finishes.
_background_tasks = set()


@router.post("/thread/{thread_id}/agent/start")
async def start_agent(thread_id: str, user_id: str = Depends(get_current_user_id)):
    """Start an agent for a specific thread in the background.

    Creates an ``agent_runs`` row with status "running", registers the run
    under ``active_run:{instance_id}:{agent_run_id}`` in Redis with a TTL,
    and schedules ``run_agent_background`` as an asyncio task. When that task
    finishes, the Redis key is removed via ``_cleanup_agent_run``.

    Returns:
        A dict with the new ``agent_run_id`` and the status "running".

    Raises:
        HTTPException: 403 if no thread with this id belongs to the user.
    """
    client = await db.client
    redis_client = await redis_manager.get_client()

    # Verify user has access to this thread
    thread = await client.table('threads').select('thread_id').eq('thread_id', thread_id).eq('user_id', user_id).execute()
    if not thread.data:
        raise HTTPException(status_code=403, detail="Not authorized to access this thread")

    # Create a new agent run
    agent_run = await client.table('agent_runs').insert({
        "thread_id": thread_id,
        "status": "running",
        "started_at": datetime.now(timezone.utc).isoformat(),
        "responses": "[]"  # Initialize with empty array
    }).execute()
    agent_run_id = agent_run.data[0]['id']

    # Register this run in Redis with TTL
    await redis_client.set(
        f"active_run:{instance_id}:{agent_run_id}",
        "running",
        ex=redis_manager.REDIS_KEY_TTL
    )

    # Run the agent in the background, holding a reference until it is done.
    run_task = asyncio.create_task(
        run_agent_background(agent_run_id, thread_id, instance_id)
    )
    _background_tasks.add(run_task)

    def _on_run_finished(finished_task):
        """Release the finished run task and schedule Redis key cleanup."""
        _background_tasks.discard(finished_task)
        cleanup_task = asyncio.create_task(_cleanup_agent_run(agent_run_id))
        # The cleanup task needs a strong reference for the same reason.
        _background_tasks.add(cleanup_task)
        cleanup_task.add_done_callback(_background_tasks.discard)

    run_task.add_done_callback(_on_run_finished)

    return {"agent_run_id": agent_run_id, "status": "running"}
|
|
|
|
async def _cleanup_agent_run(agent_run_id: str):
    """Delete the Redis key that marks an agent run as active on this instance.

    Args:
        agent_run_id: Id of the run whose ``active_run`` key is removed.
    """
    redis_client = await redis_manager.get_client()
    active_run_key = f"active_run:{instance_id}:{agent_run_id}"
    await redis_client.delete(active_run_key)
|
|
|
|
@router.post("/agent-run/{agent_run_id}/stop")
async def stop_agent(agent_run_id: str, user_id: str = Depends(get_current_user_id)):
    """Stop an agent run on behalf of the user who owns its thread.

    Returns:
        ``{"status": "stopped"}`` once ``stop_agent_run`` has completed.

    Raises:
        HTTPException: 404 if the run does not exist; 403 if the run's thread
            does not belong to the requesting user.
    """
    client = await db.client

    # Look the run up first so a missing run is reported as 404, not 403.
    run_lookup = await client.table('agent_runs').select('*').eq('id', agent_run_id).execute()
    run_rows = run_lookup.data
    if not run_rows:
        raise HTTPException(status_code=404, detail="Agent run not found")

    thread_id = run_rows[0]['thread_id']

    # Ownership check: the thread must match both its id and the caller's id.
    owned_thread = await (
        client.table('threads')
        .select('thread_id')
        .eq('thread_id', thread_id)
        .eq('user_id', user_id)
        .execute()
    )
    if not owned_thread.data:
        raise HTTPException(status_code=403, detail="Not authorized to access this agent run")

    await stop_agent_run(agent_run_id)

    return {"status": "stopped"}
|
|
|
|
@router.get("/agent-run/{agent_run_id}/stream")
async def stream_agent_run(agent_run_id: str, user_id: str = Depends(get_current_user_id)):
    """Stream the responses of an agent run as server-sent events.

    Responses already stored on the ``agent_runs`` row are replayed first.
    If the run was still marked "running" when it was loaded, new responses
    are then relayed from the ``agent_run:{agent_run_id}:responses`` Redis
    channel until the ``END_STREAM`` marker arrives or a periodic status
    check finds the run no longer running. A run that had already finished
    is closed right after the replay.

    Raises:
        HTTPException: 404 if the run does not exist; 403 if the run's thread
            does not belong to the requesting user.
    """
    client = await db.client
    redis_client = await redis_manager.get_client()

    # Get agent run data
    agent_run = await client.table('agent_runs').select('*').eq('id', agent_run_id).execute()
    if not agent_run.data:
        raise HTTPException(status_code=404, detail="Agent run not found")

    agent_run_data = agent_run.data[0]
    thread_id = agent_run_data['thread_id']

    # Verify user has access to this thread
    thread = await client.table('threads').select('thread_id').eq('thread_id', thread_id).eq('user_id', user_id).execute()
    if not thread.data:
        raise HTTPException(status_code=403, detail="Not authorized to access this agent run")

    responses = json.loads(agent_run_data['responses']) if agent_run_data['responses'] else []

    # A finished run published its END_STREAM marker before this request
    # could subscribe, so waiting on the channel would only stall the client
    # until the first status poll.
    run_is_live = agent_run_data['status'] == 'running'

    # Create a pubsub to listen for new responses
    pubsub = redis_client.pubsub()
    await pubsub.subscribe(f"agent_run:{agent_run_id}:responses")

    end_stream_marker = "END_STREAM"

    async def event_generator():
        """Yield SSE ``data:`` frames: stored responses first, then live ones."""
        try:
            # First send any existing responses
            for response in responses:
                yield f"data: {json.dumps(response)}\n\n"

            if not run_is_live:
                return

            # Counts loop iterations since the last ping / status check.
            ping_count = 0

            # Then stream new responses
            while True:
                message = await pubsub.get_message(timeout=1.0)
                if message and message["type"] == "message":
                    data = message["data"]

                    # Handle both string and bytes data
                    if isinstance(data, bytes):
                        data_str = data.decode('utf-8')
                    else:
                        data_str = str(data)

                    if data_str == end_stream_marker:
                        break

                    # Payloads are published as JSON strings already, so they
                    # are forwarded without re-encoding.
                    yield f"data: {data_str}\n\n"

                # Every 5th iteration (roughly 5 seconds while the channel is
                # idle, given the 1.0s poll timeout) send a keep-alive ping
                # and confirm the run is still marked running.
                ping_count += 1
                if ping_count >= 5:
                    yield f"data: {json.dumps({'type': 'ping'})}\n\n"
                    ping_count = 0

                    current_run = await client.table('agent_runs').select('status').eq('id', agent_run_id).execute()
                    if not current_run.data or current_run.data[0]['status'] != 'running':
                        break

                await asyncio.sleep(0.1)  # Prevent tight loop
        finally:
            # Runs on normal completion and on cancellation alike.
            # Cancellation is deliberately not swallowed so the server sees
            # that the stream was aborted.
            await pubsub.unsubscribe()

    # Return a StreamingResponse with the correct headers
    return StreamingResponse(
        event_generator(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "X-Accel-Buffering": "no"
        }
    )
|
|
|
|
@router.get("/thread/{thread_id}/agent-runs")
async def get_agent_runs(thread_id: str, user_id: str = Depends(get_current_user_id)):
    """Return every agent run recorded for a thread that belongs to the user.

    Returns:
        ``{"agent_runs": [...]}`` holding the full ``agent_runs`` rows.

    Raises:
        HTTPException: 403 if no thread with this id belongs to the user.
    """
    client = await db.client

    # Ownership check: the thread must match both its id and the caller's id.
    owned_thread = await (
        client.table('threads')
        .select('thread_id')
        .eq('thread_id', thread_id)
        .eq('user_id', user_id)
        .execute()
    )
    if not owned_thread.data:
        raise HTTPException(status_code=403, detail="Not authorized to access this thread")

    runs = await client.table('agent_runs').select('*').eq("thread_id", thread_id).execute()
    return {"agent_runs": runs.data}
|
|
|
|
@router.get("/agent-run/{agent_run_id}")
async def get_agent_run(agent_run_id: str, user_id: str = Depends(get_current_user_id)):
    """Return the status, timestamps, error and decoded responses of a run.

    Returns:
        A dict with camelCase keys (``id``, ``threadId``, ``status``,
        ``startedAt``, ``completedAt``, ``responses``, ``error``).
        ``responses`` is the JSON-decoded ``responses`` column, or an empty
        list when that column is empty.

    Raises:
        HTTPException: 404 if the run does not exist; 403 if the run's thread
            does not belong to the requesting user.
    """
    client = await db.client

    run_lookup = await client.table('agent_runs').select('*').eq('id', agent_run_id).execute()
    if not run_lookup.data:
        raise HTTPException(status_code=404, detail="Agent run not found")

    run_row = run_lookup.data[0]
    thread_id = run_row['thread_id']

    # Ownership check: the thread must match both its id and the caller's id.
    owned_thread = await (
        client.table('threads')
        .select('thread_id')
        .eq('thread_id', thread_id)
        .eq('user_id', user_id)
        .execute()
    )
    if not owned_thread.data:
        raise HTTPException(status_code=403, detail="Not authorized to access this agent run")

    # The responses column holds a JSON string; an empty value means no responses.
    if run_row['responses']:
        decoded_responses = json.loads(run_row['responses'])
    else:
        decoded_responses = []

    return {
        "id": run_row['id'],
        "threadId": run_row['thread_id'],
        "status": run_row['status'],
        "startedAt": run_row['started_at'],
        "completedAt": run_row['completed_at'],
        "responses": decoded_responses,
        "error": run_row['error']
    }
|
|
|
|
def _format_agent_response(response):
    """Normalise one item yielded by the agent into a dict with a "type" key."""
    if isinstance(response, str):
        # Simple string content
        return {"type": "content", "content": response}
    if isinstance(response, dict):
        if "type" in response:
            # Already has a type field, use as is
            return response
        # Missing type field, add as content type
        return {"type": "content", **response}
    # Default fallback, convert to string
    return {"type": "content", "content": str(response)}


async def run_agent_background(agent_run_id: str, thread_id: str, instance_id: str):
    """Run the agent in the background, publishing and storing its responses.

    Each item yielded by ``run_agent`` is normalised to a dict with a "type"
    key, published as JSON on ``agent_run:{agent_run_id}:responses`` and
    accumulated in memory. The accumulated list is written to the run's
    ``responses`` column at most about once per second, and once more after
    the loop ends. A watcher task listens on
    ``agent_run:{agent_run_id}:control``; once it sees "STOP", the loop exits
    before handling the next item.

    On every exit path (completed, stopped, failed) the ``END_STREAM`` marker
    is published so stream consumers can finish without waiting for their
    status poll.

    Args:
        agent_run_id: Id of the ``agent_runs`` row being executed.
        thread_id: Thread the agent runs against.
        instance_id: Accepted for interface compatibility; not read here.
    """
    client = await db.client
    redis_client = await redis_manager.get_client()

    responses = []  # every response produced so far
    batch = []      # responses not yet written to the database
    last_db_update = datetime.now(timezone.utc)

    responses_channel = f"agent_run:{agent_run_id}:responses"
    end_stream_marker = "END_STREAM"

    # Create a pubsub to listen for control messages
    pubsub = redis_client.pubsub()
    await pubsub.subscribe(f"agent_run:{agent_run_id}:control")

    stop_signal_received = False
    # Status written in ``finally`` if the row is still "running" by then.
    final_status = "completed"

    async def check_for_stop_signal():
        """Poll the control channel until a STOP message arrives."""
        nonlocal stop_signal_received
        while True:
            message = await pubsub.get_message(timeout=1.0)
            if message and message["type"] == "message":
                # The payload may be delivered as str or bytes.
                if message["data"] in ("STOP", b"STOP"):
                    stop_signal_received = True
                    break
            await asyncio.sleep(0.1)  # Small delay to prevent CPU spinning

    # Start the stop signal checker
    stop_checker = asyncio.create_task(check_for_stop_signal())

    try:
        # Run the agent and collect responses
        agent_gen = run_agent(thread_id, stream=True,
                              thread_manager=thread_manager, state_manager=state_manager, store_id=store_id)

        async for response in agent_gen:
            # Check if stop signal received
            if stop_signal_received:
                break

            formatted_response = _format_agent_response(response)

            # Add response to batch and responses list
            responses.append(formatted_response)
            batch.append(formatted_response)

            # Publish the response to Redis as a JSON string
            await redis_client.publish(
                responses_channel,
                json.dumps(formatted_response)
            )

            # Update database periodically to avoid too many updates
            now = datetime.now(timezone.utc)
            if (now - last_db_update).total_seconds() >= 1.0 and batch:
                # The column always receives the full list, not only the batch.
                await client.table('agent_runs').update({
                    "responses": json.dumps(responses)
                }).eq("id", agent_run_id).execute()

                batch = []
                last_db_update = now

            # Add a small delay to prevent CPU spinning
            await asyncio.sleep(0.01)

        # Final update to database with all responses
        if batch:
            await client.table('agent_runs').update({
                "responses": json.dumps(responses)
            }).eq("id", agent_run_id).execute()

        if stop_signal_received:
            # stop_agent_run writes "stopped" before publishing STOP, so the
            # row is normally final already; this is only the fallback value.
            final_status = "stopped"
        else:
            await client.table('agent_runs').update({
                "status": "completed",
                "completed_at": datetime.now(timezone.utc).isoformat()
            }).eq("id", agent_run_id).execute()

        # Send END_STREAM for completed and stopped runs alike.
        await redis_client.publish(responses_channel, end_stream_marker)

    except Exception as e:
        final_status = "failed"

        # Log the error and update the agent run
        error_message = str(e)
        traceback_str = traceback.format_exc()
        print(f"Error in agent run {agent_run_id}: {error_message}\n{traceback_str}")

        # Update the agent run with the error
        await client.table('agent_runs').update({
            "status": "failed",
            "error": f"{error_message}\n{traceback_str}",
            "completed_at": datetime.now(timezone.utc).isoformat()
        }).eq("id", agent_run_id).execute()

        # Send END_STREAM signal
        await redis_client.publish(responses_channel, end_stream_marker)
    finally:
        # Ensure we always clean up the pubsub and stop checker
        stop_checker.cancel()
        await pubsub.unsubscribe()

        # Safety net: never leave the row "running". Record the outcome that
        # was actually reached (completed, stopped or failed).
        current_run = await client.table('agent_runs').select('status').eq("id", agent_run_id).execute()
        if current_run.data and current_run.data[0]['status'] == 'running':
            await client.table('agent_runs').update({
                "status": final_status,
                "completed_at": datetime.now(timezone.utc).isoformat()
            }).eq("id", agent_run_id).execute()
|