# Source: suna/agentpress/agent/api.py (422 lines, 16 KiB, Python)
from fastapi import APIRouter, HTTPException, Depends
from fastapi.responses import StreamingResponse
import asyncio
import json
import traceback
from datetime import datetime, timezone
import uuid
from agentpress.framework.thread_manager import ThreadManager
from agentpress.framework.state_manager import StateManager
from agentpress.framework.db_connection import DBConnection
from agentpress.framework import redis_manager
from agentpress.agent.run import run_agent
from agentpress.auth.auth_utils import get_current_user_id
# Initialize shared resources
# These module-level globals are populated by initialize() at application
# startup; every route handler below assumes initialize() ran first.
router = APIRouter()
thread_manager = None  # ThreadManager shared with the main API (set in initialize)
state_manager = None  # StateManager shared with the main API (set in initialize)
store_id = None  # store identifier forwarded to run_agent (set in initialize)
db = None  # DBConnection used by all handlers (set in initialize)
instance_id = None  # short random id scoping this process's Redis keys (set in initialize)
def initialize(
    _thread_manager: ThreadManager,
    _state_manager: StateManager,
    _store_id: str,
    _db: DBConnection
):
    """Wire in shared resources from the main API and mint this instance's id.

    Must be called once at startup, before any route handler runs; the
    handlers read these module-level globals. Redis itself is initialized
    separately, in the main app's lifespan function.
    """
    global thread_manager, state_manager, store_id, db, instance_id
    thread_manager, state_manager, store_id, db = (
        _thread_manager,
        _state_manager,
        _store_id,
        _db,
    )
    # Short random tag (first 8 chars of a UUID4) distinguishing this
    # process's Redis keys from those of other running instances.
    instance_id = str(uuid.uuid4())[:8]
    # Note: Redis will be initialized in the lifespan function in api.py
async def cleanup():
    """Stop every agent run owned by this instance and release Redis.

    Intended to run at shutdown: each `active_run:{instance_id}:*` key in
    Redis marks a run this process started, so each one is stopped before
    the shared Redis connection is closed.
    """
    redis_client = await redis_manager.get_client()
    # Only this instance's bookkeeping keys are considered.
    own_keys = await redis_client.keys(f"active_run:{instance_id}:*")
    for run_key in own_keys:
        # The run id is the final segment of the key.
        run_id = run_key.split(":")[-1]
        await stop_agent_run(run_id)
    await redis_manager.close()
async def stop_agent_run(agent_run_id: str):
    """Mark an agent run as stopped and broadcast a STOP signal.

    Persists the terminal status to the database, then publishes "STOP" on
    the run's Redis control channel so the background worker executing the
    run can shut itself down.
    """
    db_client = await db.client
    redis_client = await redis_manager.get_client()

    stopped_at = datetime.now(timezone.utc).isoformat()
    await db_client.table('agent_runs').update({
        "status": "stopped",
        "completed_at": stopped_at
    }).eq("id", agent_run_id).execute()

    # The worker's stop-checker listens for this exact string.
    await redis_client.publish(f"agent_run:{agent_run_id}:control", "STOP")
async def restore_running_agent_runs():
    """Fail any run still marked 'running' in the database.

    Called after a restart: a run left in 'running' state has no live
    worker anymore, so it is recorded as failed with an explanatory error.
    """
    db_client = await db.client
    stale = await db_client.table('agent_runs').select('*').eq("status", "running").execute()
    for stale_run in stale.data:
        await db_client.table('agent_runs').update({
            "status": "failed",
            "error": "Server restarted while agent was running",
            "completed_at": datetime.now(timezone.utc).isoformat()
        }).eq("id", stale_run['id']).execute()
# Strong references to in-flight background tasks. The event loop keeps only
# weak references to tasks, so without this set a running agent task (or the
# fire-and-forget cleanup task) could be garbage-collected mid-flight — see
# the note in the asyncio.create_task documentation.
_background_tasks = set()

@router.post("/thread/{thread_id}/agent/start")
async def start_agent(thread_id: str, user_id: str = Depends(get_current_user_id)):
    """Start an agent for a specific thread in the background.

    Verifies the caller owns the thread, inserts a new 'running' row in
    agent_runs, registers the run in Redis under this instance's id (with a
    TTL so keys from crashed instances expire), and schedules the agent as
    a background asyncio task.

    Returns:
        {"agent_run_id": <id>, "status": "running"}
    Raises:
        HTTPException: 403 if the thread does not belong to the user.
    """
    client = await db.client
    redis_client = await redis_manager.get_client()

    # Authorization: the thread must belong to the requesting user.
    thread = await client.table('threads').select('thread_id').eq('thread_id', thread_id).eq('user_id', user_id).execute()
    if not thread.data:
        raise HTTPException(status_code=403, detail="Not authorized to access this thread")

    # Create a new agent run; responses is stored as a JSON-encoded list.
    agent_run = await client.table('agent_runs').insert({
        "thread_id": thread_id,
        "status": "running",
        "started_at": datetime.now(timezone.utc).isoformat(),
        "responses": "[]"  # Initialize with empty array
    }).execute()
    agent_run_id = agent_run.data[0]['id']

    # Register this run in Redis with a TTL so stale keys self-expire.
    await redis_client.set(
        f"active_run:{instance_id}:{agent_run_id}",
        "running",
        ex=redis_manager.REDIS_KEY_TTL
    )

    # Run the agent in the background, keeping a strong reference so the
    # task cannot be garbage-collected before it finishes.
    task = asyncio.create_task(
        run_agent_background(agent_run_id, thread_id, instance_id)
    )
    _background_tasks.add(task)

    def _on_done(finished):
        # Drop our reference, then clean up the Redis bookkeeping key.
        _background_tasks.discard(finished)
        cleanup_task = asyncio.create_task(_cleanup_agent_run(agent_run_id))
        _background_tasks.add(cleanup_task)
        cleanup_task.add_done_callback(_background_tasks.discard)

    task.add_done_callback(_on_done)

    return {"agent_run_id": agent_run_id, "status": "running"}
async def _cleanup_agent_run(agent_run_id: str):
    """Delete this instance's Redis bookkeeping key for a finished run."""
    redis_client = await redis_manager.get_client()
    bookkeeping_key = f"active_run:{instance_id}:{agent_run_id}"
    await redis_client.delete(bookkeeping_key)
@router.post("/agent-run/{agent_run_id}/stop")
async def stop_agent(agent_run_id: str, user_id: str = Depends(get_current_user_id)):
    """Stop a running agent run after verifying the caller owns its thread.

    Raises:
        HTTPException: 404 if the run does not exist; 403 if its thread
            does not belong to the requesting user.
    """
    db_client = await db.client

    run_rows = await db_client.table('agent_runs').select('*').eq('id', agent_run_id).execute()
    if not run_rows.data:
        raise HTTPException(status_code=404, detail="Agent run not found")

    # Authorization: the run's thread must belong to the requesting user.
    owner_thread_id = run_rows.data[0]['thread_id']
    owner_rows = await db_client.table('threads').select('thread_id').eq('thread_id', owner_thread_id).eq('user_id', user_id).execute()
    if not owner_rows.data:
        raise HTTPException(status_code=403, detail="Not authorized to access this agent run")

    await stop_agent_run(agent_run_id)
    return {"status": "stopped"}
@router.get("/agent-run/{agent_run_id}/stream")
async def stream_agent_run(agent_run_id: str, user_id: str = Depends(get_current_user_id)):
    """Stream the responses of an agent run from where they left off.

    Emits Server-Sent Events: first every response already persisted in the
    database, then live responses relayed from the run's Redis pub/sub
    channel, until an END_STREAM marker arrives or the run's status leaves
    'running'.

    Raises:
        HTTPException: 404 if the run does not exist; 403 if the run's
            thread does not belong to the requesting user.
    """
    client = await db.client
    redis_client = await redis_manager.get_client()
    # Get agent run data
    agent_run = await client.table('agent_runs').select('*').eq('id', agent_run_id).execute()
    if not agent_run.data or len(agent_run.data) == 0:
        raise HTTPException(status_code=404, detail="Agent run not found")
    agent_run_data = agent_run.data[0]
    thread_id = agent_run_data['thread_id']
    # Verify user has access to this thread
    thread = await client.table('threads').select('thread_id').eq('thread_id', thread_id).eq('user_id', user_id).execute()
    if not thread.data or len(thread.data) == 0:
        raise HTTPException(status_code=403, detail="Not authorized to access this agent run")
    # Responses are persisted as a JSON-encoded list; tolerate empty/None.
    responses = json.loads(agent_run_data['responses']) if agent_run_data['responses'] else []
    # Subscribe BEFORE streaming the replay, so nothing published from this
    # point on can be missed between replay and live relay.
    pubsub = redis_client.pubsub()
    await pubsub.subscribe(f"agent_run:{agent_run_id}:responses")
    # Define the streaming generator
    async def event_generator():
        try:
            # First send any existing responses (catch-up replay from the DB).
            for response in responses:
                yield f"data: {json.dumps(response)}\n\n"
            # Counts loop iterations between keep-alive pings.
            ping_count = 0
            # Then stream new responses
            while True:
                # get_message returns None when nothing arrives within the timeout.
                message = await pubsub.get_message(timeout=1.0)
                if message and message["type"] == "message":
                    data = message["data"]
                    # The background worker publishes END_STREAM when the run finishes.
                    end_stream_marker = "END_STREAM"
                    if data == end_stream_marker or data == end_stream_marker.encode('utf-8'):
                        break
                    # Handle both string and bytes data (depends on Redis client config).
                    if isinstance(data, bytes):
                        data_str = data.decode('utf-8')
                    else:
                        data_str = str(data)
                    # Payloads are already JSON strings; forward them verbatim.
                    yield f"data: {data_str}\n\n"
                # Emit a keep-alive ping every 5 iterations — roughly every
                # 5 seconds when the channel is idle, given the 1s timeout.
                ping_count += 1
                if ping_count >= 5:
                    yield f"data: {json.dumps({'type': 'ping'})}\n\n"
                    ping_count = 0
                # Fallback exit: stop once the run is no longer marked running,
                # even if the END_STREAM marker was never observed.
                current_run = await client.table('agent_runs').select('status').eq('id', agent_run_id).execute()
                if not current_run.data or current_run.data[0]['status'] != 'running':
                    break
                await asyncio.sleep(0.1)  # Prevent tight loop
        except asyncio.CancelledError:
            # Client disconnected; fall through to cleanup.
            pass
        finally:
            await pubsub.unsubscribe()
    # Return a StreamingResponse with the correct headers.
    # X-Accel-Buffering: no disables proxy buffering so events flush promptly.
    return StreamingResponse(
        event_generator(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "X-Accel-Buffering": "no"
        }
    )
@router.get("/thread/{thread_id}/agent-runs")
async def get_agent_runs(thread_id: str, user_id: str = Depends(get_current_user_id)):
    """List every agent run recorded for a thread the caller owns.

    Raises:
        HTTPException: 403 if the thread does not belong to the user.
    """
    db_client = await db.client

    # Authorization: the thread must belong to the requesting user.
    owner_rows = await db_client.table('threads').select('thread_id').eq('thread_id', thread_id).eq('user_id', user_id).execute()
    if not owner_rows.data:
        raise HTTPException(status_code=403, detail="Not authorized to access this thread")

    run_rows = await db_client.table('agent_runs').select('*').eq("thread_id", thread_id).execute()
    return {"agent_runs": run_rows.data}
@router.get("/agent-run/{agent_run_id}")
async def get_agent_run(agent_run_id: str, user_id: str = Depends(get_current_user_id)):
    """Return status, timing, responses, and error (if any) for an agent run.

    Raises:
        HTTPException: 404 if the run does not exist; 403 if its thread
            does not belong to the requesting user.
    """
    db_client = await db.client

    run_rows = await db_client.table('agent_runs').select('*').eq('id', agent_run_id).execute()
    if not run_rows.data:
        raise HTTPException(status_code=404, detail="Agent run not found")
    run = run_rows.data[0]

    # Authorization: the run's thread must belong to the requesting user.
    owner_rows = await db_client.table('threads').select('thread_id').eq('thread_id', run['thread_id']).eq('user_id', user_id).execute()
    if not owner_rows.data:
        raise HTTPException(status_code=403, detail="Not authorized to access this agent run")

    # Responses are persisted as a JSON-encoded list; tolerate empty/None.
    raw_responses = run['responses']
    return {
        "id": run['id'],
        "threadId": run['thread_id'],
        "status": run['status'],
        "startedAt": run['started_at'],
        "completedAt": run['completed_at'],
        "responses": json.loads(raw_responses) if raw_responses else [],
        "error": run['error']
    }
async def run_agent_background(agent_run_id: str, thread_id: str, instance_id: str):
    """Run the agent in the background and store responses.

    Drives run_agent() for the thread, normalizes every chunk it yields,
    publishes each chunk to the run's Redis responses channel for live
    streamers, and batches DB writes of the accumulated response list
    (at most about once per second). Listens on the run's control channel
    for a STOP signal, and always publishes an END_STREAM marker plus a
    terminal status update when the run ends (normally or with an error).

    Args:
        agent_run_id: Primary key of the agent_runs row being executed.
        thread_id: Thread whose conversation the agent operates on.
        instance_id: This process's id (accepted for symmetry with the
            Redis key scheme; not read inside this function).
    """
    client = await db.client
    redis_client = await redis_manager.get_client()
    # Create a buffer to store response chunks.
    # `responses` is the full history written to the DB; `batch` only tracks
    # whether anything new arrived since the last DB write.
    responses = []
    batch = []
    last_db_update = datetime.now(timezone.utc)
    # Create a pubsub to listen for control messages (STOP requests).
    pubsub = redis_client.pubsub()
    await pubsub.subscribe(f"agent_run:{agent_run_id}:control")
    # Start a background task to check for stop signals
    stop_signal_received = False
    async def check_for_stop_signal():
        # Poll the control channel; flips the shared flag on "STOP".
        nonlocal stop_signal_received
        while True:
            message = await pubsub.get_message(timeout=1.0)
            if message and message["type"] == "message":
                stop_signal = "STOP"
                # Redis may deliver the payload as str or bytes.
                if message["data"] == stop_signal or message["data"] == stop_signal.encode('utf-8'):
                    stop_signal_received = True
                    break
            await asyncio.sleep(0.1)  # Small delay to prevent CPU spinning
            if stop_signal_received:  # Check if we should exit
                break
    # Start the stop signal checker
    stop_checker = asyncio.create_task(check_for_stop_signal())
    try:
        # Run the agent and collect responses
        agent_gen = run_agent(thread_id, stream=True,
            thread_manager=thread_manager, state_manager=state_manager, store_id=store_id)
        async for response in agent_gen:
            # Check if stop signal received; abandon the generator if so.
            if stop_signal_received:
                break
            # Format the response properly
            formatted_response = None
            # Normalize every yielded chunk into a dict with a "type" field.
            if isinstance(response, str):
                # Simple string content
                formatted_response = {"type": "content", "content": response}
            elif isinstance(response, dict):
                if "type" in response:
                    # Already has a type field, use as is
                    formatted_response = response
                else:
                    # Missing type field, add as content type
                    formatted_response = {"type": "content", **response}
            else:
                # Default fallback, convert to string
                formatted_response = {"type": "content", "content": str(response)}
            # Add response to batch and responses list
            responses.append(formatted_response)
            batch.append(formatted_response)
            # Publish immediately so live streamers see it without waiting
            # for the batched DB write; payload is a JSON string.
            await redis_client.publish(
                f"agent_run:{agent_run_id}:responses",
                json.dumps(formatted_response)
            )
            # Update database periodically (at most ~1/sec) to avoid
            # issuing a write per chunk.
            now = datetime.now(timezone.utc)
            if (now - last_db_update).total_seconds() >= 1.0 and batch:
                # Update the agent run responses (full history, not a delta).
                await client.table('agent_runs').update({
                    "responses": json.dumps(responses)
                }).eq("id", agent_run_id).execute()
                # Clear the batch and update the last_db_update time
                batch = []
                last_db_update = now
            # Add a small delay to prevent CPU spinning
            await asyncio.sleep(0.01)
        # Final update to database with all responses (flush the last batch).
        if batch:
            await client.table('agent_runs').update({
                "responses": json.dumps(responses)
            }).eq("id", agent_run_id).execute()
        # Mark completed only if we weren't stopped; the stop path already
        # wrote status "stopped" via stop_agent_run().
        if not stop_signal_received:
            await client.table('agent_runs').update({
                "status": "completed",
                "completed_at": datetime.now(timezone.utc).isoformat()
            }).eq("id", agent_run_id).execute()
        # Send END_STREAM signal so SSE streamers close out cleanly.
        end_stream_marker = "END_STREAM"
        await redis_client.publish(
            f"agent_run:{agent_run_id}:responses",
            end_stream_marker
        )
    except Exception as e:
        # Log the error and update the agent run
        error_message = str(e)
        traceback_str = traceback.format_exc()
        print(f"Error in agent run {agent_run_id}: {error_message}\n{traceback_str}")
        # Update the agent run with the error
        await client.table('agent_runs').update({
            "status": "failed",
            "error": f"{error_message}\n{traceback_str}",
            "completed_at": datetime.now(timezone.utc).isoformat()
        }).eq("id", agent_run_id).execute()
        # Send END_STREAM signal on the failure path too.
        end_stream_marker = "END_STREAM"
        await redis_client.publish(
            f"agent_run:{agent_run_id}:responses",
            end_stream_marker
        )
    finally:
        # Ensure we always clean up the pubsub and stop checker.
        # NOTE(review): stop_checker is cancelled but never awaited — its
        # CancelledError is left to the event loop; confirm this is intended.
        stop_checker.cancel()
        await pubsub.unsubscribe()
        # Safety net: if the row is somehow still 'running', force a terminal
        # status so no run is left dangling.
        current_run = await client.table('agent_runs').select('status').eq("id", agent_run_id).execute()
        if current_run.data and current_run.data[0]['status'] == 'running':
            await client.table('agent_runs').update({
                "status": "failed" if stop_signal_received else "completed",
                "completed_at": datetime.now(timezone.utc).isoformat()
            }).eq("id", agent_run_id).execute()