import dotenv

dotenv.load_dotenv(".env")

import sentry
import asyncio
import traceback
from datetime import datetime, timezone
from typing import Optional, List, Dict, Any, AsyncIterable, AsyncGenerator
from services import redis
from agent.run import run_agent
from utils.logger import logger, structlog
import uuid
from services.supabase import DBConnection
from services.langfuse import langfuse
from utils.retry import retry
import json

# Module-level state shared across agent runs on this instance.
_initialized = False
db = DBConnection()
instance_id = "single"


# Create stream broadcaster for multiple consumers
class StreamBroadcaster:
    """Fan a single async stream out to any number of consumer queues."""

    def __init__(self, source: AsyncIterable[Any]):
        self.source = source
        self.queues: List[asyncio.Queue] = []

    def add_consumer(self) -> asyncio.Queue:
        q: asyncio.Queue = asyncio.Queue()
        self.queues.append(q)
        return q

    async def start(self) -> None:
        async for chunk in self.source:
            for q in self.queues:
                await q.put(chunk)
        for q in self.queues:
            await q.put(None)  # Sentinel to close consumers

    # Consumer wrapper as an async generator
    @staticmethod
    async def queue_to_stream(queue: asyncio.Queue) -> AsyncIterable[Any]:
        while True:
            chunk = await queue.get()
            if chunk is None:
                break
            yield chunk

    # Background consumer that drains a queue without processing the chunks
    @staticmethod
    async def iterate_bg(queue: asyncio.Queue) -> None:
        while True:
            chunk = await queue.get()
            if chunk is None:
                break


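# A minimal usage sketch for StreamBroadcaster (illustrative only; the helper
# names below are hypothetical and not part of this module): one producer
# stream fanned out to a live consumer and a background drainer.
#
#     async def example_source():
#         for i in range(3):
#             yield {"type": "chunk", "value": i}
#
#     async def example_fanout():
#         broadcaster = StreamBroadcaster(example_source())
#         live_queue = broadcaster.add_consumer()    # consumed by the caller
#         bg_queue = broadcaster.add_consumer()      # drained in the background
#         asyncio.create_task(StreamBroadcaster.iterate_bg(bg_queue))
#         producer = asyncio.create_task(broadcaster.start())
#         async for chunk in StreamBroadcaster.queue_to_stream(live_queue):
#             print(chunk)
#         await producer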


async def initialize():
    """Initialize the agent API with resources from the main API."""
    global db, instance_id, _initialized

    if not instance_id:
        instance_id = str(uuid.uuid4())[:8]

    await retry(lambda: redis.initialize_async())
    await db.initialize()

    _initialized = True
    logger.info(f"Initialized agent API with instance ID: {instance_id}")


async def check_health(key: str):
    """Health check: write a short-lived 'healthy' marker to Redis."""
    structlog.contextvars.clear_contextvars()
    await redis.set(key, "healthy", ex=redis.REDIS_KEY_TTL)


async def run_agent_run_stream(
    agent_run_id: str,
    thread_id: str,
    instance_id: str,
    project_id: str,
    model_name: str,
    enable_thinking: Optional[bool],
    reasoning_effort: Optional[str],
    stream: bool,
    enable_context_manager: bool,
    agent_config: Optional[dict] = None,
    is_agent_builder: Optional[bool] = False,
    target_agent_id: Optional[str] = None,
    request_id: Optional[str] = None,
) -> AsyncGenerator[str, None]:
    """Run the agent in the background and yield SSE-framed responses as they come."""
    structlog.contextvars.clear_contextvars()
    structlog.contextvars.bind_contextvars(
        agent_run_id=agent_run_id,
        thread_id=thread_id,
        request_id=request_id,
    )

    try:
        await initialize()
    except Exception as e:
        logger.critical(f"Failed to initialize: {e}")
        raise e

    sentry.sentry.set_tag("thread_id", thread_id)

    logger.info(
        f"Starting agent run: {agent_run_id} for thread: {thread_id} (Instance: {instance_id})"
    )
    logger.info(
        {
            "model_name": model_name,
            "enable_thinking": enable_thinking,
            "reasoning_effort": reasoning_effort,
            "stream": stream,
            "enable_context_manager": enable_context_manager,
            "agent_config": agent_config,
            "is_agent_builder": is_agent_builder,
            "target_agent_id": target_agent_id,
        }
    )
    logger.info(
        f"🚀 Using model: {model_name} (thinking: {enable_thinking}, reasoning_effort: {reasoning_effort})"
    )
    if agent_config:
        logger.info(f"Using custom agent: {agent_config.get('name', 'Unknown')}")

    client = await db.client
    start_time = datetime.now(timezone.utc)
    all_responses = []  # Keep for DB updates

    trace = langfuse.trace(
        name="agent_run",
        id=agent_run_id,
        session_id=thread_id,
        metadata={"project_id": project_id, "instance_id": instance_id},
    )

    stop_signal_received = False
    stop_redis_key = f"stop_signal:{agent_run_id}"

    async def check_for_stop_signal():
        nonlocal stop_signal_received
        try:
            while not stop_signal_received:
                message = await redis.client.get(stop_redis_key)
                if message == "STOP":
                    logger.info(
                        f"Received STOP signal for agent run {agent_run_id} (Instance: {instance_id})"
                    )
                    stop_signal_received = True
                    break
                await asyncio.sleep(0.5)  # Short sleep to prevent tight loop
        except asyncio.CancelledError:
            logger.info(
                f"Stop signal checker cancelled for {agent_run_id} (Instance: {instance_id})"
            )
        except Exception as e:
            logger.error(
                f"Error in stop signal checker for {agent_run_id}: {e}", exc_info=True
            )
            stop_signal_received = True  # Stop the run if the checker fails

    # Keep a handle on the checker task so it can be cancelled in the finally
    # block instead of polling Redis indefinitely after the run ends.
    stop_checker_task = asyncio.create_task(check_for_stop_signal())
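    # For reference, a hedged sketch of the producer side (not defined in this
    # module): a stop endpoint would publish the signal by writing the same key
    # the checker above polls, along the lines of
    #     await redis.client.set(f"stop_signal:{agent_run_id}", "STOP", ex=redis.REDIS_KEY_TTL)
    # Only the key format and the "STOP" value are taken from this module; the
    # exact producer-side call is an assumption.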

    try:
        # Initialize agent generator
        agent_gen = run_agent(
            thread_id=thread_id,
            project_id=project_id,
            stream=stream,
            model_name=model_name,
            enable_thinking=enable_thinking,
            reasoning_effort=reasoning_effort,
            enable_context_manager=enable_context_manager,
            agent_config=agent_config,
            trace=trace,
            is_agent_builder=is_agent_builder,
            target_agent_id=target_agent_id,
        )

        final_status = "running"
        error_message = None

        # Yield responses from the agent stream
        async for response in agent_gen:
            if stop_signal_received:
                logger.info(f"Agent run {agent_run_id} stopped by signal.")
                final_status = "stopped"
                trace.span(name="agent_run_stopped").end(
                    status_message="agent_run_stopped", level="WARNING"
                )
                break

            all_responses.append(response)  # Keep for DB updates
            # Frame each response as a Server-Sent Events chunk ("data: ...\n\n")
            if isinstance(response, dict):
                yield f"data: {json.dumps(response)}\n\n"
            else:
                yield f"data: {response}\n\n"

            # Check for agent-signaled completion or error
            if response.get("type") == "status":
                status_val = response.get("status")
                if status_val in ["completed", "failed", "stopped"]:
                    logger.info(
                        f"Agent run {agent_run_id} finished via status message: {status_val}"
                    )
                    final_status = status_val
                    if status_val == "failed" or status_val == "stopped":
                        error_message = response.get(
                            "message", f"Run ended with status: {status_val}"
                        )
                    break

        # If loop finished without explicit completion/error, mark as completed
        if final_status == "running":
            final_status = "completed"
            duration = (datetime.now(timezone.utc) - start_time).total_seconds()
            logger.info(
                f"Agent run {agent_run_id} completed normally (duration: {duration:.2f}s, responses: {len(all_responses)})"
            )
            completion_message = {
                "type": "status",
                "status": "completed",
                "message": "Agent run completed successfully",
            }
            trace.span(name="agent_run_completed").end(
                status_message="agent_run_completed"
            )
            all_responses.append(completion_message)
            yield f"data: {json.dumps(completion_message)}\n\n"

        # Update DB status
        await update_agent_run_status(
            client,
            agent_run_id,
            final_status,
            error=error_message,
            responses=all_responses,
        )

    except Exception as e:
        error_message = str(e)
        traceback_str = traceback.format_exc()
        duration = (datetime.now(timezone.utc) - start_time).total_seconds()
        logger.error(
            f"Error in agent run {agent_run_id} after {duration:.2f}s: {error_message}\n{traceback_str} (Instance: {instance_id})"
        )
        final_status = "failed"
        trace.span(name="agent_run_failed").end(
            status_message=error_message, level="ERROR"
        )

        # Add and yield error response
        error_response = {"type": "status", "status": "error", "message": error_message}
        all_responses.append(error_response)
        yield f"data: {json.dumps(error_response)}\n\n"

        # Update DB status
        await update_agent_run_status(
            client,
            agent_run_id,
            "failed",
            error=f"{error_message}\n{traceback_str}",
            responses=all_responses,
        )

    finally:
        # Stop the Redis polling task now that the run is over.
        stop_checker_task.cancel()
        instance_key = f"active_run:{instance_id}:{agent_run_id}"
        await redis.client.delete(instance_key)
        logger.info(
            f"Agent run completed for: {agent_run_id} (Instance: {instance_id}) with final status: {final_status}"
        )


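# A hedged consumption sketch for run_agent_run_stream (illustrative only; the
# model name and the surrounding caller are assumptions, not part of this module).
# The generator yields strings already framed as Server-Sent Events, so a caller
# can forward them straight to a streaming HTTP response or iterate them locally:
#
#     async def consume_run(agent_run_id: str, thread_id: str, project_id: str):
#         async for sse_chunk in run_agent_run_stream(
#             agent_run_id=agent_run_id,
#             thread_id=thread_id,
#             instance_id=instance_id,
#             project_id=project_id,
#             model_name="gpt-4o",  # illustrative model name
#             enable_thinking=None,
#             reasoning_effort=None,
#             stream=True,
#             enable_context_manager=True,
#         ):
#             print(sse_chunk, end="")  # each chunk is already "data: ...\n\n"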


async def update_agent_run_status(
    client,
    agent_run_id: str,
    status: str,
    error: Optional[str] = None,
    responses: Optional[List[Dict[Any, Any]]] = None,
) -> bool:
    """
    Centralized function to update agent run status.
    Returns True if the update was successful.
    """
    try:
        update_data = {
            "status": status,
            "completed_at": datetime.now(timezone.utc).isoformat(),
        }
        if error:
            update_data["error"] = error
        if responses:
            # Ensure responses are stored correctly as JSONB
            update_data["responses"] = responses

        # Retry up to 3 times ("attempt" avoids shadowing the imported retry helper)
        for attempt in range(3):
            try:
                update_result = (
                    await client.table("agent_runs")
                    .update(update_data)
                    .eq("id", agent_run_id)
                    .execute()
                )

                if hasattr(update_result, "data") and update_result.data:
                    logger.info(
                        f"Successfully updated agent run {agent_run_id} status to '{status}' (attempt {attempt})"
                    )

                    # Verify the update
                    verify_result = (
                        await client.table("agent_runs")
                        .select("status", "completed_at")
                        .eq("id", agent_run_id)
                        .execute()
                    )
                    if verify_result.data:
                        actual_status = verify_result.data[0].get("status")
                        completed_at = verify_result.data[0].get("completed_at")
                        logger.info(
                            f"Verified agent run update: status={actual_status}, completed_at={completed_at}"
                        )
                    return True
                else:
                    logger.warning(
                        f"Database update returned no data for agent run {agent_run_id} on attempt {attempt}: {update_result}"
                    )
                    if attempt == 2:  # Last attempt
                        logger.error(
                            f"Failed to update agent run status after all retries: {agent_run_id}"
                        )
                        return False
            except Exception as db_error:
                logger.error(
                    f"Database error on attempt {attempt} updating status for {agent_run_id}: {str(db_error)}"
                )
                if attempt < 2:  # Not the last attempt yet
                    await asyncio.sleep(0.5 * (2**attempt))  # Exponential backoff: 0.5s, then 1.0s
                else:
                    logger.error(
                        f"Failed to update agent run status after all retries: {agent_run_id}",
                        exc_info=True,
                    )
                    return False
    except Exception as e:
        logger.error(
            f"Unexpected error updating agent run status for {agent_run_id}: {str(e)}",
            exc_info=True,
        )
        return False

    return False