From 4bbc03f674a3133204e402bc646572671c4ee3a1 Mon Sep 17 00:00:00 2001
From: marko-kraemer
Date: Sun, 6 Jul 2025 06:40:44 +0200
Subject: [PATCH] wip

---
 backend/agent/api.py                  |   2 +-
 backend/agent/run.py                  | 283 +-------------------------
 backend/agentpress/context_manager.py |   7 +-
 backend/api.py                        |   7 +-
 backend/services/billing.py           |   2 +-
 5 files changed, 10 insertions(+), 291 deletions(-)

diff --git a/backend/agent/api.py b/backend/agent/api.py
index 3c110da4..6ddc3669 100644
--- a/backend/agent/api.py
+++ b/backend/agent/api.py
@@ -32,7 +32,6 @@ instance_id = None # Global instance ID for this backend instance
 
 # TTL for Redis response lists (24 hours)
 REDIS_RESPONSE_LIST_TTL = 3600 * 24
-
 class AgentStartRequest(BaseModel):
     model_name: Optional[str] = None # Will be set from config.MODEL_TO_USE in the endpoint
     enable_thinking: Optional[bool] = False
@@ -1798,6 +1797,7 @@ async def delete_agent(agent_id: str, user_id: str = Depends(get_current_user_id
         logger.error(f"Error deleting agent {agent_id}: {str(e)}")
         raise HTTPException(status_code=500, detail="Internal server error")
 
+
 # Marketplace Models
 class MarketplaceAgent(BaseModel):
     agent_id: str
diff --git a/backend/agent/run.py b/backend/agent/run.py
index bc5edc60..12838579 100644
--- a/backend/agent/run.py
+++ b/backend/agent/run.py
@@ -603,285 +603,4 @@ async def run_agent(
                 break
 
     generation.end(output=full_response)
-    langfuse.flush() # Flush Langfuse events at the end of the run
-
-
-
-# # TESTING
-
-# async def test_agent():
-#     """Test function to run the agent with a sample query"""
-#     from agentpress.thread_manager import ThreadManager
-#     from services.supabase import DBConnection
-
-#     # Initialize ThreadManager
-#     thread_manager = ThreadManager()
-
-#     # Create a test thread directly with Postgres function
-#     client = await DBConnection().client
-
-#     try:
-#         # Get user's personal account
-#         account_result = await client.rpc('get_personal_account').execute()
-
-#         # if not account_result.data:
-#         #     print("Error: No personal account found")
-#         #     return
-
-#         account_id = "a5fe9cb6-4812-407e-a61c-fe95b7320c59"
-
-#         if not account_id:
-#             print("Error: Could not get account ID")
-#             return
-
-#         # Find or create a test project in the user's account
-#         project_result = await client.table('projects').select('*').eq('name', 'test11').eq('account_id', account_id).execute()
-
-#         if project_result.data and len(project_result.data) > 0:
-#             # Use existing test project
-#             project_id = project_result.data[0]['project_id']
-#             print(f"\nšŸ”„ Using existing test project: {project_id}")
-#         else:
-#             # Create new test project if none exists
-#             project_result = await client.table('projects').insert({
-#                 "name": "test11",
-#                 "account_id": account_id
-#             }).execute()
-#             project_id = project_result.data[0]['project_id']
-#             print(f"\n✨ Created new test project: {project_id}")
-
-#         # Create a thread for this project
-#         thread_result = await client.table('threads').insert({
-#             'project_id': project_id,
-#             'account_id': account_id
-#         }).execute()
-#         thread_data = thread_result.data[0] if thread_result.data else None
-
-#         if not thread_data:
-#             print("Error: No thread data returned")
-#             return
-
-#         thread_id = thread_data['thread_id']
-#     except Exception as e:
-#         print(f"Error setting up thread: {str(e)}")
-#         return
-
-#     print(f"\nšŸ¤– Agent Thread Created: {thread_id}\n")
-
-#     # Interactive message input loop
-#     while True:
-#         # Get user input
-#         user_message = input("\nšŸ’¬ Enter your message (or 'exit' to quit): ")
-#         if user_message.lower() == 'exit':
-#             break
-
-#         if not user_message.strip():
-#             print("\nšŸ”„ Running agent...\n")
-#             await process_agent_response(thread_id, project_id, thread_manager)
-#             continue
-
-#         # Add the user message to the thread
-#         await thread_manager.add_message(
-#             thread_id=thread_id,
-#             type="user",
-#             content={
-#                 "role": "user",
-#                 "content": user_message
-#             },
-#             is_llm_message=True
-#         )
-
-#         print("\nšŸ”„ Running agent...\n")
-#         await process_agent_response(thread_id, project_id, thread_manager)
-
-#     print("\nšŸ‘‹ Test completed. Goodbye!")
-
-# async def process_agent_response(
-#     thread_id: str,
-#     project_id: str,
-#     thread_manager: ThreadManager,
-#     stream: bool = True,
-#     model_name: str = "anthropic/claude-3-7-sonnet-latest",
-#     enable_thinking: Optional[bool] = False,
-#     reasoning_effort: Optional[str] = 'low',
-#     enable_context_manager: bool = True
-# ):
-#     """Process the streaming response from the agent."""
-#     chunk_counter = 0
-#     current_response = ""
-#     tool_usage_counter = 0 # Renamed from tool_call_counter as we track usage via status
-
-#     # Create a test sandbox for processing with a unique test prefix to avoid conflicts with production sandboxes
-#     # sandbox_pass = str(uuid4())
-#     # sandbox = await create_sandbox(sandbox_pass)
-
-#     # Store the original ID so we can refer to it
-#     # original_sandbox_id = sandbox.id
-
-#     # Generate a clear test identifier
-#     # test_prefix = f"test_{uuid4().hex[:8]}_"
-#     logger.info(f"Created test sandbox with ID {original_sandbox_id} and test prefix {test_prefix}")
-
-#     # Log the sandbox URL for debugging
-#     # print(f"\033[91mTest sandbox created: {str(await sandbox.get_preview_link(6080))}/vnc_lite.html?password={sandbox_pass}\033[0m")
-
-#     async for chunk in run_agent(
-#         thread_id=thread_id,
-#         project_id=project_id,
-#         sandbox=sandbox,
-#         stream=stream,
-#         thread_manager=thread_manager,
-#         native_max_auto_continues=25,
-#         model_name=model_name,
-#         enable_thinking=enable_thinking,
-#         reasoning_effort=reasoning_effort,
-#         enable_context_manager=enable_context_manager
-#     ):
-#         chunk_counter += 1
-#         # print(f"CHUNK: {chunk}") # Uncomment for debugging
-
-#         if chunk.get('type') == 'assistant':
-#             # Try parsing the content JSON
-#             try:
-#                 # Handle content as string or object
-#                 content = chunk.get('content', '{}')
-#                 if isinstance(content, str):
-#                     content_json = json.loads(content)
-#                 else:
-#                     content_json = content
-
-#                 actual_content = content_json.get('content', '')
-#                 # Print the actual assistant text content as it comes
-#                 if actual_content:
-#                     # Check if it contains XML tool tags, if so, print the whole tag for context
-#                     if '<' in actual_content and '>' in actual_content:
-#                         # Avoid printing potentially huge raw content if it's not just text
-#                         if len(actual_content) < 500: # Heuristic limit
-#                             print(actual_content, end='', flush=True)
-#                         else:
-#                             # Maybe just print a summary if it's too long or contains complex XML
-#                             if '' in actual_content: print("...", end='', flush=True)
-#                             elif '' in actual_content: print("...", end='', flush=True)
-#                             else: print("...", end='', flush=True) # Generic case
-#                     else:
-#                         # Regular text content
-#                         print(actual_content, end='', flush=True)
-#                     current_response += actual_content # Accumulate only text part
-#             except json.JSONDecodeError:
-#                 # If content is not JSON (e.g., just a string chunk), print directly
-#                 raw_content = chunk.get('content', '')
-#                 print(raw_content, end='', flush=True)
-#                 current_response += raw_content
-#             except Exception as e:
-#                 print(f"\nError processing assistant chunk: {e}\n")
-
-#         elif chunk.get('type') == 'tool': # Updated from 'tool_result'
-#             # Add timestamp and format tool result nicely
-#             tool_name = "UnknownTool" # Try to get from metadata if available
-#             result_content = "No content"
-
-#             # Parse metadata - handle both string and dict formats
-#             metadata = chunk.get('metadata', {})
-#             if isinstance(metadata, str):
-#                 try:
-#                     metadata = json.loads(metadata)
-#                 except json.JSONDecodeError:
-#                     metadata = {}
-
-#             linked_assistant_msg_id = metadata.get('assistant_message_id')
-#             parsing_details = metadata.get('parsing_details')
-#             if parsing_details:
-#                 tool_name = parsing_details.get('xml_tag_name', 'UnknownTool') # Get name from parsing details
-
-#             try:
-#                 # Content is a JSON string or object
-#                 content = chunk.get('content', '{}')
-#                 if isinstance(content, str):
-#                     content_json = json.loads(content)
-#                 else:
-#                     content_json = content
-
-#                 # The actual tool result is nested inside content.content
-#                 tool_result_str = content_json.get('content', '')
-#                 # Extract the actual tool result string (remove outer tag if present)
-#                 match = re.search(rf'<{tool_name}>(.*?)', tool_result_str, re.DOTALL)
-#                 if match:
-#                     result_content = match.group(1).strip()
-#                     # Try to parse the result string itself as JSON for pretty printing
-#                     try:
-#                         result_obj = json.loads(result_content)
-#                         result_content = json.dumps(result_obj, indent=2)
-#                     except json.JSONDecodeError:
-#                         # Keep as string if not JSON
-#                         pass
-#                 else:
-#                     # Fallback if tag extraction fails
-#                     result_content = tool_result_str
-
-#             except json.JSONDecodeError:
-#                 result_content = chunk.get('content', 'Error parsing tool content')
-#             except Exception as e:
-#                 result_content = f"Error processing tool chunk: {e}"
-
-#             print(f"\n\nšŸ› ļø TOOL RESULT [{tool_name}] → {result_content}")
-
-#         elif chunk.get('type') == 'status':
-#             # Log tool status changes
-#             try:
-#                 # Handle content as string or object
-#                 status_content = chunk.get('content', '{}')
-#                 if isinstance(status_content, str):
-#                     status_content = json.loads(status_content)
-
-#                 status_type = status_content.get('status_type')
-#                 function_name = status_content.get('function_name', '')
-#                 xml_tag_name = status_content.get('xml_tag_name', '') # Get XML tag if available
-#                 tool_name = xml_tag_name or function_name # Prefer XML tag name
-
-#                 if status_type == 'tool_started' and tool_name:
-#                     tool_usage_counter += 1
-#                     print(f"\nā³ TOOL STARTING #{tool_usage_counter} [{tool_name}]")
-#                     print(" " + "-" * 40)
-#                     # Return to the current content display
-#                     if current_response:
-#                         print("\nContinuing response:", flush=True)
-#                         print(current_response, end='', flush=True)
-#                 elif status_type == 'tool_completed' and tool_name:
-#                     status_emoji = "āœ…"
-#                     print(f"\n{status_emoji} TOOL COMPLETED: {tool_name}")
-#                 elif status_type == 'finish':
-#                     finish_reason = status_content.get('finish_reason', '')
-#                     if finish_reason:
-#                         print(f"\nšŸ“Œ Finished: {finish_reason}")
-#                 # else: # Print other status types if needed for debugging
-#                 #     print(f"\nā„¹ļø STATUS: {chunk.get('content')}")
-
-#             except json.JSONDecodeError:
-#                 print(f"\nWarning: Could not parse status content JSON: {chunk.get('content')}")
-#             except Exception as e:
-#                 print(f"\nError processing status chunk: {e}")
-
-
-#         # Removed elif chunk.get('type') == 'tool_call': block
-
-#     # Update final message
-#     print(f"\n\nāœ… Agent run completed with {tool_usage_counter} tool executions")
-
-#     # Try to clean up the test sandbox if possible
-#     try:
-#         # Attempt to delete/archive the sandbox to clean up resources
-#         # Note: Actual deletion may depend on the Daytona SDK's capabilities
-#         logger.info(f"Attempting to clean up test sandbox {original_sandbox_id}")
-#         # If there's a method to archive/delete the sandbox, call it here
-#         # Example: daytona.archive_sandbox(sandbox.id)
-#     except Exception as e:
-#         logger.warning(f"Failed to clean up test sandbox {original_sandbox_id}: {str(e)}")
-
-# if __name__ == "__main__":
-#     import asyncio
-
-#     # Configure any environment variables or setup needed for testing
-#     load_dotenv() # Ensure environment variables are loaded
-
-#     # Run the test function
-#     asyncio.run(test_agent())
\ No newline at end of file
+    langfuse.flush()
\ No newline at end of file
diff --git a/backend/agentpress/context_manager.py b/backend/agentpress/context_manager.py
index e4035831..dd50f56d 100644
--- a/backend/agentpress/context_manager.py
+++ b/backend/agentpress/context_manager.py
@@ -9,15 +9,10 @@ import json
 from typing import List, Dict, Any, Optional, Union
 
 from litellm.utils import token_counter
-from litellm.cost_calculator import completion_cost
 from services.supabase import DBConnection
-from services.llm import make_llm_api_call
 from utils.logger import logger
 
-# Constants for token management
-DEFAULT_TOKEN_THRESHOLD = 120000 # 80k tokens threshold for summarization
-SUMMARY_TARGET_TOKENS = 10000 # Target ~10k tokens for the summary message
-RESERVE_TOKENS = 5000 # Reserve tokens for new messages
+DEFAULT_TOKEN_THRESHOLD = 120000
 
 class ContextManager:
     """Manages thread context including token counting and summarization."""
diff --git a/backend/api.py b/backend/api.py
index 1063802f..392f9e0f 100644
--- a/backend/api.py
+++ b/backend/api.py
@@ -126,12 +126,17 @@ async def log_requests_middleware(request: Request, call_next):
         raise
 
 # Define allowed origins based on environment
-allowed_origins = ["https://www.suna.so", "https://suna.so", "http://localhost:3000"]
+allowed_origins = ["https://www.suna.so", "https://suna.so"]
 allow_origin_regex = None
 
+# Add local development origins
+if config.ENV_MODE == EnvMode.LOCAL:
+    allowed_origins.append("http://localhost:3000")
+
 # Add staging-specific origins
 if config.ENV_MODE == EnvMode.STAGING:
     allowed_origins.append("https://staging.suna.so")
+    allowed_origins.append("http://localhost:3000")
     allow_origin_regex = r"https://suna-.*-prjcts\.vercel\.app"
 
 app.add_middleware(
diff --git a/backend/services/billing.py b/backend/services/billing.py
index b47938a5..1f46a571 100644
--- a/backend/services/billing.py
+++ b/backend/services/billing.py
@@ -14,7 +14,7 @@ from services.supabase import DBConnection
 from utils.auth_utils import get_current_user_id_from_jwt
 from pydantic import BaseModel
 from utils.constants import MODEL_ACCESS_TIERS, MODEL_NAME_ALIASES
-from litellm import cost_per_token, model_cost
+from litellm import cost_per_token
 import time
 
 # Initialize Stripe