mirror of https://github.com/kortix-ai/suna.git
add tests
This commit is contained in:
parent
94ee217e36
commit
965a080a85
|
@ -69,6 +69,7 @@ async def run_conversation_turn(model: str, messages: list, user_prompt: str | l
|
|||
# Convert response object to dict and print as indented JSON
|
||||
try:
|
||||
print(json.dumps(response.dict(), indent=2))
|
||||
print(response._hidden_params)
|
||||
except Exception as e:
|
||||
print(f"Could not format response as JSON: {e}")
|
||||
print(response) # Fallback to printing the raw object if conversion fails
|
||||
|
@ -139,6 +140,7 @@ async def main(model_name: str, reasoning_effort: str = "medium"):
|
|||
if __name__ == "__main__":
|
||||
# Select the model to test
|
||||
model = "anthropic/claude-3-7-sonnet-latest"
|
||||
# model = "groq/llama-3.3-70b-versatile"
|
||||
# model = "openai/gpt-4o-mini"
|
||||
# model = "openai/gpt-4.1-2025-04-14" # Placeholder if needed
|
||||
|
||||
|
|
|
@ -90,9 +90,21 @@ async def run_streaming_conversation_turn(model: str, messages: list, user_promp
|
|||
# Stream to stdout in real-time
|
||||
print(chunk_content, end="", flush=True)
|
||||
|
||||
print("--------------------------------")
|
||||
print() # Newline after streaming finishes
|
||||
|
||||
# Print hidden params if available
|
||||
try:
|
||||
print("--- Hidden Params ---")
|
||||
print(stream_response._hidden_params)
|
||||
print("--- End Hidden Params ---")
|
||||
except AttributeError:
|
||||
print("(_hidden_params attribute not found on stream response object)")
|
||||
except Exception as e:
|
||||
print(f"Could not print _hidden_params: {e}")
|
||||
|
||||
print("--------------------------------")
|
||||
print() # Add another newline for separation
|
||||
|
||||
# Create a complete response object with the full content
|
||||
final_response = {
|
||||
"model": model,
|
||||
|
|
|
@ -64,7 +64,6 @@ account_id).limit(1).execute()
|
|||
thread_result = await client.table('threads').insert({
|
||||
'project_id': project_id,
|
||||
'account_id': account_id
|
||||
# 'name': f"Test Run - News Report - {asyncio.get_event_loop().time()}" # Removed name field
|
||||
}).execute()
|
||||
|
||||
if not thread_result.data:
|
||||
|
|
|
@ -0,0 +1,215 @@
|
|||
"""
|
||||
Test script for running the AgentPress agent with thinking enabled.
|
||||
|
||||
This test specifically targets Anthropic models that support the 'reasoning_effort'
|
||||
parameter to observe the agent's behavior when thinking is explicitly enabled.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import traceback
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Ensure the backend directory is in the Python path
|
||||
backend_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
|
||||
if backend_dir not in sys.path:
|
||||
sys.path.insert(0, backend_dir)
|
||||
|
||||
import logging
|
||||
from agentpress.thread_manager import ThreadManager
|
||||
from services.supabase import DBConnection
|
||||
from agent.run import run_agent, process_agent_response # Reuse processing logic
|
||||
from utils.logger import logger
|
||||
|
||||
logger.setLevel(logging.DEBUG)
|
||||
|
||||
async def test_agent_with_thinking():
    """
    Run the agent once against an Anthropic model with thinking enabled.

    The test finds (or creates) a project named ``test_agent_thinking_project``
    for a hardcoded account, creates a fresh thread, adds one user message and
    hands the thread to ``process_agent_response`` with ``enable_thinking=True``.
    The thread and its messages are always deleted afterwards; the project is
    deleted only if this run created it.

    Raises:
        RuntimeError: If the account ID is empty or the project/thread rows
            cannot be created.
        Exception: Any other error raised during setup or the agent run is
            logged and re-raised after cleanup, so the caller can report the
            failure instead of treating the run as successful.
    """
    print("\n" + "=" * 80)
    print("🧪 TESTING AGENT RUN WITH THINKING ENABLED (Anthropic)")
    print("=" * 80 + "\n")

    # Load environment variables
    load_dotenv()

    # Initialize ThreadManager and DBConnection
    thread_manager = ThreadManager()
    db_connection = DBConnection()
    await db_connection.initialize()  # Ensure connection is ready
    client = await db_connection.client

    thread_id = None
    project_id = None
    project_created = False  # Only delete the project if this run created it

    try:
        # --- Test Setup ---
        print("🔧 Setting up test environment (Project & Thread)...")
        logger.info("Setting up test project and thread...")

        # Using a hardcoded account ID for consistency in tests
        account_id = "a5fe9cb6-4812-407e-a61c-fe95b7320c59"  # Replace if necessary
        test_project_name = "test_agent_thinking_project"
        logger.info(f"Using Account ID: {account_id}")

        # Guards against the constant above being blanked out when edited.
        if not account_id:
            raise RuntimeError("Could not determine Account ID.")

        # Find or create a test project
        project_result = await (
            client.table('projects')
            .select('*')
            .eq('name', test_project_name)
            .eq('account_id', account_id)
            .limit(1)
            .execute()
        )

        if project_result.data:
            project_id = project_result.data[0]['project_id']
            print(f"🔄 Using existing test project: {project_id}")
            logger.info(f"Using existing test project: {project_id}")
        else:
            project_insert_result = await client.table('projects').insert({
                "name": test_project_name,
                "account_id": account_id,
            }).execute()
            if not project_insert_result.data:
                raise RuntimeError("Failed to create test project.")
            project_id = project_insert_result.data[0]['project_id']
            project_created = True
            print(f"✨ Created new test project: {project_id}")
            logger.info(f"Created new test project: {project_id}")

        # Create a new thread for this test run
        thread_result = await client.table('threads').insert({
            'project_id': project_id,
            'account_id': account_id,
        }).execute()

        if not thread_result.data:
            raise RuntimeError("Failed to create test thread.")

        thread_id = thread_result.data[0]['thread_id']
        print(f"🧵 Created new test thread: {thread_id}")
        logger.info(f"Test Thread Created: {thread_id}")

        # Add an initial user message that requires planning
        initial_message = "Create a plan to build a simple 'Hello World' HTML page in the workspace, then execute the first step of the plan."
        print(f"\n💬 Adding initial user message: '{initial_message}'")
        logger.info(f"Adding initial user message: '{initial_message}'")
        await thread_manager.add_message(
            thread_id=thread_id,
            type="user",
            content={
                "role": "user",
                "content": initial_message,
            },
            is_llm_message=True,
        )
        print("✅ Initial message added.")

        # --- Run Agent with Thinking Enabled ---
        logger.info("Running agent ...")

        # Delegate the run to the process_agent_response helper, passing the
        # model, thinking and stream settings directly to it.
        # To compare against a run without thinking, swap model_name for e.g.
        # "openai/gpt-4.1-2025-04-14" or "groq/llama-3.3-70b-versatile" and
        # pass enable_thinking=False.
        await process_agent_response(
            thread_id=thread_id,
            project_id=project_id,
            thread_manager=thread_manager,
            stream=False,  # Streaming is disabled for this run
            model_name="anthropic/claude-3-7-sonnet-latest",  # Specify the model here
            enable_thinking=True,  # Enable thinking here
            reasoning_effort='low',  # Specify effort here
        )

        print("\n\n✅ Agent run finished.")
        logger.info("Agent run finished.")

    except Exception as e:
        print(f"\n❌ An error occurred during the test: {e}")
        logger.error(f"An error occurred during the test: {str(e)}", exc_info=True)
        # Propagate the failure; swallowing it here would let the caller
        # report a failed run as a success.
        raise
    finally:
        # --- Cleanup ---
        print("\n🧹 Cleaning up test resources...")
        logger.info("Cleaning up test resources...")
        if thread_id:
            try:
                # Messages are removed before the thread they belong to.
                await client.table('messages').delete().eq('thread_id', thread_id).execute()
                await client.table('threads').delete().eq('thread_id', thread_id).execute()
                print(f"🗑️ Deleted test thread: {thread_id}")
                logger.info(f"Deleted test thread: {thread_id}")
            except Exception as e:
                # Cleanup is best-effort: report and continue.
                print(f"⚠️ Error cleaning up thread {thread_id}: {e}")
                logger.warning(f"Error cleaning up thread {thread_id}: {e}")
        if project_id and project_created:  # Only delete if we created it in this run
            try:
                await client.table('projects').delete().eq('project_id', project_id).execute()
                print(f"🗑️ Deleted test project: {project_id}")
                logger.info(f"Deleted test project: {project_id}")
            except Exception as e:
                print(f"⚠️ Error cleaning up project {project_id}: {e}")
                logger.warning(f"Error cleaning up project {project_id}: {e}")

        # Disconnect DB
        await db_connection.disconnect()
        logger.info("Database connection closed.")

        print("\n" + "=" * 80)
        print("🏁 THINKING TEST COMPLETE")
        print("=" * 80 + "\n")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run the async test once and turn its outcome into
    # a process exit status (0 on success, 1 on interruption or error).
    logger.info("Starting test_agent_thinking script...")

    exit_code = 1  # Assume failure until the run completes.
    try:
        asyncio.run(test_agent_with_thinking())
        print("\n✅ Test script completed successfully.")
        exit_code = 0
    except KeyboardInterrupt:
        # Ctrl-C during the run.
        print("\n\n❌ Test interrupted by user.")
    except Exception as err:
        print(f"\n\n❌ Error running test script: {err}")
        traceback.print_exc()

    sys.exit(exit_code)
|
Loading…
Reference in New Issue