add tests

This commit is contained in:
LE Quoc Dat 2025-04-17 22:52:01 +01:00
parent 94ee217e36
commit 965a080a85
4 changed files with 230 additions and 2 deletions

View File

@ -69,6 +69,7 @@ async def run_conversation_turn(model: str, messages: list, user_prompt: str | l
# Convert response object to dict and print as indented JSON # Convert response object to dict and print as indented JSON
try: try:
print(json.dumps(response.dict(), indent=2)) print(json.dumps(response.dict(), indent=2))
print(response._hidden_params)
except Exception as e: except Exception as e:
print(f"Could not format response as JSON: {e}") print(f"Could not format response as JSON: {e}")
print(response) # Fallback to printing the raw object if conversion fails print(response) # Fallback to printing the raw object if conversion fails
@ -139,6 +140,7 @@ async def main(model_name: str, reasoning_effort: str = "medium"):
if __name__ == "__main__": if __name__ == "__main__":
# Select the model to test # Select the model to test
model = "anthropic/claude-3-7-sonnet-latest" model = "anthropic/claude-3-7-sonnet-latest"
# model = "groq/llama-3.3-70b-versatile"
# model = "openai/gpt-4o-mini" # model = "openai/gpt-4o-mini"
# model = "openai/gpt-4.1-2025-04-14" # Placeholder if needed # model = "openai/gpt-4.1-2025-04-14" # Placeholder if needed

View File

@ -90,8 +90,20 @@ async def run_streaming_conversation_turn(model: str, messages: list, user_promp
# Stream to stdout in real-time # Stream to stdout in real-time
print(chunk_content, end="", flush=True) print(chunk_content, end="", flush=True)
print("--------------------------------")
print() # Newline after streaming finishes print() # Newline after streaming finishes
# Print hidden params if available
try:
print("--- Hidden Params ---")
print(stream_response._hidden_params)
print("--- End Hidden Params ---")
except AttributeError:
print("(_hidden_params attribute not found on stream response object)")
except Exception as e:
print(f"Could not print _hidden_params: {e}")
print("--------------------------------")
print() # Add another newline for separation
# Create a complete response object with the full content # Create a complete response object with the full content
final_response = { final_response = {

View File

@ -64,7 +64,6 @@ account_id).limit(1).execute()
thread_result = await client.table('threads').insert({ thread_result = await client.table('threads').insert({
'project_id': project_id, 'project_id': project_id,
'account_id': account_id 'account_id': account_id
# 'name': f"Test Run - News Report - {asyncio.get_event_loop().time()}" # Removed name field
}).execute() }).execute()
if not thread_result.data: if not thread_result.data:

View File

@ -0,0 +1,215 @@
"""
Test script for running the AgentPress agent with thinking enabled.
This test specifically targets Anthropic models that support the 'reasoning_effort'
parameter to observe the agent's behavior when thinking is explicitly enabled.
"""
import asyncio
import json
import os
import sys
import traceback
from dotenv import load_dotenv
# Ensure the backend directory is in the Python path
backend_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
if backend_dir not in sys.path:
sys.path.insert(0, backend_dir)
import logging
from agentpress.thread_manager import ThreadManager
from services.supabase import DBConnection
from agent.run import run_agent, process_agent_response # Reuse processing logic
from utils.logger import logger
logger.setLevel(logging.DEBUG)
async def test_agent_with_thinking():
    """
    Test running the agent with thinking enabled for an Anthropic model.

    End-to-end flow: find-or-create a dedicated test project, create a fresh
    thread, seed it with a planning-style user message, run the agent via
    process_agent_response with enable_thinking=True, then clean up the
    thread (and the project, but only if this run created it).
    """
    print("\n" + "="*80)
    print("🧪 TESTING AGENT RUN WITH THINKING ENABLED (Anthropic)")
    print("="*80 + "\n")
    # Load environment variables (API keys / DB credentials from .env)
    load_dotenv()
    # Initialize ThreadManager and DBConnection
    thread_manager = ThreadManager()
    db_connection = DBConnection()
    await db_connection.initialize() # Ensure connection is ready
    client = await db_connection.client
    # Track created resources so the finally-block knows what to delete.
    thread_id = None
    project_id = None
    project_created = False # Flag to track if we created the project
    try:
        # --- Test Setup ---
        print("🔧 Setting up test environment (Project & Thread)...")
        logger.info("Setting up test project and thread...")
        # Using a hardcoded account ID for consistency in tests
        account_id = "a5fe9cb6-4812-407e-a61c-fe95b7320c59" # Replace if necessary
        test_project_name = "test_agent_thinking_project"
        logger.info(f"Using Account ID: {account_id}")
        # NOTE(review): account_id is hardcoded just above, so this guard can
        # never trigger as written; it only matters if the assignment changes.
        if not account_id:
            print("❌ Error: Could not determine Account ID.")
            logger.error("Could not determine Account ID.")
            return
        # Find or create a test project (reused across runs to avoid churn)
        project_result = await client.table('projects').select('*').eq('name', test_project_name).eq('account_id', account_id).limit(1).execute()
        if project_result.data:
            project_id = project_result.data[0]['project_id']
            print(f"🔄 Using existing test project: {project_id}")
            logger.info(f"Using existing test project: {project_id}")
        else:
            project_insert_result = await client.table('projects').insert({
                "name": test_project_name,
                "account_id": account_id
            }).execute()
            if not project_insert_result.data:
                print("❌ Error: Failed to create test project.")
                logger.error("Failed to create test project.")
                return
            project_id = project_insert_result.data[0]['project_id']
            project_created = True # Remember so cleanup deletes it below
            print(f"✨ Created new test project: {project_id}")
            logger.info(f"Created new test project: {project_id}")
        # Create a new thread for this test run
        thread_result = await client.table('threads').insert({
            'project_id': project_id,
            'account_id': account_id
        }).execute()
        if not thread_result.data:
            print("❌ Error: Failed to create test thread.")
            logger.error("Failed to create test thread.")
            return
        thread_id = thread_result.data[0]['thread_id']
        print(f"🧵 Created new test thread: {thread_id}")
        logger.info(f"Test Thread Created: {thread_id}")
        # Add an initial user message that requires planning
        initial_message = "Create a plan to build a simple 'Hello World' HTML page in the workspace, then execute the first step of the plan."
        print(f"\n💬 Adding initial user message: '{initial_message}'")
        logger.info(f"Adding initial user message: '{initial_message}'")
        await thread_manager.add_message(
            thread_id=thread_id,
            type="user",
            content={
                "role": "user",
                "content": initial_message
            },
            is_llm_message=True
        )
        print("✅ Initial message added.")
        # --- Run Agent with Thinking Enabled ---
        logger.info("Running agent ...")
        # Use the process_agent_response helper to handle streaming output.
        # Pass the desired model, thinking, and stream parameters directly to it.
        await process_agent_response(
            thread_id=thread_id,
            project_id=project_id,
            thread_manager=thread_manager,
            stream=False, # Run this test non-streaming (original comment said "True" — it was stale)
            model_name="anthropic/claude-3-7-sonnet-latest", # Specify the model here
            enable_thinking=True, # Enable thinking here
            reasoning_effort='low' # Specify effort here
        )
        # Alternative invocation kept for manual experimentation with other models:
        # await process_agent_response(
        #     thread_id=thread_id,
        #     project_id=project_id,
        #     thread_manager=thread_manager,
        #     model_name="openai/gpt-4.1-2025-04-14", # Specify the model here
        #     model_name="groq/llama-3.3-70b-versatile",
        #     enable_thinking=False, # Enable thinking here
        #     reasoning_effort='low' # Specify effort here
        # )
        # --- Direct Stream Processing (Alternative to process_agent_response) ---
        # The direct run_agent call above was removed as process_agent_response handles it.
        # print("\n--- Agent Response Stream ---")
        # async for chunk in agent_run_generator:
        #     chunk_type = chunk.get('type', 'unknown')
        #     if chunk_type == 'content' and 'content' in chunk:
        #         print(chunk['content'], end='', flush=True)
        #     elif chunk_type == 'tool_result':
        #         tool_name = chunk.get('function_name', 'Tool')
        #         result = chunk.get('result', '')
        #         print(f"\n\n🛠 TOOL RESULT [{tool_name}] → {result}", flush=True)
        #     elif chunk_type == 'tool_status':
        #         status = chunk.get('status', '')
        #         func_name = chunk.get('function_name', '')
        #         if status and func_name:
        #             emoji = "✅" if status == "completed" else "⏳" if status == "started" else "❌"
        #             print(f"\n{emoji} TOOL {status.upper()}: {func_name}", flush=True)
        #     elif chunk_type == 'finish':
        #         reason = chunk.get('finish_reason', '')
        #         if reason:
        #             print(f"\n📌 Finished: {reason}", flush=True)
        #     elif chunk_type == 'error':
        #         print(f"\n❌ ERROR: {chunk.get('message', 'Unknown error')}", flush=True)
        #         break # Stop processing on error
        print("\n\n✅ Agent run finished.")
        logger.info("Agent run finished.")
    except Exception as e:
        print(f"\n❌ An error occurred during the test: {e}")
        logger.error(f"An error occurred during the test: {str(e)}", exc_info=True)
        traceback.print_exc()
    finally:
        # --- Cleanup ---
        print("\n🧹 Cleaning up test resources...")
        logger.info("Cleaning up test resources...")
        if thread_id:
            try:
                # Delete messages before the thread — presumably messages
                # reference threads via a foreign key; confirm against schema.
                await client.table('messages').delete().eq('thread_id', thread_id).execute()
                await client.table('threads').delete().eq('thread_id', thread_id).execute()
                print(f"🗑️ Deleted test thread: {thread_id}")
                logger.info(f"Deleted test thread: {thread_id}")
            except Exception as e:
                print(f"⚠️ Error cleaning up thread {thread_id}: {e}")
                logger.warning(f"Error cleaning up thread {thread_id}: {e}")
        if project_id and project_created: # Only delete if we created it in this run
            try:
                await client.table('projects').delete().eq('project_id', project_id).execute()
                print(f"🗑️ Deleted test project: {project_id}")
                logger.info(f"Deleted test project: {project_id}")
            except Exception as e:
                print(f"⚠️ Error cleaning up project {project_id}: {e}")
                logger.warning(f"Error cleaning up project {project_id}: {e}")
        # Disconnect DB
        await db_connection.disconnect()
        logger.info("Database connection closed.")
        print("\n" + "="*80)
        print("🏁 THINKING TEST COMPLETE")
        print("="*80 + "\n")
if __name__ == "__main__":
    # Entry point: run the async test and translate the outcome into an
    # exit code (0 on success, 1 on interrupt or unexpected error).
    logger.info("Starting test_agent_thinking script...")
    exit_code = 0
    try:
        asyncio.run(test_agent_with_thinking())
    except KeyboardInterrupt:
        print("\n\n❌ Test interrupted by user.")
        exit_code = 1
    except Exception as e:
        print(f"\n\n❌ Error running test script: {e}")
        traceback.print_exc()
        exit_code = 1
    else:
        # Only report success when the coroutine completed without raising.
        print("\n✅ Test script completed successfully.")
    sys.exit(exit_code)