add tests

2025-04-17 22:52:01 +01:00 · 2025-04-17 22:52:01 +01:00 · 965a080a85
parent 94ee217e36
commit 965a080a85
4 changed files with 230 additions and 2 deletions
--- a/backend/tests/raw_litellm_call_test.py
+++ b/backend/tests/raw_litellm_call_test.py
@ -69,6 +69,7 @@ async def run_conversation_turn(model: str, messages: list, user_prompt: str | l
    # Convert response object to dict and print as indented JSON
    try:
        print(json.dumps(response.dict(), indent=2))
        print(response._hidden_params)
    except Exception as e:
        print(f"Could not format response as JSON: {e}")
        print(response) # Fallback to printing the raw object if conversion fails
@ -139,6 +140,7 @@ async def main(model_name: str, reasoning_effort: str = "medium"):
 if __name__ == "__main__":
    # Select the model to test
    model = "anthropic/claude-3-7-sonnet-latest"
    # model = "groq/llama-3.3-70b-versatile"
    # model = "openai/gpt-4o-mini"
    # model = "openai/gpt-4.1-2025-04-14" # Placeholder if needed
--- a/backend/tests/raw_litellm_stream_test.py
+++ b/backend/tests/raw_litellm_stream_test.py
@ -90,8 +90,20 @@ async def run_streaming_conversation_turn(model: str, messages: list, user_promp
                # Stream to stdout in real-time
                print(chunk_content, end="", flush=True)
    print("--------------------------------")
    print() # Newline after streaming finishes
    # Print hidden params if available
    try:
        print("--- Hidden Params ---")
        print(stream_response._hidden_params)
        print("--- End Hidden Params ---")
    except AttributeError:
        print("(_hidden_params attribute not found on stream response object)")
    except Exception as e:
        print(f"Could not print _hidden_params: {e}")
    print("--------------------------------")
    print() # Add another newline for separation
    # Create a complete response object with the full content
    final_response = {
--- a/backend/tests/run_agent_test.py
+++ b/backend/tests/run_agent_test.py
@ -64,7 +64,6 @@ account_id).limit(1).execute()
        thread_result = await client.table('threads').insert({
            'project_id': project_id,
            'account_id': account_id
            # 'name': f"Test Run - News Report - {asyncio.get_event_loop().time()}" # Removed name field
        }).execute()
        if not thread_result.data:
--- a/backend/tests/test_agent_thinking.py
+++ b/backend/tests/test_agent_thinking.py
@ -0,0 +1,215 @@
 """
 Test script for running the AgentPress agent with thinking enabled.
 This test specifically targets Anthropic models that support the 'reasoning_effort'
 parameter to observe the agent's behavior when thinking is explicitly enabled.
 """
 import asyncio
 import json
 import os
 import sys
 import traceback
 from dotenv import load_dotenv
 # Ensure the backend directory is in the Python path
 backend_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
 if backend_dir not in sys.path:
    sys.path.insert(0, backend_dir)
 import logging
 from agentpress.thread_manager import ThreadManager
 from services.supabase import DBConnection
 from agent.run import run_agent, process_agent_response # Reuse processing logic
 from utils.logger import logger
 logger.setLevel(logging.DEBUG)
 async def test_agent_with_thinking():
    """
    Run the agent once with thinking enabled for an Anthropic model.

    Finds or creates the test project, creates a fresh thread, adds a single
    user message and passes the thread to ``process_agent_response`` with
    ``enable_thinking=True``. Afterwards the thread (and the project, if this
    run created it) is deleted and the DB connection is disconnected,
    regardless of whether the run succeeded.

    Raises:
        RuntimeError: If the account ID is empty, or the test project or
            thread could not be created.
        Exception: Any other error raised during setup or the agent run is
            printed, logged, and then re-raised so the caller sees the failure.
    """
    print("\n" + "="*80)
    print("🧪 TESTING AGENT RUN WITH THINKING ENABLED (Anthropic)")
    print("="*80 + "\n")

    # Load environment variables
    load_dotenv()

    # Initialize ThreadManager and DBConnection
    thread_manager = ThreadManager()
    db_connection = DBConnection()
    await db_connection.initialize() # Ensure connection is ready
    client = await db_connection.client

    # Tracked outside the try block so the cleanup in `finally` knows what exists
    thread_id = None
    project_id = None
    project_created = False # Only delete the project if this run created it

    try:
        # --- Test Setup ---
        print("🔧 Setting up test environment (Project & Thread)...")
        logger.info("Setting up test project and thread...")

        # Using a hardcoded account ID for consistency in tests
        account_id = "a5fe9cb6-4812-407e-a61c-fe95b7320c59" # Replace if necessary
        test_project_name = "test_agent_thinking_project"
        logger.info(f"Using Account ID: {account_id}")
        if not account_id:
            # Raise (rather than return) so the run is reported as failed
            raise RuntimeError("Could not determine Account ID.")

        # Find or create a test project
        project_result = await client.table('projects').select('*').eq('name', test_project_name).eq('account_id', account_id).limit(1).execute()
        if project_result.data:
            project_id = project_result.data[0]['project_id']
            print(f"🔄 Using existing test project: {project_id}")
            logger.info(f"Using existing test project: {project_id}")
        else:
            project_insert_result = await client.table('projects').insert({
                "name": test_project_name,
                "account_id": account_id
            }).execute()
            if not project_insert_result.data:
                raise RuntimeError("Failed to create test project.")
            project_id = project_insert_result.data[0]['project_id']
            project_created = True
            print(f"✨ Created new test project: {project_id}")
            logger.info(f"Created new test project: {project_id}")

        # Create a new thread for this test run
        thread_result = await client.table('threads').insert({
            'project_id': project_id,
            'account_id': account_id
        }).execute()
        if not thread_result.data:
            raise RuntimeError("Failed to create test thread.")
        thread_id = thread_result.data[0]['thread_id']
        print(f"🧵 Created new test thread: {thread_id}")
        logger.info(f"Test Thread Created: {thread_id}")

        # Add an initial user message that requires planning
        initial_message = "Create a plan to build a simple 'Hello World' HTML page in the workspace, then execute the first step of the plan."
        print(f"\n💬 Adding initial user message: '{initial_message}'")
        logger.info(f"Adding initial user message: '{initial_message}'")
        await thread_manager.add_message(
            thread_id=thread_id,
            type="user",
            content={
                "role": "user",
                "content": initial_message
            },
            is_llm_message=True
        )
        print("✅ Initial message added.")

        # --- Run Agent with Thinking Enabled ---
        logger.info("Running agent ...")
        # The model, thinking and stream settings are passed straight through
        # to the process_agent_response helper imported from agent.run.
        # To compare against another model, swap model_name (e.g.
        # "openai/gpt-4.1-2025-04-14" or "groq/llama-3.3-70b-versatile") and
        # set enable_thinking=False.
        await process_agent_response(
            thread_id=thread_id,
            project_id=project_id,
            thread_manager=thread_manager,
            stream=False,                                 # Streaming is disabled for this run
            model_name="anthropic/claude-3-7-sonnet-latest", # Specify the model here
            enable_thinking=True,                         # Enable thinking here
            reasoning_effort='low'                        # Specify effort here
        )

        print("\n\n✅ Agent run finished.")
        logger.info("Agent run finished.")
    except Exception as e:
        print(f"\n❌ An error occurred during the test: {e}")
        # exc_info=True asks the logger to include the traceback
        logger.error(f"An error occurred during the test: {str(e)}", exc_info=True)
        # Re-raise so a failed run is not reported as a success by the caller;
        # the `finally` block below still performs the cleanup first.
        raise
    finally:
        # --- Cleanup ---
        # Best effort: a failure to delete one resource is reported as a
        # warning and does not prevent the remaining cleanup steps.
        print("\n🧹 Cleaning up test resources...")
        logger.info("Cleaning up test resources...")
        if thread_id:
            try:
                # Messages are deleted before the thread they belong to
                await client.table('messages').delete().eq('thread_id', thread_id).execute()
                await client.table('threads').delete().eq('thread_id', thread_id).execute()
                print(f"🗑️ Deleted test thread: {thread_id}")
                logger.info(f"Deleted test thread: {thread_id}")
            except Exception as e:
                print(f"⚠️ Error cleaning up thread {thread_id}: {e}")
                logger.warning(f"Error cleaning up thread {thread_id}: {e}")
        if project_id and project_created: # Only delete if we created it in this run
            try:
                await client.table('projects').delete().eq('project_id', project_id).execute()
                print(f"🗑️ Deleted test project: {project_id}")
                logger.info(f"Deleted test project: {project_id}")
            except Exception as e:
                print(f"⚠️ Error cleaning up project {project_id}: {e}")
                logger.warning(f"Error cleaning up project {project_id}: {e}")
        # Disconnect DB
        await db_connection.disconnect()
        logger.info("Database connection closed.")

    print("\n" + "="*80)
    print("🏁 THINKING TEST COMPLETE")
    print("="*80 + "\n")
 if __name__ == "__main__":
    # Script entry point: run the async test once and translate the outcome
    # into a process exit code (0 on success, 1 on interrupt or error).
    logger.info("Starting test_agent_thinking script...")
    exit_code = 1  # Assume failure until the run completes
    try:
        asyncio.run(test_agent_with_thinking())
        print("\n✅ Test script completed successfully.")
        exit_code = 0
    except KeyboardInterrupt:
        print("\n\n❌ Test interrupted by user.")
    except Exception as err:
        print(f"\n\n❌ Error running test script: {err}")
        traceback.print_exc()
    sys.exit(exit_code)