add tests

2025-04-17 22:52:01 +01:00 · 2025-04-17 22:52:01 +01:00 · 965a080a85
parent 94ee217e36
commit 965a080a85
4 changed files with 230 additions and 2 deletions
--- a/backend/tests/raw_litellm_call_test.py
+++ b/backend/tests/raw_litellm_call_test.py
@ -69,6 +69,7 @@ async def run_conversation_turn(model: str, messages: list, user_prompt: str | l
    # Convert response object to dict and print as indented JSON
    try:
        print(json.dumps(response.dict(), indent=2))
+        print(response._hidden_params)
    except Exception as e:
        print(f"Could not format response as JSON: {e}")
        print(response) # Fallback to printing the raw object if conversion fails
@ -139,6 +140,7 @@ async def main(model_name: str, reasoning_effort: str = "medium"):
 if __name__ == "__main__":
    # Select the model to test
    model = "anthropic/claude-3-7-sonnet-latest"
+    # model = "groq/llama-3.3-70b-versatile"
    # model = "openai/gpt-4o-mini"
    # model = "openai/gpt-4.1-2025-04-14" # Placeholder if needed

--- a/backend/tests/raw_litellm_stream_test.py
+++ b/backend/tests/raw_litellm_stream_test.py
@ -90,9 +90,21 @@ async def run_streaming_conversation_turn(model: str, messages: list, user_promp
                # Stream to stdout in real-time
                print(chunk_content, end="", flush=True)

-    print("--------------------------------")
    print() # Newline after streaming finishes

+    # Print hidden params if available
+    try:
+        print("--- Hidden Params ---")
+        print(stream_response._hidden_params)
+        print("--- End Hidden Params ---")
+    except AttributeError:
+        print("(_hidden_params attribute not found on stream response object)")
+    except Exception as e:
+        print(f"Could not print _hidden_params: {e}")
+
+    print("--------------------------------")
+    print() # Add another newline for separation
+    
    # Create a complete response object with the full content
    final_response = {
        "model": model,
--- a/backend/tests/run_agent_test.py
+++ b/backend/tests/run_agent_test.py
@ -64,7 +64,6 @@ account_id).limit(1).execute()
        thread_result = await client.table('threads').insert({
            'project_id': project_id,
            'account_id': account_id
-            # 'name': f"Test Run - News Report - {asyncio.get_event_loop().time()}" # Removed name field
        }).execute()

        if not thread_result.data:
--- a/backend/tests/test_agent_thinking.py
+++ b/backend/tests/test_agent_thinking.py
@ -0,0 +1,215 @@
+"""
+Test script for running the AgentPress agent with thinking enabled.
+
+This test specifically targets Anthropic models that support the 'reasoning_effort'
+parameter to observe the agent's behavior when thinking is explicitly enabled.
+"""
+
+import asyncio
+import json
+import os
+import sys
+import traceback
+from dotenv import load_dotenv
+
+# Ensure the backend directory is in the Python path
+backend_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
+if backend_dir not in sys.path:
+    sys.path.insert(0, backend_dir)
+
+import logging
+from agentpress.thread_manager import ThreadManager
+from services.supabase import DBConnection
+from agent.run import run_agent, process_agent_response # Reuse processing logic
+from utils.logger import logger
+
+logger.setLevel(logging.DEBUG)
+
+async def test_agent_with_thinking():
+    """
+    Test running the agent with thinking enabled for an Anthropic model.
+    """
+    print("\n" + "="*80)
+    print("🧪 TESTING AGENT RUN WITH THINKING ENABLED (Anthropic)")
+    print("="*80 + "\n")
+
+    # Load environment variables
+    load_dotenv()
+
+    # Initialize ThreadManager and DBConnection
+    thread_manager = ThreadManager()
+    db_connection = DBConnection()
+    await db_connection.initialize() # Ensure connection is ready
+    client = await db_connection.client
+
+    thread_id = None
+    project_id = None
+    project_created = False # Flag to track if we created the project
+
+    try:
+        # --- Test Setup ---
+        print("🔧 Setting up test environment (Project & Thread)...")
+        logger.info("Setting up test project and thread...")
+
+        # Using a hardcoded account ID for consistency in tests
+        account_id = "a5fe9cb6-4812-407e-a61c-fe95b7320c59" # Replace if necessary
+        test_project_name = "test_agent_thinking_project"
+        logger.info(f"Using Account ID: {account_id}")
+
+        if not account_id:
+            print("❌ Error: Could not determine Account ID.")
+            logger.error("Could not determine Account ID.")
+            return
+
+        # Find or create a test project
+        project_result = await client.table('projects').select('*').eq('name', test_project_name).eq('account_id', account_id).limit(1).execute()
+
+        if project_result.data:
+            project_id = project_result.data[0]['project_id']
+            print(f"🔄 Using existing test project: {project_id}")
+            logger.info(f"Using existing test project: {project_id}")
+        else:
+            project_insert_result = await client.table('projects').insert({
+                "name": test_project_name,
+                "account_id": account_id
+            }).execute()
+            if not project_insert_result.data:
+                 print("❌ Error: Failed to create test project.")
+                 logger.error("Failed to create test project.")
+                 return
+            project_id = project_insert_result.data[0]['project_id']
+            project_created = True
+            print(f"✨ Created new test project: {project_id}")
+            logger.info(f"Created new test project: {project_id}")
+
+        # Create a new thread for this test run
+        thread_result = await client.table('threads').insert({
+            'project_id': project_id,
+            'account_id': account_id
+        }).execute()
+
+        if not thread_result.data:
+            print("❌ Error: Failed to create test thread.")
+            logger.error("Failed to create test thread.")
+            return
+
+        thread_id = thread_result.data[0]['thread_id']
+        print(f"🧵 Created new test thread: {thread_id}")
+        logger.info(f"Test Thread Created: {thread_id}")
+
+        # Add an initial user message that requires planning
+        initial_message = "Create a plan to build a simple 'Hello World' HTML page in the workspace, then execute the first step of the plan."
+        print(f"\n💬 Adding initial user message: '{initial_message}'")
+        logger.info(f"Adding initial user message: '{initial_message}'")
+        await thread_manager.add_message(
+            thread_id=thread_id,
+            type="user",
+            content={
+                "role": "user",
+                "content": initial_message
+            },
+            is_llm_message=True
+        )
+        print("✅ Initial message added.")
+
+        # --- Run Agent with Thinking Enabled ---
+        logger.info("Running agent ...")
+
+        # Use the process_agent_response helper to handle streaming output.
+        # Pass the desired model, thinking, and stream parameters directly to it.
+        await process_agent_response(
+            thread_id=thread_id,
+            project_id=project_id,
+            thread_manager=thread_manager,
+            stream=False, # Streaming is disabled for this test run
+            model_name="anthropic/claude-3-7-sonnet-latest", # Specify the model here
+            enable_thinking=True,                         # Enable thinking here
+            reasoning_effort='low'                        # Specify effort here
+        )
+        # await process_agent_response(
+            # thread_id=thread_id,
+            # project_id=project_id,
+            # thread_manager=thread_manager,
+            # model_name="openai/gpt-4.1-2025-04-14", # Specify the model here
+            # model_name="groq/llama-3.3-70b-versatile",
+            # enable_thinking=False,                         # Disable thinking here
+            # reasoning_effort='low'                        # Specify effort here
+        # )
+
+        # --- Direct Stream Processing (Alternative to process_agent_response) ---
+        # The direct run_agent call was removed because process_agent_response handles it; the loop below is kept for reference only (agent_run_generator is not defined in this script).
+        # print("\n--- Agent Response Stream ---")
+        # async for chunk in agent_run_generator:
+        #     chunk_type = chunk.get('type', 'unknown')
+        #     if chunk_type == 'content' and 'content' in chunk:
+        #         print(chunk['content'], end='', flush=True)
+        #     elif chunk_type == 'tool_result':
+        #         tool_name = chunk.get('function_name', 'Tool')
+        #         result = chunk.get('result', '')
+        #         print(f"\n\n🛠️ TOOL RESULT [{tool_name}] → {result}", flush=True)
+        #     elif chunk_type == 'tool_status':
+        #         status = chunk.get('status', '')
+        #         func_name = chunk.get('function_name', '')
+        #         if status and func_name:
+        #             emoji = "✅" if status == "completed" else "⏳" if status == "started" else "❌"
+        #             print(f"\n{emoji} TOOL {status.upper()}: {func_name}", flush=True)
+        #     elif chunk_type == 'finish':
+        #         reason = chunk.get('finish_reason', '')
+        #         if reason:
+        #             print(f"\n📌 Finished: {reason}", flush=True)
+        #     elif chunk_type == 'error':
+        #         print(f"\n❌ ERROR: {chunk.get('message', 'Unknown error')}", flush=True)
+        #         break # Stop processing on error
+
+        print("\n\n✅ Agent run finished.")
+        logger.info("Agent run finished.")
+
+    except Exception as e:
+        print(f"\n❌ An error occurred during the test: {e}")
+        logger.error(f"An error occurred during the test: {str(e)}", exc_info=True)
+        traceback.print_exc()
+    finally:
+        # --- Cleanup ---
+        print("\n🧹 Cleaning up test resources...")
+        logger.info("Cleaning up test resources...")
+        if thread_id:
+            try:
+                await client.table('messages').delete().eq('thread_id', thread_id).execute()
+                await client.table('threads').delete().eq('thread_id', thread_id).execute()
+                print(f"🗑️ Deleted test thread: {thread_id}")
+                logger.info(f"Deleted test thread: {thread_id}")
+            except Exception as e:
+                print(f"⚠️ Error cleaning up thread {thread_id}: {e}")
+                logger.warning(f"Error cleaning up thread {thread_id}: {e}")
+        if project_id and project_created: # Only delete if we created it in this run
+            try:
+                await client.table('projects').delete().eq('project_id', project_id).execute()
+                print(f"🗑️ Deleted test project: {project_id}")
+                logger.info(f"Deleted test project: {project_id}")
+            except Exception as e:
+                print(f"⚠️ Error cleaning up project {project_id}: {e}")
+                logger.warning(f"Error cleaning up project {project_id}: {e}")
+
+        # Disconnect DB
+        await db_connection.disconnect()
+        logger.info("Database connection closed.")
+
+    print("\n" + "="*80)
+    print("🏁 THINKING TEST COMPLETE")
+    print("="*80 + "\n")
+
+
+if __name__ == "__main__":
+    # Log the script start; the logger level is already set to DEBUG at import time
+    logger.info("Starting test_agent_thinking script...")
+    try:
+        asyncio.run(test_agent_with_thinking())
+        print("\n✅ Test script completed successfully.")
+        sys.exit(0)
+    except KeyboardInterrupt:
+        print("\n\n❌ Test interrupted by user.")
+        sys.exit(1)
+    except Exception as e:
+        print(f"\n\n❌ Error running test script: {e}")
+        traceback.print_exc()
+        sys.exit(1)