"""
Simple test script for LLM API with tool calling functionality .
This script tests basic tool calling with both streaming and non - streaming to verify functionality .
"""
import asyncio
import json

from services.llm import make_llm_api_call
from utils.logger import logger
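
# NOTE: make_llm_api_call is this repo's async wrapper around the LLM provider
# API; judging by its kwargs (model_name, tools, tool_choice, stream) it
# follows litellm/OpenAI-style chat-completion semantics.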

# Example tool schema from files_tool.py
CREATE_FILE_SCHEMA = {
    "type": "function",
    "function": {
        "name": "create_file",
        "description": "Create a new file with the provided contents at a given path in the workspace",
        "parameters": {
            "type": "object",
            "properties": {
                "file_path": {
                    "type": "string",
                    "description": "Path to the file to be created"
                },
                "file_contents": {
                    "type": "string",
                    "description": "The content to write to the file"
                }
            },
            "required": ["file_path", "file_contents"]
        }
    }
}
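
# A conforming call to this tool carries a JSON arguments string such as:
#   {"file_path": "hello.html", "file_contents": "<h1>Hello World</h1>"}
# (illustrative values only; the model chooses the actual contents).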


async def test_simple_tool_call():
    """Test a simple non-streaming tool call to verify functionality."""
    # Setup conversation
    messages = [
        {"role": "system", "content": "You are a helpful assistant with access to file management tools."},
        {"role": "user", "content": "Create an HTML file named hello.html with a simple Hello World message."}
    ]

    print("\n=== Testing non-streaming tool call ===\n")

    try:
        # Make API call with tool
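        # Note: pinning tool_choice to a specific function forces the model to
        # call create_file; "auto" (used in the streaming test below) lets the
        # model decide whether and which tool to call.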
        response = await make_llm_api_call(
            messages=messages,
            model_name="gpt-4o",
            temperature=0.0,
            tools=[CREATE_FILE_SCHEMA],
            tool_choice={"type": "function", "function": {"name": "create_file"}}
        )

        # Print basic response info
        print(f"Response model: {response.model}")
        print(f"Response type: {type(response)}")

        # Check if the response has tool calls
        assistant_message = response.choices[0].message
        print(f"\nAssistant message content: {assistant_message.content}")

        if hasattr(assistant_message, 'tool_calls') and assistant_message.tool_calls:
            print("\nTool calls detected:")
            for i, tool_call in enumerate(assistant_message.tool_calls):
                print(f"\nTool call {i + 1}:")
                print(f"ID: {tool_call.id}")
                print(f"Type: {tool_call.type}")
                print(f"Function: {tool_call.function.name}")
                print("Arguments:")
                try:
                    args = json.loads(tool_call.function.arguments)
                    print(json.dumps(args, indent=4))

                    # Access and print specific arguments
                    if tool_call.function.name == "create_file":
                        print(f"\nFile path: {args.get('file_path')}")
                        print(f"File contents length: {len(args.get('file_contents', ''))}")
                        print(f"File contents preview: {args.get('file_contents', '')[:100]}...")
                except Exception as e:
                    print(f"Error parsing arguments: {e}")
        else:
            print("\nNo tool calls found in the response.")
            print(f"Full response: {response}")
    except Exception as e:
        logger.error(f"Error in test: {str(e)}", exc_info=True)


async def test_streaming_tool_call():
    """Test tool calling with streaming to observe behavior."""
    # Setup conversation
    messages = [
{ " role " : " system " , " content " : " You are a helpful assistant with access to file management tools. YOU ALWAYS USE MULTIPLE TOOL FUNCTION CALLS AT ONCE. YOU NEVER USE ONE TOOL FUNCTION CALL AT A TIME. " } ,
{ " role " : " user " , " content " : " Create 10 random files with different extensions and content. " }
    ]
print ( " \n === Testing streaming tool call === \n " )
try :
# Make API call with tool in streaming mode
print ( " Sending streaming request... " )
stream_response = await make_llm_api_call (
messages = messages ,
            model_name="anthropic/claude-3-5-sonnet-latest",
            temperature=0.0,
            tools=[CREATE_FILE_SCHEMA],
            tool_choice="auto",
            stream=True
        )

        # Process streaming response
        print("\nResponse stream started. Processing chunks:\n")

        # Stream statistics
        chunk_count = 0
        content_chunks = 0
        tool_call_chunks = 0
        accumulated_content = ""

        # Storage for accumulated tool calls
        tool_calls = []
        last_chunk = None  # Variable to store the last chunk

        # Process each chunk
        async for chunk in stream_response:
            chunk_count += 1
            last_chunk = chunk  # Keep track of the last chunk

            # Print chunk number and type
            print(f"\n--- Chunk {chunk_count} ---")
            print(f"Chunk type: {type(chunk)}")

            if not hasattr(chunk, 'choices') or not chunk.choices:
                print("No choices in chunk")
                continue

            delta = chunk.choices[0].delta

            # Process content if present
            if hasattr(delta, 'content') and delta.content is not None:
                content_chunks += 1
                accumulated_content += delta.content
                print(f"Content: {delta.content}")

            # Look for tool calls
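            # (OpenAI-style streams split each tool call across chunks: the
            # first fragment usually carries the id and function name, later
            # fragments append pieces of the JSON arguments string, and `index`
            # identifies which call a fragment belongs to when several tool
            # calls are emitted in parallel.)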
            if hasattr(delta, 'tool_calls') and delta.tool_calls:
                tool_call_chunks += 1
                print("Tool call detected in chunk!")

                for tool_call in delta.tool_calls:
                    print(f"Tool call: {tool_call.model_dump()}")

                    # Track tool call parts
                    tool_call_index = tool_call.index if hasattr(tool_call, 'index') else 0

                    # Initialize tool call if new
                    while len(tool_calls) <= tool_call_index:
                        tool_calls.append({
                            "id": "",
                            "type": "function",
                            "function": {"name": "", "arguments": ""}
                        })

                    # Update tool call ID if present
                    if hasattr(tool_call, 'id') and tool_call.id:
                        tool_calls[tool_call_index]["id"] = tool_call.id

                    # Update function name and arguments if present
                    # (guard against a None function attribute as well)
                    if hasattr(tool_call, 'function') and tool_call.function:
                        if hasattr(tool_call.function, 'name') and tool_call.function.name:
                            tool_calls[tool_call_index]["function"]["name"] = tool_call.function.name
                        if hasattr(tool_call.function, 'arguments') and tool_call.function.arguments:
                            tool_calls[tool_call_index]["function"]["arguments"] += tool_call.function.arguments

        # Summary after all chunks processed
        print("\n=== Streaming Summary ===")
        print(f"Total chunks: {chunk_count}")
        print(f"Content chunks: {content_chunks}")
        print(f"Tool call chunks: {tool_call_chunks}")

        if accumulated_content:
            print(f"\nAccumulated content: {accumulated_content}")

        if tool_calls:
            print("\nAccumulated tool calls:")
            for i, tool_call in enumerate(tool_calls):
                print(f"\nTool call {i + 1}:")
                print(f"ID: {tool_call['id']}")
                print(f"Type: {tool_call['type']}")
                print(f"Function: {tool_call['function']['name']}")
                print(f"Arguments: {tool_call['function']['arguments']}")

                # Try to parse arguments
                try:
                    args = json.loads(tool_call['function']['arguments'])
                    print("\nParsed arguments:")
                    print(json.dumps(args, indent=4))
                except Exception as e:
                    print(f"Error parsing arguments: {str(e)}")
        else:
            print("\nNo tool calls accumulated from streaming response.")

        # --- Added logging for last chunk and finish reason ---
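        # For OpenAI-compatible responses, finish_reason is typically
        # "tool_calls" when the model stopped to invoke tools and "stop" for
        # plain text; provider adapters may map other values.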
        finish_reason = None
        if last_chunk:
            try:
                if hasattr(last_chunk, 'choices') and last_chunk.choices:
                    finish_reason = last_chunk.choices[0].finish_reason
                last_chunk_data = last_chunk.model_dump() if hasattr(last_chunk, 'model_dump') else vars(last_chunk)
                print("\n--- Last Chunk Received ---")
                print(f"Finish Reason: {finish_reason}")
                print(f"Raw Last Chunk Data: {json.dumps(last_chunk_data, indent=2)}")
            except Exception as log_ex:
                print("\n--- Error logging last chunk ---")
                print(f"Error: {log_ex}")
                print(f"Last Chunk (repr): {repr(last_chunk)}")
        else:
            print("\n--- No last chunk recorded ---")
        # --- End added logging ---
    except Exception as e:
        logger.error(f"Error in streaming test: {str(e)}", exc_info=True)


async def main():
    """Run both tests for comparison."""
    # await test_simple_tool_call()
    await test_streaming_tool_call()


if __name__ == "__main__":
    asyncio.run(main())
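
# Usage sketch (file name is an assumption; adjust to where this script lives):
#   python test_llm_tool_calling.py
# Running it presumably requires the relevant provider API keys (e.g.
# OPENAI_API_KEY, ANTHROPIC_API_KEY) in the environment for
# services.llm.make_llm_api_call to authenticate.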