# suna/backend/agent/run.py

import os
import json
import re
from uuid import uuid4
from typing import Optional

# from agent.tools.message_tool import MessageTool
from agent.tools.message_tool import MessageTool
from agent.tools.sb_deploy_tool import SandboxDeployTool
from agent.tools.sb_expose_tool import SandboxExposeTool
from agent.tools.web_search_tool import SandboxWebSearchTool
from dotenv import load_dotenv
from utils.config import config

from agentpress.thread_manager import ThreadManager
from agentpress.response_processor import ProcessorConfig
from agent.tools.sb_shell_tool import SandboxShellTool
from agent.tools.sb_files_tool import SandboxFilesTool
from agent.tools.sb_browser_tool import SandboxBrowserTool
from agent.tools.data_providers_tool import DataProvidersTool
from agent.prompt import get_system_prompt
from utils.logger import logger
from utils.auth_utils import get_account_id_from_thread
from services.billing import check_billing_status
from agent.tools.sb_vision_tool import SandboxVisionTool
from services.langfuse import langfuse
from langfuse.client import StatefulTraceClient
from services.langfuse import langfuse
from agent.gemini_prompt import get_gemini_system_prompt
from agent.tools.mcp_tool_wrapper import MCPToolWrapper
from agentpress.tool import SchemaType

load_dotenv()

# Closing one of these XML tool tags in an assistant message ends the agent loop.
_TERMINATING_XML_TOOLS = ('ask', 'complete', 'web-browser-takeover')


def _register_agentpress_tools(thread_manager, project_id, thread_id, enabled_tools):
    """Register the built-in tools on ``thread_manager``.

    When ``enabled_tools`` is None every tool is registered. Otherwise a tool
    is registered only if ``enabled_tools[<key>]['enabled']`` is truthy.
    ``DataProvidersTool`` additionally requires ``config.RAPID_API_KEY`` in
    both modes. Registration order is fixed by the table below.
    """
    register_all = enabled_tools is None
    if register_all:
        logger.info("No agent specified - registering all tools for full Suna capabilities")
    else:
        logger.info("Custom agent specified - registering only enabled tools")

    def wanted(tool_key):
        return register_all or enabled_tools.get(tool_key, {}).get('enabled', False)

    # Tools are given project_id (not a sandbox object); some also need thread_id.
    sandbox_kwargs = {"project_id": project_id, "thread_manager": thread_manager}
    sandbox_thread_kwargs = {"project_id": project_id, "thread_id": thread_id, "thread_manager": thread_manager}

    registrations = (
        ('sb_shell_tool', SandboxShellTool, sandbox_kwargs),
        ('sb_files_tool', SandboxFilesTool, sandbox_kwargs),
        ('sb_browser_tool', SandboxBrowserTool, sandbox_thread_kwargs),
        ('sb_deploy_tool', SandboxDeployTool, sandbox_kwargs),
        ('sb_expose_tool', SandboxExposeTool, sandbox_kwargs),
        ('message_tool', MessageTool, {}),
        ('web_search_tool', SandboxWebSearchTool, sandbox_kwargs),
        ('sb_vision_tool', SandboxVisionTool, sandbox_thread_kwargs),
    )
    for tool_key, tool_class, tool_kwargs in registrations:
        if wanted(tool_key):
            thread_manager.add_tool(tool_class, **tool_kwargs)

    if config.RAPID_API_KEY and wanted('data_providers_tool'):
        thread_manager.add_tool(DataProvidersTool)


async def _register_mcp_tools(thread_manager, mcp_configs):
    """Register ``MCPToolWrapper`` and its dynamically discovered tools.

    Returns the wrapper instance found in the tool registry (or None if it
    could not be found). Initialization failures are logged and swallowed so
    the agent can continue without MCP tools.
    """
    logger.info(f"Registering MCP tool wrapper for {len(mcp_configs)} MCP servers")
    thread_manager.add_tool(MCPToolWrapper, mcp_configs=mcp_configs)

    # The registry is keyed by method name, so locate the wrapper by instance type.
    mcp_wrapper_instance = next(
        (
            tool_info['instance']
            for tool_info in thread_manager.tool_registry.tools.values()
            if isinstance(tool_info['instance'], MCPToolWrapper)
        ),
        None,
    )
    if not mcp_wrapper_instance:
        return mcp_wrapper_instance

    try:
        await mcp_wrapper_instance.initialize_and_register_tools()
        logger.info("MCP tools initialized successfully")

        # Re-register the schemas created during initialization so the
        # dynamically created tools are available for function calling.
        for method_name, schema_list in mcp_wrapper_instance.get_schemas().items():
            if method_name == 'call_mcp_tool':  # Skip the fallback method
                continue
            for schema in schema_list:
                if schema.schema_type == SchemaType.OPENAPI:
                    thread_manager.tool_registry.tools[method_name] = {
                        "instance": mcp_wrapper_instance,
                        "schema": schema
                    }
                    logger.debug(f"Registered dynamic MCP tool: {method_name}")
    except Exception as e:
        # Continue without MCP tools if initialization fails
        logger.error(f"Failed to initialize MCP tools: {e}")

    return mcp_wrapper_instance


def _build_mcp_prompt_section(mcp_wrapper_instance):
    """Return the system-prompt text describing the registered MCP tools."""
    mcp_info = (
        "\n\n--- MCP Tools Available ---\n"
        "You have access to external MCP (Model Context Protocol) server tools.\n"
        "MCP tools can be called directly using their native function names in the standard function calling format:\n"
        '<function_calls>\n'
        '<invoke name="{tool_name}">\n'
        '<parameter name="param1">value1</parameter>\n'
        '<parameter name="param2">value2</parameter>\n'
        '</invoke>\n'
        '</function_calls>\n\n'
        "Available MCP tools:\n"
    )

    try:
        # List the schemas actually registered on the wrapper.
        for method_name, schema_list in mcp_wrapper_instance.get_schemas().items():
            if method_name == 'call_mcp_tool':
                continue  # Skip the fallback method

            for schema in schema_list:
                if schema.schema_type != SchemaType.OPENAPI:
                    continue
                func_info = schema.schema.get('function', {})
                description = func_info.get('description', 'No description available')
                mcp_info += f"- **{method_name}**: {description}\n"

                props = func_info.get('parameters', {}).get('properties', {})
                if props:
                    mcp_info += f"  Parameters: {', '.join(props.keys())}\n"
    except Exception as e:
        # Entries appended before the failure are kept; the error line follows them.
        logger.error(f"Error listing MCP tools: {e}")
        mcp_info += "- Error loading MCP tool list\n"

    mcp_info += (
        "\n🚨 CRITICAL MCP TOOL RESULT INSTRUCTIONS 🚨\n"
        "When you use ANY MCP (Model Context Protocol) tools:\n"
        "1. ALWAYS read and use the EXACT results returned by the MCP tool\n"
        "2. For search tools: ONLY cite URLs, sources, and information from the actual search results\n"
        "3. For any tool: Base your response entirely on the tool's output - do NOT add external information\n"
        "4. DO NOT fabricate, invent, hallucinate, or make up any sources, URLs, or data\n"
        "5. If you need more information, call the MCP tool again with different parameters\n"
        "6. When writing reports/summaries: Reference ONLY the data from MCP tool results\n"
        "7. If the MCP tool doesn't return enough information, explicitly state this limitation\n"
        "8. Always double-check that every fact, URL, and reference comes from the MCP tool output\n"
        "\nIMPORTANT: MCP tool results are your PRIMARY and ONLY source of truth for external data!\n"
        "NEVER supplement MCP results with your training data or make assumptions beyond what the tools provide.\n"
    )
    return mcp_info


def _build_system_content(model_name, agent_config, mcp_wrapper_instance):
    """Assemble the system prompt text for this run.

    The default prompt is chosen by model name and, for model names that do
    not contain "anthropic", extended with a bundled sample response. A
    non-empty ``agent_config['system_prompt']`` replaces the default entirely.
    The MCP section is appended only when MCPs are configured and the wrapper
    reports itself initialized.
    """
    lowered_model = model_name.lower()

    if "gemini-2.5-flash" in lowered_model:
        default_system_content = get_gemini_system_prompt()
    else:
        # Use the original prompt - the LLM can only use tools that are registered
        default_system_content = get_system_prompt()

    # Add sample response for non-anthropic models
    if "anthropic" not in lowered_model:
        sample_response_path = os.path.join(os.path.dirname(__file__), 'sample_responses/1.txt')
        # Explicit encoding so the read does not depend on the platform default.
        with open(sample_response_path, 'r', encoding='utf-8') as file:
            sample_response = file.read()
        default_system_content = default_system_content + "\n\n <sample_assistant_response>" + sample_response + "</sample_assistant_response>"

    if agent_config and agent_config.get('system_prompt'):
        # Completely replace the default system prompt with the custom one;
        # this prevents confusion and tool hallucination.
        system_content = agent_config['system_prompt'].strip()
        logger.info(f"Using ONLY custom agent system prompt for: {agent_config.get('name', 'Unknown')}")
    else:
        system_content = default_system_content
        logger.info("Using default system prompt only")

    if agent_config and agent_config.get('configured_mcps') and mcp_wrapper_instance and mcp_wrapper_instance._initialized:
        system_content += _build_mcp_prompt_section(mcp_wrapper_instance)

    return system_content


async def _build_temporary_message(client, thread_id, trace):
    """Build the transient user message carrying browser state and image context.

    Reads the latest ``browser_state`` and ``image_context`` rows for the
    thread. The image-context row is deleted after it has been read. Parsing
    errors are logged and reported on ``trace`` but never raised. Returns a
    ``{"role": "user", "content": [...]}`` dict, or None when there is nothing
    to attach.
    """
    content_blocks = []  # text / image_url blocks

    # ---- Latest browser state ----
    latest_browser_state_msg = await client.table('messages').select('*').eq('thread_id', thread_id).eq('type', 'browser_state').order('created_at', desc=True).limit(1).execute()
    if latest_browser_state_msg.data:
        try:
            browser_content = json.loads(latest_browser_state_msg.data[0]["content"])
            screenshot_base64 = browser_content.get("screenshot_base64")
            screenshot_url = browser_content.get("screenshot_url")

            # Textual state is sent without the (large) screenshot payloads.
            browser_state_text = browser_content.copy()
            browser_state_text.pop('screenshot_base64', None)
            browser_state_text.pop('screenshot_url', None)

            if browser_state_text:
                content_blocks.append({
                    "type": "text",
                    "text": f"The following is the current state of the browser:\n{json.dumps(browser_state_text, indent=2)}"
                })

            # Prioritize screenshot_url; fall back to inline base64.
            if screenshot_url:
                content_blocks.append({
                    "type": "image_url",
                    "image_url": {
                        "url": screenshot_url,
                    }
                })
            elif screenshot_base64:
                content_blocks.append({
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{screenshot_base64}",
                    }
                })
            else:
                logger.warning("Browser state found but no screenshot data.")

        except Exception as e:
            logger.error(f"Error parsing browser state: {e}")
            trace.event(name="error_parsing_browser_state", level="ERROR", status_message=(f"{e}"))

    # ---- Latest image context ----
    latest_image_context_msg = await client.table('messages').select('*').eq('thread_id', thread_id).eq('type', 'image_context').order('created_at', desc=True).limit(1).execute()
    if latest_image_context_msg.data:
        try:
            image_context_content = json.loads(latest_image_context_msg.data[0]["content"])
            base64_image = image_context_content.get("base64")
            mime_type = image_context_content.get("mime_type")
            file_path = image_context_content.get("file_path", "unknown file")

            if base64_image and mime_type:
                content_blocks.append({
                    "type": "text",
                    "text": f"Here is the image you requested to see: '{file_path}'"
                })
                content_blocks.append({
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:{mime_type};base64,{base64_image}",
                    }
                })
            else:
                logger.warning(f"Image context found for '{file_path}' but missing base64 or mime_type.")

            # The image context row is removed once it has been read.
            await client.table('messages').delete().eq('message_id', latest_image_context_msg.data[0]["message_id"]).execute()
        except Exception as e:
            logger.error(f"Error parsing image context: {e}")
            trace.event(name="error_parsing_image_context", level="ERROR", status_message=(f"{e}"))

    if content_blocks:
        return {"role": "user", "content": content_blocks}
    return None


def _max_tokens_for_model(model_name):
    """Return the max_tokens override for ``model_name``, or None for no override."""
    lowered_model = model_name.lower()
    if "sonnet" in lowered_model:
        return 64000
    if "gpt-4" in lowered_model:
        return 4096
    return None


def _detect_terminating_xml_tool(text):
    """Return the first terminating XML tool whose closing tag is in ``text``, else None."""
    for tool_name in _TERMINATING_XML_TOOLS:
        if f'</{tool_name}>' in text:
            return tool_name
    return None


async def run_agent(
    thread_id: str,
    project_id: str,
    stream: bool,
    thread_manager: Optional[ThreadManager] = None,
    native_max_auto_continues: int = 25,
    max_iterations: int = 100,
    model_name: str = "anthropic/claude-3-7-sonnet-latest",
    enable_thinking: Optional[bool] = False,
    reasoning_effort: Optional[str] = 'low',
    enable_context_manager: bool = True,
    agent_config: Optional[dict] = None,    
    trace: Optional[StatefulTraceClient] = None,
    is_agent_builder: Optional[bool] = False
):
    """Run the development agent with specified configuration.

    This is an async generator. It repeatedly calls
    ``thread_manager.run_thread`` and forwards every chunk of the response,
    plus its own ``{"type": "status", ...}`` dicts for billing stops and
    errors. The loop ends when:

    * ``max_iterations`` is reached,
    * the billing check fails,
    * the most recent assistant/tool/user message is from the assistant,
    * an error is reported or raised, or
    * the agent signals termination (status metadata flag, or a closing
      ``ask`` / ``complete`` / ``web-browser-takeover`` XML tag).

    Args:
        thread_id: Thread whose messages drive the run.
        project_id: Project that owns the sandbox; passed to sandbox tools.
        stream: Forwarded to ``run_thread``.
        thread_manager: Currently ignored; a new ``ThreadManager`` is always
            created (see NOTE in the body).
        native_max_auto_continues: Forwarded to ``run_thread``.
        max_iterations: Upper bound on loop iterations.
        model_name: Selects the default prompt and the ``max_tokens``
            override, and is forwarded as ``llm_model``.
        enable_thinking: Forwarded to ``run_thread``.
        reasoning_effort: Forwarded to ``run_thread``.
        enable_context_manager: Forwarded to ``run_thread``.
        agent_config: Optional custom agent settings. Keys read here:
            ``name``, ``agentpress_tools``, ``configured_mcps``,
            ``system_prompt``, ``target_agent_id``.
        trace: Langfuse trace to report to; one is created when omitted.
        is_agent_builder: When truthy, an ``UpdateAgentTool`` is constructed
            (see NOTE in the body).

    Yields:
        Response chunks from ``run_thread`` and status dicts.

    Raises:
        ValueError: If the account cannot be determined, the project does not
            exist, or the project has no sandbox id.
    """
    logger.info(f"🚀 Starting agent with model: {model_name}")
    if agent_config:
        logger.info(f"Using custom agent: {agent_config.get('name', 'Unknown')}")

    if not trace:
        trace = langfuse.trace(name="run_agent", session_id=thread_id, metadata={"project_id": project_id})
    # NOTE(review): the ``thread_manager`` argument is unconditionally replaced
    # here, so a caller-supplied manager is never used. Kept as-is because
    # callers may rely on a fresh, trace-bound manager - confirm intent.
    thread_manager = ThreadManager(trace=trace)

    client = await thread_manager.db.client

    # Get account ID from thread for billing checks
    account_id = await get_account_id_from_thread(client, thread_id)
    if not account_id:
        raise ValueError("Could not determine account ID for thread")

    # Get sandbox info from project
    project = await client.table('projects').select('*').eq('project_id', project_id).execute()
    if not project.data:
        raise ValueError(f"Project {project_id} not found")

    project_data = project.data[0]
    # ``or {}`` also covers a 'sandbox' key that is present but null, which
    # would otherwise fail with AttributeError instead of the ValueError below.
    sandbox_info = project_data.get('sandbox') or {}
    if not sandbox_info.get('id'):
        raise ValueError(f"No sandbox found for project {project_id}")

    # Get enabled tools from agent config, or use defaults (None = all tools)
    enabled_tools = None
    if agent_config and 'agentpress_tools' in agent_config:
        enabled_tools = agent_config['agentpress_tools']
        logger.info("Using custom tool configuration from agent")

    if is_agent_builder:
        logger.info("Agent builder mode - registering only update agent tool")
        from agent.tools.update_agent_tool import UpdateAgentTool
        from services.supabase import DBConnection
        db = DBConnection()
        target_agent_id = agent_config.get('target_agent_id') if agent_config else None
        # NOTE(review): the tool is constructed but never added to
        # thread_manager, and the regular tools below are still registered,
        # which contradicts the log message above. The registration call is
        # not added here because add_tool's expected kwargs for this tool are
        # not visible from this file - TODO confirm and register.
        update_tool = UpdateAgentTool(db, target_agent_id)

    _register_agentpress_tools(thread_manager, project_id, thread_id, enabled_tools)

    # Register MCP tool wrapper if agent has configured MCPs
    mcp_wrapper_instance = None
    if agent_config and agent_config.get('configured_mcps'):
        mcp_wrapper_instance = await _register_mcp_tools(thread_manager, agent_config['configured_mcps'])

    system_content = _build_system_content(model_name, agent_config, mcp_wrapper_instance)
    system_message = { "role": "system", "content": system_content }

    iteration_count = 0
    continue_execution = True

    # Record the latest user message as the trace input.
    latest_user_message = await client.table('messages').select('*').eq('thread_id', thread_id).eq('type', 'user').order('created_at', desc=True).limit(1).execute()
    if latest_user_message.data:
        data = latest_user_message.data[0]['content']
        # Content may arrive as a JSON string or already decoded, matching how
        # chunk content is handled in the stream loop below.
        if isinstance(data, str):
            data = json.loads(data)
        trace.update(input=data['content'])

    try:
        while continue_execution and iteration_count < max_iterations:
            iteration_count += 1
            logger.info(f"🔄 Running iteration {iteration_count} of {max_iterations}...")

            # Billing check on each iteration - still needed within the iterations
            can_run, message, _subscription = await check_billing_status(client, account_id)
            if not can_run:
                error_msg = f"Billing limit reached: {message}"
                trace.event(name="billing_limit_reached", level="ERROR", status_message=(f"{error_msg}"))
                # Yield a special message to indicate billing limit reached
                yield {
                    "type": "status",
                    "status": "stopped",
                    "message": error_msg
                }
                break

            # Stop if the last assistant/tool/user message is from the assistant.
            latest_message = await client.table('messages').select('*').eq('thread_id', thread_id).in_('type', ['assistant', 'tool', 'user']).order('created_at', desc=True).limit(1).execute()
            if latest_message.data:
                message_type = latest_message.data[0].get('type')
                if message_type == 'assistant':
                    logger.info("Last message was from assistant, stopping execution")
                    trace.event(name="last_message_from_assistant", level="DEFAULT", status_message=("Last message was from assistant, stopping execution"))
                    continue_execution = False
                    break

            temporary_message = await _build_temporary_message(client, thread_id, trace)
            max_tokens = _max_tokens_for_model(model_name)

            generation = trace.generation(name="thread_manager.run_thread")
            full_response = ""
            try:
                # Make the LLM call and process the response
                response = await thread_manager.run_thread(
                    thread_id=thread_id,
                    system_prompt=system_message,
                    stream=stream,
                    llm_model=model_name,
                    llm_temperature=0,
                    llm_max_tokens=max_tokens,
                    tool_choice="auto",
                    max_xml_tool_calls=1,
                    temporary_message=temporary_message,
                    processor_config=ProcessorConfig(
                        xml_tool_calling=True,
                        native_tool_calling=False,
                        execute_tools=True,
                        execute_on_stream=True,
                        tool_execution_strategy="parallel",
                        xml_adding_strategy="user_message"
                    ),
                    native_max_auto_continues=native_max_auto_continues,
                    include_xml_examples=True,
                    enable_thinking=enable_thinking,
                    reasoning_effort=reasoning_effort,
                    enable_context_manager=enable_context_manager,
                    generation=generation
                )

                if isinstance(response, dict) and response.get("status") == "error":
                    logger.error(f"Error response from run_thread: {response.get('message', 'Unknown error')}")
                    trace.event(name="error_response_from_run_thread", level="ERROR", status_message=(f"{response.get('message', 'Unknown error')}"))
                    yield response
                    break

                # Track if we see ask, complete, or web-browser-takeover tool calls
                last_tool_call = None
                agent_should_terminate = False
                error_detected = False

                try:
                    async for chunk in response:
                        # An error chunk stops the run after this iteration, but
                        # the rest of the stream is still drained and forwarded.
                        if isinstance(chunk, dict) and chunk.get('type') == 'status' and chunk.get('status') == 'error':
                            logger.error(f"Error chunk detected: {chunk.get('message', 'Unknown error')}")
                            trace.event(name="error_chunk_detected", level="ERROR", status_message=(f"{chunk.get('message', 'Unknown error')}"))
                            error_detected = True
                            yield chunk  # Forward the error chunk
                            continue

                        # Check for termination signal in status messages
                        if chunk.get('type') == 'status':
                            try:
                                metadata = chunk.get('metadata', {})
                                if isinstance(metadata, str):
                                    metadata = json.loads(metadata)

                                if metadata.get('agent_should_terminate'):
                                    agent_should_terminate = True
                                    logger.info("Agent termination signal detected in status message")
                                    trace.event(name="agent_termination_signal_detected", level="DEFAULT", status_message="Agent termination signal detected in status message")

                                    # Extract the tool name from the status content if available
                                    content = chunk.get('content', {})
                                    if isinstance(content, str):
                                        content = json.loads(content)

                                    if content.get('function_name'):
                                        last_tool_call = content['function_name']
                                    elif content.get('xml_tag_name'):
                                        last_tool_call = content['xml_tag_name']

                            except Exception as e:
                                logger.debug(f"Error parsing status message for termination check: {e}")

                        # Check for </ask>, </complete>, or </web-browser-takeover> in assistant content chunks
                        if chunk.get('type') == 'assistant' and 'content' in chunk:
                            try:
                                # The content field might be a JSON string or object
                                content = chunk.get('content', '{}')
                                if isinstance(content, str):
                                    assistant_content_json = json.loads(content)
                                else:
                                    assistant_content_json = content

                                # The actual text content is nested within
                                assistant_text = assistant_content_json.get('content', '')
                                # Only string content is accumulated; concatenating a
                                # non-string value would raise TypeError.
                                if isinstance(assistant_text, str):
                                    full_response += assistant_text
                                    xml_tool = _detect_terminating_xml_tool(assistant_text)
                                    if xml_tool:
                                        last_tool_call = xml_tool
                                        logger.info(f"Agent used XML tool: {xml_tool}")
                                        trace.event(name="agent_used_xml_tool", level="DEFAULT", status_message=(f"Agent used XML tool: {xml_tool}"))
                            except json.JSONDecodeError:
                                # Handle cases where content might not be valid JSON
                                logger.warning(f"Warning: Could not parse assistant content JSON: {chunk.get('content')}")
                                trace.event(name="warning_could_not_parse_assistant_content_json", level="WARNING", status_message=(f"Warning: Could not parse assistant content JSON: {chunk.get('content')}"))
                            except Exception as e:
                                logger.error(f"Error processing assistant chunk: {e}")
                                trace.event(name="error_processing_assistant_chunk", level="ERROR", status_message=(f"Error processing assistant chunk: {e}"))

                        yield chunk

                    # Check if we should stop based on the last tool call or error
                    if error_detected:
                        logger.info("Stopping due to error detected in response")
                        trace.event(name="stopping_due_to_error_detected_in_response", level="DEFAULT", status_message=("Stopping due to error detected in response"))
                        generation.end(output=full_response, status_message="error_detected", level="ERROR")
                        break

                    if agent_should_terminate or last_tool_call in _TERMINATING_XML_TOOLS:
                        logger.info(f"Agent decided to stop with tool: {last_tool_call}")
                        trace.event(name="agent_decided_to_stop_with_tool", level="DEFAULT", status_message=(f"Agent decided to stop with tool: {last_tool_call}"))
                        generation.end(output=full_response, status_message="agent_stopped")
                        continue_execution = False

                except Exception as e:
                    # Report the failure to the trace and the consumer, then stop
                    # all iterations (the exception is not re-raised).
                    error_msg = f"Error during response streaming: {str(e)}"
                    logger.error(f"Error: {error_msg}")
                    trace.event(name="error_during_response_streaming", level="ERROR", status_message=(f"Error during response streaming: {str(e)}"))
                    generation.end(output=full_response, status_message=error_msg, level="ERROR")
                    yield {
                        "type": "status",
                        "status": "error",
                        "message": error_msg
                    }
                    break

            except Exception as e:
                # Report the failure to the trace and the consumer, then stop
                # all iterations (the exception is not re-raised).
                error_msg = f"Error running thread: {str(e)}"
                logger.error(f"Error: {error_msg}")
                trace.event(name="error_running_thread", level="ERROR", status_message=(f"Error running thread: {str(e)}"))
                yield {
                    "type": "status",
                    "status": "error",
                    "message": error_msg
                }
                break

            # NOTE(review): on the agent-stop path generation.end() was already
            # called above, so this is a second end() on the same generation.
            # Left unchanged to keep the emitted trace events as they are.
            generation.end(output=full_response)
    finally:
        # Flush Langfuse events even if the consumer closes the generator early
        # or an exception escapes the loop.
        langfuse.flush()
  

# # TESTING

# async def test_agent():
#     """Test function to run the agent with a sample query"""
#     from agentpress.thread_manager import ThreadManager
#     from services.supabase import DBConnection

#     # Initialize ThreadManager
#     thread_manager = ThreadManager()

#     # Create a test thread directly with Postgres function
#     client = await DBConnection().client

#     try:
#         # Get user's personal account
#         account_result = await client.rpc('get_personal_account').execute()

#         # if not account_result.data:
#         #     print("Error: No personal account found")
#         #     return

#         account_id = "a5fe9cb6-4812-407e-a61c-fe95b7320c59"

#         if not account_id:
#             print("Error: Could not get account ID")
#             return

#         # Find or create a test project in the user's account
#         project_result = await client.table('projects').select('*').eq('name', 'test11').eq('account_id', account_id).execute()

#         if project_result.data and len(project_result.data) > 0:
#             # Use existing test project
#             project_id = project_result.data[0]['project_id']
#             print(f"\n🔄 Using existing test project: {project_id}")
#         else:
#             # Create new test project if none exists
#             project_result = await client.table('projects').insert({
#                 "name": "test11",
#                 "account_id": account_id
#             }).execute()
#             project_id = project_result.data[0]['project_id']
#             print(f"\n✨ Created new test project: {project_id}")

#         # Create a thread for this project
#         thread_result = await client.table('threads').insert({
#             'project_id': project_id,
#             'account_id': account_id
#         }).execute()
#         thread_data = thread_result.data[0] if thread_result.data else None

#         if not thread_data:
#             print("Error: No thread data returned")
#             return

#         thread_id = thread_data['thread_id']
#     except Exception as e:
#         print(f"Error setting up thread: {str(e)}")
#         return

#     print(f"\n🤖 Agent Thread Created: {thread_id}\n")

#     # Interactive message input loop
#     while True:
#         # Get user input
#         user_message = input("\n💬 Enter your message (or 'exit' to quit): ")
#         if user_message.lower() == 'exit':
#             break

#         if not user_message.strip():
#             print("\n🔄 Running agent...\n")
#             await process_agent_response(thread_id, project_id, thread_manager)
#             continue

#         # Add the user message to the thread
#         await thread_manager.add_message(
#             thread_id=thread_id,
#             type="user",
#             content={
#                 "role": "user",
#                 "content": user_message
#             },
#             is_llm_message=True
#         )

#         print("\n🔄 Running agent...\n")
#         await process_agent_response(thread_id, project_id, thread_manager)

#     print("\n👋 Test completed. Goodbye!")

# async def process_agent_response(
#     thread_id: str,
#     project_id: str,
#     thread_manager: ThreadManager,
#     stream: bool = True,
#     model_name: str = "anthropic/claude-3-7-sonnet-latest",
#     enable_thinking: Optional[bool] = False,
#     reasoning_effort: Optional[str] = 'low',
#     enable_context_manager: bool = True
# ):
#     """Process the streaming response from the agent."""
#     chunk_counter = 0
#     current_response = ""
#     tool_usage_counter = 0 # Renamed from tool_call_counter as we track usage via status

#     # Create a test sandbox for processing with a unique test prefix to avoid conflicts with production sandboxes
#     sandbox_pass = str(uuid4())
#     sandbox = create_sandbox(sandbox_pass)

#     # Store the original ID so we can refer to it
#     original_sandbox_id = sandbox.id

#     # Generate a clear test identifier
#     test_prefix = f"test_{uuid4().hex[:8]}_"
#     logger.info(f"Created test sandbox with ID {original_sandbox_id} and test prefix {test_prefix}")

#     # Log the sandbox URL for debugging
#     print(f"\033[91mTest sandbox created: {str(sandbox.get_preview_link(6080))}/vnc_lite.html?password={sandbox_pass}\033[0m")

#     async for chunk in run_agent(
#         thread_id=thread_id,
#         project_id=project_id,
#         sandbox=sandbox,
#         stream=stream,
#         thread_manager=thread_manager,
#         native_max_auto_continues=25,
#         model_name=model_name,
#         enable_thinking=enable_thinking,
#         reasoning_effort=reasoning_effort,
#         enable_context_manager=enable_context_manager
#     ):
#         chunk_counter += 1
#         # print(f"CHUNK: {chunk}") # Uncomment for debugging

#         if chunk.get('type') == 'assistant':
#             # Try parsing the content JSON
#             try:
#                 # Handle content as string or object
#                 content = chunk.get('content', '{}')
#                 if isinstance(content, str):
#                     content_json = json.loads(content)
#                 else:
#                     content_json = content

#                 actual_content = content_json.get('content', '')
#                 # Print the actual assistant text content as it comes
#                 if actual_content:
#                      # Check if it contains XML tool tags, if so, print the whole tag for context
#                     if '<' in actual_content and '>' in actual_content:
#                          # Avoid printing potentially huge raw content if it's not just text
#                          if len(actual_content) < 500: # Heuristic limit
#                             print(actual_content, end='', flush=True)
#                          else:
#                              # Maybe just print a summary if it's too long or contains complex XML
#                              if '</ask>' in actual_content: print("<ask>...</ask>", end='', flush=True)
#                              elif '</complete>' in actual_content: print("<complete>...</complete>", end='', flush=True)
#                              else: print("<tool_call>...</tool_call>", end='', flush=True) # Generic case
#                     else:
#                         # Regular text content
#                          print(actual_content, end='', flush=True)
#                     current_response += actual_content # Accumulate only text part
#             except json.JSONDecodeError:
#                  # If content is not JSON (e.g., just a string chunk), print directly
#                  raw_content = chunk.get('content', '')
#                  print(raw_content, end='', flush=True)
#                  current_response += raw_content
#             except Exception as e:
#                  print(f"\nError processing assistant chunk: {e}\n")

#         elif chunk.get('type') == 'tool': # Updated from 'tool_result'
#             # Add timestamp and format tool result nicely
#             tool_name = "UnknownTool" # Try to get from metadata if available
#             result_content = "No content"

#             # Parse metadata - handle both string and dict formats
#             metadata = chunk.get('metadata', {})
#             if isinstance(metadata, str):
#                 try:
#                     metadata = json.loads(metadata)
#                 except json.JSONDecodeError:
#                     metadata = {}

#             linked_assistant_msg_id = metadata.get('assistant_message_id')
#             parsing_details = metadata.get('parsing_details')
#             if parsing_details:
#                 tool_name = parsing_details.get('xml_tag_name', 'UnknownTool') # Get name from parsing details

#             try:
#                 # Content is a JSON string or object
#                 content = chunk.get('content', '{}')
#                 if isinstance(content, str):
#                     content_json = json.loads(content)
#                 else:
#                     content_json = content

#                 # The actual tool result is nested inside content.content
#                 tool_result_str = content_json.get('content', '')
#                 # Extract the actual tool result string (remove the outer <{tool_name}> tag if present)
#                 match = re.search(rf'<{tool_name}>(.*?)</{tool_name}>', tool_result_str, re.DOTALL)
#                 if match:
#                     result_content = match.group(1).strip()
#                     # Try to parse the result string itself as JSON for pretty printing
#                     try:
#                         result_obj = json.loads(result_content)
#                         result_content = json.dumps(result_obj, indent=2)
#                     except json.JSONDecodeError:
#                          # Keep as string if not JSON
#                          pass
#                 else:
#                      # Fallback if tag extraction fails
#                      result_content = tool_result_str

#             except json.JSONDecodeError:
#                 result_content = chunk.get('content', 'Error parsing tool content')
#             except Exception as e:
#                 result_content = f"Error processing tool chunk: {e}"

#             print(f"\n\n🛠️  TOOL RESULT [{tool_name}] → {result_content}")

#         elif chunk.get('type') == 'status':
#             # Log tool status changes
#             try:
#                 # Handle content as string or object
#                 status_content = chunk.get('content', '{}')
#                 if isinstance(status_content, str):
#                     status_content = json.loads(status_content)

#                 status_type = status_content.get('status_type')
#                 function_name = status_content.get('function_name', '')
#                 xml_tag_name = status_content.get('xml_tag_name', '') # Get XML tag if available
#                 tool_name = xml_tag_name or function_name # Prefer XML tag name

#                 if status_type == 'tool_started' and tool_name:
#                     tool_usage_counter += 1
#                     print(f"\n⏳ TOOL STARTING #{tool_usage_counter} [{tool_name}]")
#                     print("  " + "-" * 40)
#                     # Return to the current content display
#                     if current_response:
#                         print("\nContinuing response:", flush=True)
#                         print(current_response, end='', flush=True)
#                 elif status_type == 'tool_completed' and tool_name:
#                      status_emoji = "✅"
#                      print(f"\n{status_emoji} TOOL COMPLETED: {tool_name}")
#                 elif status_type == 'finish':
#                      finish_reason = status_content.get('finish_reason', '')
#                      if finish_reason:
#                          print(f"\n📌 Finished: {finish_reason}")
#                 # else: # Print other status types if needed for debugging
#                 #    print(f"\nℹ️ STATUS: {chunk.get('content')}")

#             except json.JSONDecodeError:
#                  print(f"\nWarning: Could not parse status content JSON: {chunk.get('content')}")
#             except Exception as e:
#                 print(f"\nError processing status chunk: {e}")


#         # Removed elif chunk.get('type') == 'tool_call': block

#     # Update final message
#     print(f"\n\n✅ Agent run completed with {tool_usage_counter} tool executions")

#     # Try to clean up the test sandbox if possible
#     try:
#         # Attempt to delete/archive the sandbox to clean up resources
#         # Note: Actual deletion may depend on the Daytona SDK's capabilities
#         logger.info(f"Attempting to clean up test sandbox {original_sandbox_id}")
#         # If there's a method to archive/delete the sandbox, call it here
#         # Example: daytona.archive_sandbox(sandbox.id)
#     except Exception as e:
#         logger.warning(f"Failed to clean up test sandbox {original_sandbox_id}: {str(e)}")

# if __name__ == "__main__":
#     import asyncio

#     # Configure any environment variables or setup needed for testing
#     load_dotenv()  # Ensure environment variables are loaded

#     # Run the test function
#     asyncio.run(test_agent())
-												first edition

											
										
										
											2025-04-16 05:42:56 +08:00
+								import os
-												fe reference wip

											
										
										
											2025-03-30 14:48:57 +08:00
+								import json
-												wip

											
										
										
											2025-04-18 18:50:39 +08:00
+								import re
-												porject_id and migration

											
										
										
											2025-04-10 18:52:56 +08:00
+								from uuid import uuid4
-												renaming all

											
										
										
											2025-04-17 01:23:28 +08:00
+								from typing import Optional
-												porject_id and migration

											
										
										
											2025-04-10 18:52:56 +08:00
-												wip

											
										
										
											2025-04-17 06:13:04 +08:00
+								# from agent.tools.message_tool import MessageTool
-												wip

											
										
										
											2025-04-18 06:17:48 +08:00
+								from agent.tools.message_tool import MessageTool
-												wip

											
										
										
											2025-04-13 05:40:01 +08:00
+								from agent.tools.sb_deploy_tool import SandboxDeployTool
-												add port exposing

											
										
										
											2025-04-21 22:01:51 +08:00
+								from agent.tools.sb_expose_tool import SandboxExposeTool
-												session shell tool, save web search as file, change prompt to process scrapes via cli

											
										
										
											2025-05-10 11:46:48 +08:00
+								from agent.tools.web_search_tool import SandboxWebSearchTool
-												porject_id and migration

											
										
										
											2025-04-10 18:52:56 +08:00
+								from dotenv import load_dotenv
-												add conf, with local/dev/prod mode

											
										
										
											2025-04-24 08:45:58 +08:00
+								from utils.config import config
-												porject_id and migration

											
										
										
											2025-04-10 18:52:56 +08:00
-												Refactor to messages table

* wip

* bedrock support

* wip

* frontend messages table imp

* messages imp
											
										
										
											2025-04-06 17:10:18 +08:00
+								from agentpress.thread_manager import ThreadManager
-												porject_id and migration

											
										
										
											2025-04-10 18:52:56 +08:00
+								from agentpress.response_processor import ProcessorConfig
-												website

											
										
										
											2025-04-09 07:37:06 +08:00
+								from agent.tools.sb_shell_tool import SandboxShellTool
-												whoop

											
										
										
											2025-04-09 20:54:25 +08:00
+								from agent.tools.sb_files_tool import SandboxFilesTool
-												bring back browser

											
										
										
											2025-04-14 21:06:02 +08:00
+								from agent.tools.sb_browser_tool import SandboxBrowserTool
-												renaming all

											
										
										
											2025-04-17 01:23:28 +08:00
+								from agent.tools.data_providers_tool import DataProvidersTool
-												fix duplicate agent run

											
										
										
											2025-04-07 00:45:02 +08:00
+								from agent.prompt import get_system_prompt
-												scrape & read based approach v1, non-optimal but better for ctxt

											
										
										
											2025-05-10 12:37:07 +08:00
+								from utils.logger import logger
-												python billing

											
										
										
											2025-04-27 07:47:06 +08:00
+								from utils.auth_utils import get_account_id_from_thread
 								from services.billing import check_billing_status
-												support images

											
										
										
											2025-04-28 08:28:21 +08:00
+								from agent.tools.sb_vision_tool import SandboxVisionTool
-												feat(backend): langfuse traces

											
										
										
											2025-05-21 08:39:28 +08:00
+								from services.langfuse import langfuse
 								from langfuse.client import StatefulTraceClient
-												refactor(api): enhance trace handling in response processing

											
										
										
											2025-05-24 20:50:36 +08:00
+								from services.langfuse import langfuse
-												gemini + fix syntax bug in prompts

											
										
										
											2025-05-22 00:19:46 +08:00
+								from agent.gemini_prompt import get_gemini_system_prompt
-												chore(dev): mcp setup

											
										
										
											2025-05-25 02:17:55 +08:00
+								from agent.tools.mcp_tool_wrapper import MCPToolWrapper
-												chore(dev): auto generate xml schema or mcp

											
										
										
											2025-05-30 15:31:11 +08:00
+								from agentpress.tool import SchemaType
-												wip

											
										
										
											2025-04-11 00:02:21 +08:00
-												v1 full refactor LLM Response Processor  (#24)


											
										
										
											2025-04-04 23:06:49 +08:00
+								load_dotenv()
-												fe reference wip

											
										
										
											2025-03-30 14:48:57 +08:00
-												thinking & reasoning

											
										
										
											2025-04-18 12:49:41 +08:00
+								async def run_agent(
 								    thread_id: str,
 								    project_id: str,
 								    stream: bool,
 								    thread_manager: Optional[ThreadManager] = None,
 								    native_max_auto_continues: int = 25,
-												run, thread_run, response processor, raise errs properly

											
										
										
											2025-05-10 09:58:57 +08:00
+								    max_iterations: int = 100,
-												fix

											
										
										
											2025-05-05 08:53:24 +08:00
+								    model_name: str = "anthropic/claude-3-7-sonnet-latest",
-												thinking & reasoning

											
										
										
											2025-04-18 12:49:41 +08:00
+								    enable_thinking: Optional[bool] = False,
-												disable context manager

											
										
										
											2025-04-18 14:13:44 +08:00
+								    reasoning_effort: Optional[str] = 'low',
-												chore(dev): add custom agent context based on thread id

											
										
										
											2025-05-24 15:08:41 +08:00
+								    enable_context_manager: bool = True,
-												chore(dev_): merge main into mcp-5

											
										
										
											2025-05-24 16:15:44 +08:00
+								    agent_config: Optional[dict] = None,
-												chore(dev): agent builder mode

											
										
										
											2025-05-31 15:08:39 +08:00
+								    trace: Optional[StatefulTraceClient] = None,
 								    is_agent_builder: Optional[bool] = False
-												thinking & reasoning

											
										
										
											2025-04-18 12:49:41 +08:00
+								):
-												fe reference wip

											
										
										
											2025-03-30 14:48:57 +08:00
+								    """Run the development agent with specified configuration."""
-												scrape & read based approach v1, non-optimal but better for ctxt

											
										
										
											2025-05-10 12:37:07 +08:00
+								    logger.info(f"🚀 Starting agent with model: {model_name}")
-												chore(dev): add custom agent context based on thread id

											
										
										
											2025-05-24 15:08:41 +08:00
+								    if agent_config:
 								        logger.info(f"Using custom agent: {agent_config.get('name', 'Unknown')}")
-												Refactor agent and tool configurations to support new model names and parameters

- Adjusted max_tokens logic based on model type, including new conditions for "gpt-4".
- Enhanced MessageTool to include detailed documentation and examples for user interactions.
- Improved ThreadManager to handle new tool choices and reasoning efforts.
- Updated LLM service to incorporate enable_thinking and reasoning_effort parameters.
- Modified chat input component to dynamically manage model selection and thinking options.

											
										
										
											2025-04-26 19:53:09 +08:00
-												refactor(api): enhance trace handling in response processing

											
										
										
											2025-05-24 20:50:36 +08:00
+								    if not trace:
 								        trace = langfuse.trace(name="run_agent", session_id=thread_id, metadata={"project_id": project_id})
 								    thread_manager = ThreadManager(trace=trace)
-												change to exec in deploy

											
										
										
											2025-04-23 21:43:57 +08:00
-												porject_id and migration

											
										
										
											2025-04-10 18:52:56 +08:00
+								    client = await thread_manager.db.client
-												billing checks and UI

											
										
										
											2025-04-14 08:32:08 +08:00
 								    # Get account ID from thread for billing checks
 								    account_id = await get_account_id_from_thread(client, thread_id)
 								    if not account_id:
 								        raise ValueError("Could not determine account ID for thread")
-												project_id to get sandbox id

											
										
										
											2025-04-23 17:34:00 +08:00
+								    # Get sandbox info from project
 								    project = await client.table('projects').select('*').eq('project_id', project_id).execute()
 								    if not project.data or len(project.data) == 0:
 								        raise ValueError(f"Project {project_id} not found")
-												Refactor agent and tool configurations to support new model names and parameters

- Adjusted max_tokens logic based on model type, including new conditions for "gpt-4".
- Enhanced MessageTool to include detailed documentation and examples for user interactions.
- Improved ThreadManager to handle new tool choices and reasoning efforts.
- Updated LLM service to incorporate enable_thinking and reasoning_effort parameters.
- Modified chat input component to dynamically manage model selection and thinking options.

											
										
										
											2025-04-26 19:53:09 +08:00
-												project_id to get sandbox id

											
										
										
											2025-04-23 17:34:00 +08:00
+								    project_data = project.data[0]
 								    sandbox_info = project_data.get('sandbox', {})
 								    if not sandbox_info.get('id'):
 								        raise ValueError(f"No sandbox found for project {project_id}")
-												Refactor agent and tool configurations to support new model names and parameters

- Adjusted max_tokens logic based on model type, including new conditions for "gpt-4".
- Enhanced MessageTool to include detailed documentation and examples for user interactions.
- Improved ThreadManager to handle new tool choices and reasoning efforts.
- Updated LLM service to incorporate enable_thinking and reasoning_effort parameters.
- Modified chat input component to dynamically manage model selection and thinking options.

											
										
										
											2025-04-26 19:53:09 +08:00
-												project_id to get sandbox id

											
										
										
											2025-04-23 18:00:25 +08:00
+								    # Initialize tools with project_id instead of sandbox object
 								    # This ensures each tool independently verifies it's operating on the correct project
-												chore(dev): add custom agent context based on thread id

											
										
										
											2025-05-24 15:08:41 +08:00
 								    # Get enabled tools from agent config, or use defaults
 								    enabled_tools = None
 								    if agent_config and 'agentpress_tools' in agent_config:
 								        enabled_tools = agent_config['agentpress_tools']
 								        logger.info(f"Using custom tool configuration from agent")
 								    # Register tools based on configuration
 								    # If no agent config (enabled_tools is None), register ALL tools for full Suna capabilities
 								    # If agent config exists, only register explicitly enabled tools
-												chore(dev): agent builder mode

											
										
										
											2025-05-31 15:08:39 +08:00
+								    if is_agent_builder:
 								        logger.info("Agent builder mode - registering only update agent tool")
 								        from agent.tools.update_agent_tool import UpdateAgentTool
 								        from services.supabase import DBConnection
 								        db = DBConnection()
 								        target_agent_id = agent_config.get('target_agent_id') if agent_config else None
 								        update_tool = UpdateAgentTool(db, target_agent_id)
-												chore(dev): add custom agent context based on thread id

											
										
										
											2025-05-24 15:08:41 +08:00
+								    if enabled_tools is None:
 								        # No agent specified - register ALL tools for full Suna experience
 								        logger.info("No agent specified - registering all tools for full Suna capabilities")
 								        thread_manager.add_tool(SandboxShellTool, project_id=project_id, thread_manager=thread_manager)
 								        thread_manager.add_tool(SandboxFilesTool, project_id=project_id, thread_manager=thread_manager)
 								        thread_manager.add_tool(SandboxBrowserTool, project_id=project_id, thread_id=thread_id, thread_manager=thread_manager)
 								        thread_manager.add_tool(SandboxDeployTool, project_id=project_id, thread_manager=thread_manager)
 								        thread_manager.add_tool(SandboxExposeTool, project_id=project_id, thread_manager=thread_manager)
 								        thread_manager.add_tool(MessageTool)
 								        thread_manager.add_tool(SandboxWebSearchTool, project_id=project_id, thread_manager=thread_manager)
 								        thread_manager.add_tool(SandboxVisionTool, project_id=project_id, thread_id=thread_id, thread_manager=thread_manager)
 								        if config.RAPID_API_KEY:
 								            thread_manager.add_tool(DataProvidersTool)
 								    else:
 								        # Agent specified - only register explicitly enabled tools
 								        logger.info("Custom agent specified - registering only enabled tools")
 								        if enabled_tools.get('sb_shell_tool', {}).get('enabled', False):
 								            thread_manager.add_tool(SandboxShellTool, project_id=project_id, thread_manager=thread_manager)
 								        if enabled_tools.get('sb_files_tool', {}).get('enabled', False):
 								            thread_manager.add_tool(SandboxFilesTool, project_id=project_id, thread_manager=thread_manager)
 								        if enabled_tools.get('sb_browser_tool', {}).get('enabled', False):
 								            thread_manager.add_tool(SandboxBrowserTool, project_id=project_id, thread_id=thread_id, thread_manager=thread_manager)
 								        if enabled_tools.get('sb_deploy_tool', {}).get('enabled', False):
 								            thread_manager.add_tool(SandboxDeployTool, project_id=project_id, thread_manager=thread_manager)
 								        if enabled_tools.get('sb_expose_tool', {}).get('enabled', False):
 								            thread_manager.add_tool(SandboxExposeTool, project_id=project_id, thread_manager=thread_manager)
 								        if enabled_tools.get('message_tool', {}).get('enabled', False):
 								            thread_manager.add_tool(MessageTool)
 								        if enabled_tools.get('web_search_tool', {}).get('enabled', False):
 								            thread_manager.add_tool(SandboxWebSearchTool, project_id=project_id, thread_manager=thread_manager)
 								        if enabled_tools.get('sb_vision_tool', {}).get('enabled', False):
 								            thread_manager.add_tool(SandboxVisionTool, project_id=project_id, thread_id=thread_id, thread_manager=thread_manager)
 								        if config.RAPID_API_KEY and enabled_tools.get('data_providers_tool', {}).get('enabled', False):
 								            thread_manager.add_tool(DataProvidersTool)
-												make non-anthropic model work with response example, fe padding bottom increase cuz of new chat input

											
										
										
											2025-05-07 07:56:52 +08:00
-												chore(dev): mcp setup

											
										
										
											2025-05-25 02:17:55 +08:00
+								    # Register MCP tool wrapper if agent has configured MCPs
-												chore(dev): auto generate xml schema or mcp

											
										
										
											2025-05-30 15:31:11 +08:00
+								    mcp_wrapper_instance = None
-												chore(dev): mcp setup

											
										
										
											2025-05-25 02:17:55 +08:00
+								    if agent_config and agent_config.get('configured_mcps'):
 								        logger.info(f"Registering MCP tool wrapper for {len(agent_config['configured_mcps'])} MCP servers")
-												chore(dev): auto generate xml schema or mcp

											
										
										
											2025-05-30 15:31:11 +08:00
+								        # Register the tool
-												chore(dev): mcp setup

											
										
										
											2025-05-25 02:17:55 +08:00
+								        thread_manager.add_tool(MCPToolWrapper, mcp_configs=agent_config['configured_mcps'])
-												chore(dev): auto generate xml schema or mcp

											
										
										
											2025-05-30 15:31:11 +08:00
 								        # Get the tool instance from the registry
 								        # The tool is registered with method names as keys
 								        for tool_name, tool_info in thread_manager.tool_registry.tools.items():
 								            if isinstance(tool_info['instance'], MCPToolWrapper):
 								                mcp_wrapper_instance = tool_info['instance']
 								                break
 								        # Initialize the MCP tools asynchronously
 								        if mcp_wrapper_instance:
 								            try:
 								                await mcp_wrapper_instance.initialize_and_register_tools()
 								                logger.info("MCP tools initialized successfully")
 								                # Re-register the updated schemas with the tool registry
 								                # This ensures the dynamically created tools are available for function calling
 								                updated_schemas = mcp_wrapper_instance.get_schemas()
 								                for method_name, schema_list in updated_schemas.items():
 								                    if method_name != 'call_mcp_tool':  # Skip the fallback method
 								                        # Register each dynamic tool in the registry
 								                        for schema in schema_list:
 								                            if schema.schema_type == SchemaType.OPENAPI:
 								                                thread_manager.tool_registry.tools[method_name] = {
 								                                    "instance": mcp_wrapper_instance,
 								                                    "schema": schema
 								                                }
 								                                logger.debug(f"Registered dynamic MCP tool: {method_name}")
 								            except Exception as e:
 								                logger.error(f"Failed to initialize MCP tools: {e}")
 								                # Continue without MCP tools if initialization fails
-												chore(dev): mcp setup

											
										
										
											2025-05-25 02:17:55 +08:00
-												chore(dev): add custom agent context based on thread id

											
										
										
											2025-05-24 15:08:41 +08:00
+								    # Prepare system prompt
 								    # First, get the default system prompt
-												small fix

											
										
										
											2025-05-22 18:35:27 +08:00
+								    if "gemini-2.5-flash" in model_name.lower():
-												chore(dev): add custom agent context based on thread id

											
										
										
											2025-05-24 15:08:41 +08:00
+								        default_system_content = get_gemini_system_prompt()
 								    else:
 								        # Use the original prompt - the LLM can only use tools that are registered
 								        default_system_content = get_system_prompt()
 								    # Add sample response for non-anthropic models
 								    if "anthropic" not in model_name.lower():
-												make non-anthropic model work with response example, fe padding bottom increase cuz of new chat input

											
										
										
											2025-05-07 07:56:52 +08:00
+								        sample_response_path = os.path.join(os.path.dirname(__file__), 'sample_responses/1.txt')
 								        with open(sample_response_path, 'r') as file:
 								            sample_response = file.read()
-												chore(dev): add custom agent context based on thread id

											
										
										
											2025-05-24 15:08:41 +08:00
+								        default_system_content = default_system_content + "\n\n <sample_assistant_response>" + sample_response + "</sample_assistant_response>"
 								    # Handle custom agent system prompt
 								    if agent_config and agent_config.get('system_prompt'):
 								        custom_system_prompt = agent_config['system_prompt'].strip()
-												make non-anthropic model work with response example, fe padding bottom increase cuz of new chat input

											
										
										
											2025-05-07 07:56:52 +08:00
-												chore(dev): add custom agent context based on thread id

											
										
										
											2025-05-24 15:08:41 +08:00
+								        # Completely replace the default system prompt with the custom one
 								        # This prevents confusion and tool hallucination
 								        system_content = custom_system_prompt
 								        logger.info(f"Using ONLY custom agent system prompt for: {agent_config.get('name', 'Unknown')}")
-												make non-anthropic model work with response example, fe padding bottom increase cuz of new chat input

											
										
										
											2025-05-07 07:56:52 +08:00
+								    else:
-												chore(dev): add custom agent context based on thread id

											
										
										
											2025-05-24 15:08:41 +08:00
+								        # Use just the default system prompt
 								        system_content = default_system_content
 								        logger.info("Using default system prompt only")
-												chore(dev): mcp setup

											
										
										
											2025-05-25 02:17:55 +08:00
 								    # Add MCP tool information to system prompt if MCP tools are configured
-												chore(dev): auto generate xml schema or mcp

											
										
										
											2025-05-30 15:31:11 +08:00
+								    if agent_config and agent_config.get('configured_mcps') and mcp_wrapper_instance and mcp_wrapper_instance._initialized:
-												chore(dev): mcp setup

											
										
										
											2025-05-25 02:17:55 +08:00
+								        mcp_info = "\n\n--- MCP Tools Available ---\n"
-												chore(dev): auto generate xml schema or mcp

											
										
										
											2025-05-30 15:31:11 +08:00
+								        mcp_info += "You have access to external MCP (Model Context Protocol) server tools.\n"
 								        mcp_info += "MCP tools can be called directly using their native function names in the standard function calling format:\n"
-												chore(ui): enhance agent playground UI

											
										
										
											2025-05-29 15:19:08 +08:00
+								        mcp_info += '<function_calls>\n'
-												chore(dev): auto generate xml schema or mcp

											
										
										
											2025-05-30 15:31:11 +08:00
+								        mcp_info += '<invoke name="{tool_name}">\n'
 								        mcp_info += '<parameter name="param1">value1</parameter>\n'
 								        mcp_info += '<parameter name="param2">value2</parameter>\n'
-												chore(ui): enhance agent playground UI

											
										
										
											2025-05-29 15:19:08 +08:00
+								        mcp_info += '</invoke>\n'
-												chore(dev): auto generate xml schema or mcp

											
										
										
											2025-05-30 15:31:11 +08:00
+								        mcp_info += '</function_calls>\n\n'
 								        # List available MCP tools
 								        mcp_info += "Available MCP tools:\n"
 								        try:
 								            # Get the actual registered schemas from the wrapper
 								            registered_schemas = mcp_wrapper_instance.get_schemas()
 								            for method_name, schema_list in registered_schemas.items():
 								                if method_name == 'call_mcp_tool':
 								                    continue  # Skip the fallback method
 								                # Get the schema info
 								                for schema in schema_list:
 								                    if schema.schema_type == SchemaType.OPENAPI:
 								                        func_info = schema.schema.get('function', {})
 								                        description = func_info.get('description', 'No description available')
 								                        # Extract server name from description if available
 								                        server_match = description.find('(MCP Server: ')
 								                        if server_match != -1:
 								                            server_end = description.find(')', server_match)
 								                            server_info = description[server_match:server_end+1]
 								                        else:
 								                            server_info = ''
 								                        mcp_info += f"- **{method_name}**: {description}\n"
 								                        # Show parameter info
 								                        params = func_info.get('parameters', {})
 								                        props = params.get('properties', {})
 								                        if props:
 								                            mcp_info += f"  Parameters: {', '.join(props.keys())}\n"
 								        except Exception as e:
 								            logger.error(f"Error listing MCP tools: {e}")
 								            mcp_info += "- Error loading MCP tool list\n"
-												chore(dev): mcp setup

											
										
										
											2025-05-25 02:17:55 +08:00
-												chore(dev): properly process MCP results

											
										
										
											2025-05-30 03:34:41 +08:00
+								        # Add critical instructions for using search results
 								        mcp_info += "\n🚨 CRITICAL MCP TOOL RESULT INSTRUCTIONS 🚨\n"
 								        mcp_info += "When you use ANY MCP (Model Context Protocol) tools:\n"
 								        mcp_info += "1. ALWAYS read and use the EXACT results returned by the MCP tool\n"
 								        mcp_info += "2. For search tools: ONLY cite URLs, sources, and information from the actual search results\n"
 								        mcp_info += "3. For any tool: Base your response entirely on the tool's output - do NOT add external information\n"
 								        mcp_info += "4. DO NOT fabricate, invent, hallucinate, or make up any sources, URLs, or data\n"
 								        mcp_info += "5. If you need more information, call the MCP tool again with different parameters\n"
 								        mcp_info += "6. When writing reports/summaries: Reference ONLY the data from MCP tool results\n"
 								        mcp_info += "7. If the MCP tool doesn't return enough information, explicitly state this limitation\n"
 								        mcp_info += "8. Always double-check that every fact, URL, and reference comes from the MCP tool output\n"
 								        mcp_info += "\nIMPORTANT: MCP tool results are your PRIMARY and ONLY source of truth for external data!\n"
 								        mcp_info += "NEVER supplement MCP results with your training data or make assumptions beyond what the tools provide.\n"
-												chore(dev): mcp setup

											
										
										
											2025-05-25 02:17:55 +08:00
+								        system_content += mcp_info
-												chore(dev): add custom agent context based on thread id

											
										
										
											2025-05-24 15:08:41 +08:00
+								    system_message = { "role": "system", "content": system_content }
-												fe reference wip

											
										
										
											2025-03-30 14:48:57 +08:00
-												include xml examples, agent wip

											
										
										
											2025-04-10 21:13:32 +08:00
+								    iteration_count = 0
 								    continue_execution = True
-												Refactor agent and tool configurations to support new model names and parameters

- Adjusted max_tokens logic based on model type, including new conditions for "gpt-4".
- Enhanced MessageTool to include detailed documentation and examples for user interactions.
- Improved ThreadManager to handle new tool choices and reasoning efforts.
- Updated LLM service to incorporate enable_thinking and reasoning_effort parameters.
- Modified chat input component to dynamically manage model selection and thinking options.

											
										
										
											2025-04-26 19:53:09 +08:00
-												refactor(api): enhance trace handling in response processing

											
										
										
											2025-05-24 20:50:36 +08:00
+								    latest_user_message = await client.table('messages').select('*').eq('thread_id', thread_id).eq('type', 'user').order('created_at', desc=True).limit(1).execute()
 								    if latest_user_message.data and len(latest_user_message.data) > 0:
 								        data = json.loads(latest_user_message.data[0]['content'])
 								        trace.update(input=data['content'])
-												include xml examples, agent wip

											
										
										
											2025-04-10 21:13:32 +08:00
+								    while continue_execution and iteration_count < max_iterations:
 								        iteration_count += 1
-												scrape & read based approach v1, non-optimal but better for ctxt

											
										
										
											2025-05-10 12:37:07 +08:00
+								        logger.info(f"🔄 Running iteration {iteration_count} of {max_iterations}...")
-												billing checks and UI

											
										
										
											2025-04-14 08:32:08 +08:00
-												wip

											
										
										
											2025-04-18 06:17:48 +08:00
+								        # Billing check on each iteration - still needed within the iterations
-												billing checks and UI

											
										
										
											2025-04-14 08:32:08 +08:00
+								        can_run, message, subscription = await check_billing_status(client, account_id)
 								        if not can_run:
 								            error_msg = f"Billing limit reached: {message}"
-												refactor(api): enhance trace handling in response processing

											
										
										
											2025-05-24 20:50:36 +08:00
+								            trace.event(name="billing_limit_reached", level="ERROR", status_message=(f"{error_msg}"))
-												billing checks and UI

											
										
										
											2025-04-14 08:32:08 +08:00
+								            # Yield a special message to indicate billing limit reached
 								            yield {
 								                "type": "status",
 								                "status": "stopped",
 								                "message": error_msg
 								            }
 								            break
-												agent wip

											
										
										
											2025-04-11 07:53:01 +08:00
+								        # Check if last message is from assistant using direct Supabase query
-												Refactor agent and tool configurations to support new model names and parameters

- Adjusted max_tokens logic based on model type, including new conditions for "gpt-4".
- Enhanced MessageTool to include detailed documentation and examples for user interactions.
- Improved ThreadManager to handle new tool choices and reasoning efforts.
- Updated LLM service to incorporate enable_thinking and reasoning_effort parameters.
- Modified chat input component to dynamically manage model selection and thinking options.

											
										
										
											2025-04-26 19:53:09 +08:00
+								        latest_message = await client.table('messages').select('*').eq('thread_id', thread_id).in_('type', ['assistant', 'tool', 'user']).order('created_at', desc=True).limit(1).execute()
-												agent wip

											
										
										
											2025-04-11 07:53:01 +08:00
+								        if latest_message.data and len(latest_message.data) > 0:
 								            message_type = latest_message.data[0].get('type')
 								            if message_type == 'assistant':
-												scrape & read based approach v1, non-optimal but better for ctxt

											
										
										
											2025-05-10 12:37:07 +08:00
+								                logger.info(f"Last message was from assistant, stopping execution")
-												fix: Modify log level from INFO to DEFAULT as Langfuse doesn't support INFO

											
										
										
											2025-05-26 12:24:40 +08:00
+								                trace.event(name="last_message_from_assistant", level="DEFAULT", status_message=(f"Last message was from assistant, stopping execution"))
-												agent wip

											
										
										
											2025-04-11 07:53:01 +08:00
+								                continue_execution = False
 								                break
-												Refactor agent and tool configurations to support new model names and parameters

- Adjusted max_tokens logic based on model type, including new conditions for "gpt-4".
- Enhanced MessageTool to include detailed documentation and examples for user interactions.
- Improved ThreadManager to handle new tool choices and reasoning efforts.
- Updated LLM service to incorporate enable_thinking and reasoning_effort parameters.
- Modified chat input component to dynamically manage model selection and thinking options.

											
										
										
											2025-04-26 19:53:09 +08:00
-												support images

											
										
										
											2025-04-28 08:28:21 +08:00
+								        # ---- Temporary Message Handling (Browser State & Image Context) ----
-												fuck yeah

											
										
										
											2025-04-16 00:36:01 +08:00
+								        temporary_message = None
-												support images

											
										
										
											2025-04-28 08:28:21 +08:00
+								        temp_message_content_list = [] # List to hold text/image blocks
 								        # Get the latest browser_state message
 								        latest_browser_state_msg = await client.table('messages').select('*').eq('thread_id', thread_id).eq('type', 'browser_state').order('created_at', desc=True).limit(1).execute()
 								        if latest_browser_state_msg.data and len(latest_browser_state_msg.data) > 0:
-												fuck yeah

											
										
										
											2025-04-16 00:36:01 +08:00
+								            try:
-												support images

											
										
										
											2025-04-28 08:28:21 +08:00
+								                browser_content = json.loads(latest_browser_state_msg.data[0]["content"])
 								                screenshot_base64 = browser_content.get("screenshot_base64")
-												chore(dev): upload images to s3

											
										
										
											2025-05-14 13:28:11 +08:00
+								                screenshot_url = browser_content.get("screenshot_url")
 								                # Create a copy of the browser state without screenshot data
-												support images

											
										
										
											2025-04-28 08:28:21 +08:00
+								                browser_state_text = browser_content.copy()
 								                browser_state_text.pop('screenshot_base64', None)
 								                browser_state_text.pop('screenshot_url', None)
 								                if browser_state_text:
 								                    temp_message_content_list.append({
-												fuck yeah

											
										
										
											2025-04-16 00:36:01 +08:00
+								                        "type": "text",
-												support images

											
										
										
											2025-04-28 08:28:21 +08:00
+								                        "text": f"The following is the current state of the browser:\n{json.dumps(browser_state_text, indent=2)}"
-												fuck yeah

											
										
										
											2025-04-16 00:36:01 +08:00
+								                    })
-												chore(dev): upload images to s3

											
										
										
											2025-05-14 13:28:11 +08:00
 								                # Prioritize screenshot_url if available
 								                if screenshot_url:
 								                    temp_message_content_list.append({
 								                        "type": "image_url",
 								                        "image_url": {
 								                            "url": screenshot_url,
 								                        }
 								                    })
 								                elif screenshot_base64:
 								                    # Fallback to base64 if URL not available
-												support images

											
										
										
											2025-04-28 08:28:21 +08:00
+								                    temp_message_content_list.append({
-												fuck yeah

											
										
										
											2025-04-16 00:36:01 +08:00
+								                        "type": "image_url",
-												support images

											
										
										
											2025-04-28 08:28:21 +08:00
+								                        "image_url": {
 								                            "url": f"data:image/jpeg;base64,{screenshot_base64}",
 								                        }
-												fuck yeah

											
										
										
											2025-04-16 00:36:01 +08:00
+								                    })
 								                else:
-												chore(dev): upload images to s3

											
										
										
											2025-05-14 13:28:11 +08:00
+								                    logger.warning("Browser state found but no screenshot data.")
-												Refactor agent and tool configurations to support new model names and parameters

- Adjusted max_tokens logic based on model type, including new conditions for "gpt-4".
- Enhanced MessageTool to include detailed documentation and examples for user interactions.
- Improved ThreadManager to handle new tool choices and reasoning efforts.
- Updated LLM service to incorporate enable_thinking and reasoning_effort parameters.
- Modified chat input component to dynamically manage model selection and thinking options.

											
										
										
											2025-04-26 19:53:09 +08:00
-												fuck yeah

											
										
										
											2025-04-16 00:36:01 +08:00
+								            except Exception as e:
-												support images

											
										
										
											2025-04-28 08:28:21 +08:00
+								                logger.error(f"Error parsing browser state: {e}")
-												refactor(api): enhance trace handling in response processing

											
										
										
											2025-05-24 20:50:36 +08:00
+								                trace.event(name="error_parsing_browser_state", level="ERROR", status_message=(f"{e}"))
-												support images

											
										
										
											2025-04-28 08:28:21 +08:00
 								        # Get the latest image_context message (NEW)
 								        latest_image_context_msg = await client.table('messages').select('*').eq('thread_id', thread_id).eq('type', 'image_context').order('created_at', desc=True).limit(1).execute()
 								        if latest_image_context_msg.data and len(latest_image_context_msg.data) > 0:
 								            try:
 								                image_context_content = json.loads(latest_image_context_msg.data[0]["content"])
 								                base64_image = image_context_content.get("base64")
 								                mime_type = image_context_content.get("mime_type")
 								                file_path = image_context_content.get("file_path", "unknown file")
 								                if base64_image and mime_type:
 								                    temp_message_content_list.append({
 								                        "type": "text",
 								                        "text": f"Here is the image you requested to see: '{file_path}'"
 								                    })
 								                    temp_message_content_list.append({
 								                        "type": "image_url",
 								                        "image_url": {
 								                            "url": f"data:{mime_type};base64,{base64_image}",
 								                        }
 								                    })
 								                else:
 								                    logger.warning(f"Image context found for '{file_path}' but missing base64 or mime_type.")
-												Refactor agent and tool configurations to support new model names and parameters

- Adjusted max_tokens logic based on model type, including new conditions for "gpt-4".
- Enhanced MessageTool to include detailed documentation and examples for user interactions.
- Improved ThreadManager to handle new tool choices and reasoning efforts.
- Updated LLM service to incorporate enable_thinking and reasoning_effort parameters.
- Modified chat input component to dynamically manage model selection and thinking options.

											
										
										
											2025-04-26 19:53:09 +08:00
-												support images

											
										
										
											2025-04-28 08:28:21 +08:00
+								                await client.table('messages').delete().eq('message_id', latest_image_context_msg.data[0]["message_id"]).execute()
 								            except Exception as e:
 								                logger.error(f"Error parsing image context: {e}")
-												refactor(api): enhance trace handling in response processing

											
										
										
											2025-05-24 20:50:36 +08:00
+								                trace.event(name="error_parsing_image_context", level="ERROR", status_message=(f"{e}"))
-												support images

											
										
										
											2025-04-28 08:28:21 +08:00
 								        # If we have any content, construct the temporary_message
 								        if temp_message_content_list:
 								            temporary_message = {"role": "user", "content": temp_message_content_list}
 								            # logger.debug(f"Constructed temporary message with {len(temp_message_content_list)} content blocks.")
 								        # ---- End Temporary Message Handling ----
-												Refactor agent and tool configurations to support new model names and parameters

- Adjusted max_tokens logic based on model type, including new conditions for "gpt-4".
- Enhanced MessageTool to include detailed documentation and examples for user interactions.
- Improved ThreadManager to handle new tool choices and reasoning efforts.
- Updated LLM service to incorporate enable_thinking and reasoning_effort parameters.
- Modified chat input component to dynamically manage model selection and thinking options.

											
										
										
											2025-04-26 19:53:09 +08:00
+								        # Set max_tokens based on model
 								        max_tokens = None
 								        if "sonnet" in model_name.lower():
 								            max_tokens = 64000
 								        elif "gpt-4" in model_name.lower():
 								            max_tokens = 4096
-												run, thread_run, response processor, raise errs properly

											
										
										
											2025-05-10 09:58:57 +08:00
-												feat(backend): langfuse traces

											
										
										
											2025-05-21 08:39:28 +08:00
+								        generation = trace.generation(name="thread_manager.run_thread")
-												run, thread_run, response processor, raise errs properly

											
										
										
											2025-05-10 09:58:57 +08:00
+								        try:
 								            # Make the LLM call and process the response
 								            response = await thread_manager.run_thread(
 								                thread_id=thread_id,
 								                system_prompt=system_message,
 								                stream=stream,
 								                llm_model=model_name,
 								                llm_temperature=0,
 								                llm_max_tokens=max_tokens,
 								                tool_choice="auto",
 								                max_xml_tool_calls=1,
 								                temporary_message=temporary_message,
 								                processor_config=ProcessorConfig(
 								                    xml_tool_calling=True,
 								                    native_tool_calling=False,
 								                    execute_tools=True,
 								                    execute_on_stream=True,
 								                    tool_execution_strategy="parallel",
 								                    xml_adding_strategy="user_message"
 								                ),
 								                native_max_auto_continues=native_max_auto_continues,
 								                include_xml_examples=True,
 								                enable_thinking=enable_thinking,
 								                reasoning_effort=reasoning_effort,
-												feat(backend): langfuse traces

											
										
										
											2025-05-21 08:39:28 +08:00
+								                enable_context_manager=enable_context_manager,
-												refactor(api): enhance trace handling in response processing

											
										
										
											2025-05-24 20:50:36 +08:00
+								                generation=generation
-												run, thread_run, response processor, raise errs properly

											
										
										
											2025-05-10 09:58:57 +08:00
+								            )
 								            if isinstance(response, dict) and "status" in response and response["status"] == "error":
-												scrape & read based approach v1, non-optimal but better for ctxt

											
										
										
											2025-05-10 12:37:07 +08:00
+								                logger.error(f"Error response from run_thread: {response.get('message', 'Unknown error')}")
-												refactor(api): enhance trace handling in response processing

											
										
										
											2025-05-24 20:50:36 +08:00
+								                trace.event(name="error_response_from_run_thread", level="ERROR", status_message=(f"{response.get('message', 'Unknown error')}"))
-												run, thread_run, response processor, raise errs properly

											
										
										
											2025-05-10 09:58:57 +08:00
+								                yield response
 								                break
 								            # Track if we see ask, complete, or web-browser-takeover tool calls
 								            last_tool_call = None
-												chore(dev): refactor xml tools

											
										
										
											2025-05-28 20:07:54 +08:00
+								            agent_should_terminate = False
-												Refactor agent and tool configurations to support new model names and parameters

- Adjusted max_tokens logic based on model type, including new conditions for "gpt-4".
- Enhanced MessageTool to include detailed documentation and examples for user interactions.
- Improved ThreadManager to handle new tool choices and reasoning efforts.
- Updated LLM service to incorporate enable_thinking and reasoning_effort parameters.
- Modified chat input component to dynamically manage model selection and thinking options.

											
										
										
											2025-04-26 19:53:09 +08:00
-												run, thread_run, response processor, raise errs properly

											
										
										
											2025-05-10 09:58:57 +08:00
+								            # Process the response
 								            error_detected = False
 								            try:
-												feat(backend): langfuse traces

											
										
										
											2025-05-21 08:39:28 +08:00
+								                full_response = ""
-												run, thread_run, response processor, raise errs properly

											
										
										
											2025-05-10 09:58:57 +08:00
+								                async for chunk in response:
 								                    # If we receive an error chunk, we should stop after this iteration
 								                    if isinstance(chunk, dict) and chunk.get('type') == 'status' and chunk.get('status') == 'error':
-												scrape & read based approach v1, non-optimal but better for ctxt

											
										
										
											2025-05-10 12:37:07 +08:00
+								                        logger.error(f"Error chunk detected: {chunk.get('message', 'Unknown error')}")
-												refactor(api): enhance trace handling in response processing

											
										
										
											2025-05-24 20:50:36 +08:00
+								                        trace.event(name="error_chunk_detected", level="ERROR", status_message=(f"{chunk.get('message', 'Unknown error')}"))
-												run, thread_run, response processor, raise errs properly

											
										
										
											2025-05-10 09:58:57 +08:00
+								                        error_detected = True
 								                        yield chunk  # Forward the error chunk
 								                        continue     # Continue processing other chunks but don't break yet
-												chore(dev): refactor xml tools

											
										
										
											2025-05-28 20:07:54 +08:00
 								                    # Check for termination signal in status messages
 								                    if chunk.get('type') == 'status':
 								                        try:
 								                            # Parse the metadata to check for termination signal
 								                            metadata = chunk.get('metadata', {})
 								                            if isinstance(metadata, str):
 								                                metadata = json.loads(metadata)
 								                            if metadata.get('agent_should_terminate'):
 								                                agent_should_terminate = True
 								                                logger.info("Agent termination signal detected in status message")
 								                                trace.event(name="agent_termination_signal_detected", level="DEFAULT", status_message="Agent termination signal detected in status message")
 								                                # Extract the tool name from the status content if available
 								                                content = chunk.get('content', {})
 								                                if isinstance(content, str):
 								                                    content = json.loads(content)
 								                                if content.get('function_name'):
 								                                    last_tool_call = content['function_name']
 								                                elif content.get('xml_tag_name'):
 								                                    last_tool_call = content['xml_tag_name']
 								                        except Exception as e:
 								                            logger.debug(f"Error parsing status message for termination check: {e}")
-												run, thread_run, response processor, raise errs properly

											
										
										
											2025-05-10 09:58:57 +08:00
 								                    # Check for XML versions like <ask>, <complete>, or <web-browser-takeover> in assistant content chunks
 								                    if chunk.get('type') == 'assistant' and 'content' in chunk:
 								                        try:
 								                            # The content field might be a JSON string or object
 								                            content = chunk.get('content', '{}')
 								                            if isinstance(content, str):
 								                                assistant_content_json = json.loads(content)
 								                            else:
 								                                assistant_content_json = content
 								                            # The actual text content is nested within
 								                            assistant_text = assistant_content_json.get('content', '')
-												feat(backend): langfuse traces

											
										
										
											2025-05-21 08:39:28 +08:00
+								                            full_response += assistant_text
-												chore(dev): refactor xml tools

											
										
										
											2025-05-28 20:07:54 +08:00
+								                            if isinstance(assistant_text, str):
-												run, thread_run, response processor, raise errs properly

											
										
										
											2025-05-10 09:58:57 +08:00
+								                                if '</ask>' in assistant_text or '</complete>' in assistant_text or '</web-browser-takeover>' in assistant_text:
 								                                   if '</ask>' in assistant_text:
 								                                       xml_tool = 'ask'
 								                                   elif '</complete>' in assistant_text:
 								                                       xml_tool = 'complete'
 								                                   elif '</web-browser-takeover>' in assistant_text:
 								                                       xml_tool = 'web-browser-takeover'
 								                                   last_tool_call = xml_tool
-												scrape & read based approach v1, non-optimal but better for ctxt

											
										
										
											2025-05-10 12:37:07 +08:00
+								                                   logger.info(f"Agent used XML tool: {xml_tool}")
-												fix: Modify log level from INFO to DEFAULT as Langfuse doesn't support INFO

											
										
										
											2025-05-26 12:24:40 +08:00
+								                                   trace.event(name="agent_used_xml_tool", level="DEFAULT", status_message=(f"Agent used XML tool: {xml_tool}"))
-												run, thread_run, response processor, raise errs properly

											
										
										
											2025-05-10 09:58:57 +08:00
+								                        except json.JSONDecodeError:
 								                            # Handle cases where content might not be valid JSON
-												scrape & read based approach v1, non-optimal but better for ctxt

											
										
										
											2025-05-10 12:37:07 +08:00
+								                            logger.warning(f"Warning: Could not parse assistant content JSON: {chunk.get('content')}")
-												refactor(api): enhance trace handling in response processing

											
										
										
											2025-05-24 20:50:36 +08:00
+								                            trace.event(name="warning_could_not_parse_assistant_content_json", level="WARNING", status_message=(f"Warning: Could not parse assistant content JSON: {chunk.get('content')}"))
-												run, thread_run, response processor, raise errs properly

											
										
										
											2025-05-10 09:58:57 +08:00
+								                        except Exception as e:
-												scrape & read based approach v1, non-optimal but better for ctxt

											
										
										
											2025-05-10 12:37:07 +08:00
+								                            logger.error(f"Error processing assistant chunk: {e}")
-												refactor(api): enhance trace handling in response processing

											
										
										
											2025-05-24 20:50:36 +08:00
+								                            trace.event(name="error_processing_assistant_chunk", level="ERROR", status_message=(f"Error processing assistant chunk: {e}"))
-												run, thread_run, response processor, raise errs properly

											
										
										
											2025-05-10 09:58:57 +08:00
 								                    yield chunk
 								                # Check if we should stop based on the last tool call or error
 								                if error_detected:
-												scrape & read based approach v1, non-optimal but better for ctxt

											
										
										
											2025-05-10 12:37:07 +08:00
+								                    logger.info(f"Stopping due to error detected in response")
-												fix: Modify log level from INFO to DEFAULT as Langfuse doesnt support INFO

											
										
										
											2025-05-26 12:24:40 +08:00
+								                    trace.event(name="stopping_due_to_error_detected_in_response", level="DEFAULT", status_message=(f"Stopping due to error detected in response"))
-												fix(backend): enhance trace logging levels for error handling and execution status

											
										
										
											2025-05-22 16:36:58 +08:00
+								                    generation.end(output=full_response, status_message="error_detected", level="ERROR")
-												run, thread_run, response processor, raise errs properly

											
										
										
											2025-05-10 09:58:57 +08:00
+								                    break
-												chore(dev): refactor xml tools

											
										
										
											2025-05-28 20:07:54 +08:00
+								                if agent_should_terminate or last_tool_call in ['ask', 'complete', 'web-browser-takeover']:
-												scrape & read based approach v1, non-optimal but better for ctxt

											
										
										
											2025-05-10 12:37:07 +08:00
+								                    logger.info(f"Agent decided to stop with tool: {last_tool_call}")
-												fix: Modify log level from INFO to DEFAULT as Langfuse doesnt support INFO

											
										
										
											2025-05-26 12:24:40 +08:00
+								                    trace.event(name="agent_decided_to_stop_with_tool", level="DEFAULT", status_message=(f"Agent decided to stop with tool: {last_tool_call}"))
-												feat(backend): langfuse traces

											
										
										
											2025-05-21 08:39:28 +08:00
+								                    generation.end(output=full_response, status_message="agent_stopped")
-												run, thread_run, response processor, raise errs properly

											
										
										
											2025-05-10 09:58:57 +08:00
+								                    continue_execution = False
-												feat(backend): langfuse traces

											
										
										
											2025-05-21 08:39:28 +08:00
-												run, thread_run, response processor, raise errs properly

											
										
										
											2025-05-10 09:58:57 +08:00
+								            except Exception as e:
 								                # Log the error and yield an error status to the caller (no re-raise); the break below stops all iterations
 								                error_msg = f"Error during response streaming: {str(e)}"
-												scrape & read based approach v1, non-optimal but better for ctxt

											
										
										
											2025-05-10 12:37:07 +08:00
+								                logger.error(f"Error: {error_msg}")
-												refactor(api): enhance trace handling in response processing

											
										
										
											2025-05-24 20:50:36 +08:00
+								                trace.event(name="error_during_response_streaming", level="ERROR", status_message=(f"Error during response streaming: {str(e)}"))
-												fix(backend): enhance trace logging levels for error handling and execution status

											
										
										
											2025-05-22 16:36:58 +08:00
+								                generation.end(output=full_response, status_message=error_msg, level="ERROR")
-												run, thread_run, response processor, raise errs properly

											
										
										
											2025-05-10 09:58:57 +08:00
+								                yield {
 								                    "type": "status",
 								                    "status": "error",
 								                    "message": error_msg
 								                }
 								                # Stop execution immediately on any error
 								                break
 								        except Exception as e:
 								            # Log the error and yield an error status to the caller (no re-raise); the break below stops all iterations
 								            error_msg = f"Error running thread: {str(e)}"
-												scrape & read based approach v1, non-optimal but better for ctxt

											
										
										
											2025-05-10 12:37:07 +08:00
+								            logger.error(f"Error: {error_msg}")
-												refactor(api): enhance trace handling in response processing

											
										
										
											2025-05-24 20:50:36 +08:00
+								            trace.event(name="error_running_thread", level="ERROR", status_message=(f"Error running thread: {str(e)}"))
-												run, thread_run, response processor, raise errs properly

											
										
										
											2025-05-10 09:58:57 +08:00
+								            yield {
 								                "type": "status",
 								                "status": "error",
 								                "message": error_msg
 								            }
 								            # Stop execution immediately on any error
 								            break
-												feat(backend): langfuse traces

											
										
										
											2025-05-21 08:39:28 +08:00
+								        generation.end(output=full_response)
 								    langfuse.flush() # Flush Langfuse events at the end of the run
-												include xml examples, agent wip

											
										
										
											2025-04-10 21:13:32 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								# # TESTING
-												v1 full refactor LLM Response Processor  (#24)


											
										
										
											2025-04-04 23:06:49 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								# async def test_agent():
 								#     """Test function to run the agent with a sample query"""
 								#     from agentpress.thread_manager import ThreadManager
 								#     from services.supabase import DBConnection
-												Refactor agent and tool configurations to support new model names and parameters

- Adjusted max_tokens logic based on model type, including new conditions for "gpt-4".
- Enhanced MessageTool to include detailed documentation and examples for user interactions.
- Improved ThreadManager to handle new tool choices and reasoning efforts.
- Updated LLM service to incorporate enable_thinking and reasoning_effort parameters.
- Modified chat input component to dynamically manage model selection and thinking options.

											
										
										
											2025-04-26 19:53:09 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								#     # Initialize ThreadManager
 								#     thread_manager = ThreadManager()
-												Refactor agent and tool configurations to support new model names and parameters

- Adjusted max_tokens logic based on model type, including new conditions for "gpt-4".
- Enhanced MessageTool to include detailed documentation and examples for user interactions.
- Improved ThreadManager to handle new tool choices and reasoning efforts.
- Updated LLM service to incorporate enable_thinking and reasoning_effort parameters.
- Modified chat input component to dynamically manage model selection and thinking options.

											
										
										
											2025-04-26 19:53:09 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								#     # Create a test thread directly with Postgres function
 								#     client = await DBConnection().client
-												Refactor agent and tool configurations to support new model names and parameters

- Adjusted max_tokens logic based on model type, including new conditions for "gpt-4".
- Enhanced MessageTool to include detailed documentation and examples for user interactions.
- Improved ThreadManager to handle new tool choices and reasoning efforts.
- Updated LLM service to incorporate enable_thinking and reasoning_effort parameters.
- Modified chat input component to dynamically manage model selection and thinking options.

											
										
										
											2025-04-26 19:53:09 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								#     try:
 								#         # Get user's personal account
 								#         account_result = await client.rpc('get_personal_account').execute()
-												Refactor agent and tool configurations to support new model names and parameters

- Adjusted max_tokens logic based on model type, including new conditions for "gpt-4".
- Enhanced MessageTool to include detailed documentation and examples for user interactions.
- Improved ThreadManager to handle new tool choices and reasoning efforts.
- Updated LLM service to incorporate enable_thinking and reasoning_effort parameters.
- Modified chat input component to dynamically manage model selection and thinking options.

											
										
										
											2025-04-26 19:53:09 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								#         # if not account_result.data:
 								#         #     print("Error: No personal account found")
 								#         #     return
-												Refactor agent and tool configurations to support new model names and parameters

- Adjusted max_tokens logic based on model type, including new conditions for "gpt-4".
- Enhanced MessageTool to include detailed documentation and examples for user interactions.
- Improved ThreadManager to handle new tool choices and reasoning efforts.
- Updated LLM service to incorporate enable_thinking and reasoning_effort parameters.
- Modified chat input component to dynamically manage model selection and thinking options.

											
										
										
											2025-04-26 19:53:09 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								#         account_id = "a5fe9cb6-4812-407e-a61c-fe95b7320c59"
-												Refactor agent and tool configurations to support new model names and parameters

- Adjusted max_tokens logic based on model type, including new conditions for "gpt-4".
- Enhanced MessageTool to include detailed documentation and examples for user interactions.
- Improved ThreadManager to handle new tool choices and reasoning efforts.
- Updated LLM service to incorporate enable_thinking and reasoning_effort parameters.
- Modified chat input component to dynamically manage model selection and thinking options.

											
										
										
											2025-04-26 19:53:09 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								#         if not account_id:
 								#             print("Error: Could not get account ID")
 								#             return
-												Refactor agent and tool configurations to support new model names and parameters

- Adjusted max_tokens logic based on model type, including new conditions for "gpt-4".
- Enhanced MessageTool to include detailed documentation and examples for user interactions.
- Improved ThreadManager to handle new tool choices and reasoning efforts.
- Updated LLM service to incorporate enable_thinking and reasoning_effort parameters.
- Modified chat input component to dynamically manage model selection and thinking options.

											
										
										
											2025-04-26 19:53:09 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								#         # Find or create a test project in the user's account
 								#         project_result = await client.table('projects').select('*').eq('name', 'test11').eq('account_id', account_id).execute()
-												Refactor agent and tool configurations to support new model names and parameters

- Adjusted max_tokens logic based on model type, including new conditions for "gpt-4".
- Enhanced MessageTool to include detailed documentation and examples for user interactions.
- Improved ThreadManager to handle new tool choices and reasoning efforts.
- Updated LLM service to incorporate enable_thinking and reasoning_effort parameters.
- Modified chat input component to dynamically manage model selection and thinking options.

											
										
										
											2025-04-26 19:53:09 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								#         if project_result.data and len(project_result.data) > 0:
 								#             # Use existing test project
 								#             project_id = project_result.data[0]['project_id']
 								#             print(f"\n🔄 Using existing test project: {project_id}")
 								#         else:
 								#             # Create new test project if none exists
 								#             project_result = await client.table('projects').insert({
-												Refactor agent and tool configurations to support new model names and parameters

- Adjusted max_tokens logic based on model type, including new conditions for "gpt-4".
- Enhanced MessageTool to include detailed documentation and examples for user interactions.
- Improved ThreadManager to handle new tool choices and reasoning efforts.
- Updated LLM service to incorporate enable_thinking and reasoning_effort parameters.
- Modified chat input component to dynamically manage model selection and thinking options.

											
										
										
											2025-04-26 19:53:09 +08:00
+								#                 "name": "test11",
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								#                 "account_id": account_id
 								#             }).execute()
 								#             project_id = project_result.data[0]['project_id']
 								#             print(f"\n✨ Created new test project: {project_id}")
-												Refactor agent and tool configurations to support new model names and parameters

- Adjusted max_tokens logic based on model type, including new conditions for "gpt-4".
- Enhanced MessageTool to include detailed documentation and examples for user interactions.
- Improved ThreadManager to handle new tool choices and reasoning efforts.
- Updated LLM service to incorporate enable_thinking and reasoning_effort parameters.
- Modified chat input component to dynamically manage model selection and thinking options.

											
										
										
											2025-04-26 19:53:09 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								#         # Create a thread for this project
 								#         thread_result = await client.table('threads').insert({
 								#             'project_id': project_id,
 								#             'account_id': account_id
 								#         }).execute()
 								#         thread_data = thread_result.data[0] if thread_result.data else None
-												Refactor agent and tool configurations to support new model names and parameters

- Adjusted max_tokens logic based on model type, including new conditions for "gpt-4".
- Enhanced MessageTool to include detailed documentation and examples for user interactions.
- Improved ThreadManager to handle new tool choices and reasoning efforts.
- Updated LLM service to incorporate enable_thinking and reasoning_effort parameters.
- Modified chat input component to dynamically manage model selection and thinking options.

											
										
										
											2025-04-26 19:53:09 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								#         if not thread_data:
 								#             print("Error: No thread data returned")
 								#             return
-												Refactor agent and tool configurations to support new model names and parameters

- Adjusted max_tokens logic based on model type, including new conditions for "gpt-4".
- Enhanced MessageTool to include detailed documentation and examples for user interactions.
- Improved ThreadManager to handle new tool choices and reasoning efforts.
- Updated LLM service to incorporate enable_thinking and reasoning_effort parameters.
- Modified chat input component to dynamically manage model selection and thinking options.

											
										
										
											2025-04-26 19:53:09 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								#         thread_id = thread_data['thread_id']
 								#     except Exception as e:
 								#         print(f"Error setting up thread: {str(e)}")
 								#         return
-												Refactor agent and tool configurations to support new model names and parameters

- Adjusted max_tokens logic based on model type, including new conditions for "gpt-4".
- Enhanced MessageTool to include detailed documentation and examples for user interactions.
- Improved ThreadManager to handle new tool choices and reasoning efforts.
- Updated LLM service to incorporate enable_thinking and reasoning_effort parameters.
- Modified chat input component to dynamically manage model selection and thinking options.

											
										
										
											2025-04-26 19:53:09 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								#     print(f"\n🤖 Agent Thread Created: {thread_id}\n")
-												Refactor agent and tool configurations to support new model names and parameters

- Adjusted max_tokens logic based on model type, including new conditions for "gpt-4".
- Enhanced MessageTool to include detailed documentation and examples for user interactions.
- Improved ThreadManager to handle new tool choices and reasoning efforts.
- Updated LLM service to incorporate enable_thinking and reasoning_effort parameters.
- Modified chat input component to dynamically manage model selection and thinking options.

											
										
										
											2025-04-26 19:53:09 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								#     # Interactive message input loop
 								#     while True:
 								#         # Get user input
 								#         user_message = input("\n💬 Enter your message (or 'exit' to quit): ")
 								#         if user_message.lower() == 'exit':
 								#             break
-												Refactor agent and tool configurations to support new model names and parameters

- Adjusted max_tokens logic based on model type, including new conditions for "gpt-4".
- Enhanced MessageTool to include detailed documentation and examples for user interactions.
- Improved ThreadManager to handle new tool choices and reasoning efforts.
- Updated LLM service to incorporate enable_thinking and reasoning_effort parameters.
- Modified chat input component to dynamically manage model selection and thinking options.

											
										
										
											2025-04-26 19:53:09 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								#         if not user_message.strip():
 								#             print("\n🔄 Running agent...\n")
 								#             await process_agent_response(thread_id, project_id, thread_manager)
 								#             continue
-												Refactor agent and tool configurations to support new model names and parameters

- Adjusted max_tokens logic based on model type, including new conditions for "gpt-4".
- Enhanced MessageTool to include detailed documentation and examples for user interactions.
- Improved ThreadManager to handle new tool choices and reasoning efforts.
- Updated LLM service to incorporate enable_thinking and reasoning_effort parameters.
- Modified chat input component to dynamically manage model selection and thinking options.

											
										
										
											2025-04-26 19:53:09 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								#         # Add the user message to the thread
 								#         await thread_manager.add_message(
 								#             thread_id=thread_id,
 								#             type="user",
 								#             content={
 								#                 "role": "user",
 								#                 "content": user_message
 								#             },
 								#             is_llm_message=True
 								#         )
-												Refactor agent and tool configurations to support new model names and parameters

- Adjusted max_tokens logic based on model type, including new conditions for "gpt-4".
- Enhanced MessageTool to include detailed documentation and examples for user interactions.
- Improved ThreadManager to handle new tool choices and reasoning efforts.
- Updated LLM service to incorporate enable_thinking and reasoning_effort parameters.
- Modified chat input component to dynamically manage model selection and thinking options.

											
										
										
											2025-04-26 19:53:09 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								#         print("\n🔄 Running agent...\n")
 								#         await process_agent_response(thread_id, project_id, thread_manager)
-												Refactor agent and tool configurations to support new model names and parameters

- Adjusted max_tokens logic based on model type, including new conditions for "gpt-4".
- Enhanced MessageTool to include detailed documentation and examples for user interactions.
- Improved ThreadManager to handle new tool choices and reasoning efforts.
- Updated LLM service to incorporate enable_thinking and reasoning_effort parameters.
- Modified chat input component to dynamically manage model selection and thinking options.

											
										
										
											2025-04-26 19:53:09 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								#     print("\n👋 Test completed. Goodbye!")
-												wip

											
										
										
											2025-04-09 07:24:15 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								# async def process_agent_response(
 								#     thread_id: str,
 								#     project_id: str,
 								#     thread_manager: ThreadManager,
 								#     stream: bool = True,
 								#     model_name: str = "anthropic/claude-3-7-sonnet-latest",
 								#     enable_thinking: Optional[bool] = False,
 								#     reasoning_effort: Optional[str] = 'low',
 								#     enable_context_manager: bool = True
 								# ):
 								#     """Process the streaming response from the agent."""
 								#     chunk_counter = 0
 								#     current_response = ""
 								#     tool_usage_counter = 0 # Renamed from tool_call_counter as we track usage via status
-												Refactor agent and tool configurations to support new model names and parameters

- Adjusted max_tokens logic based on model type, including new conditions for "gpt-4".
- Enhanced MessageTool to include detailed documentation and examples for user interactions.
- Improved ThreadManager to handle new tool choices and reasoning efforts.
- Updated LLM service to incorporate enable_thinking and reasoning_effort parameters.
- Modified chat input component to dynamically manage model selection and thinking options.

											
										
										
											2025-04-26 19:53:09 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								#     # Create a test sandbox for processing with a unique test prefix to avoid conflicts with production sandboxes
 								#     sandbox_pass = str(uuid4())
 								#     sandbox = create_sandbox(sandbox_pass)
-												Refactor agent and tool configurations to support new model names and parameters

- Adjusted max_tokens logic based on model type, including new conditions for "gpt-4".
- Enhanced MessageTool to include detailed documentation and examples for user interactions.
- Improved ThreadManager to handle new tool choices and reasoning efforts.
- Updated LLM service to incorporate enable_thinking and reasoning_effort parameters.
- Modified chat input component to dynamically manage model selection and thinking options.

											
										
										
											2025-04-26 19:53:09 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								#     # Store the original ID so we can refer to it
 								#     original_sandbox_id = sandbox.id
-												Refactor agent and tool configurations to support new model names and parameters

- Adjusted max_tokens logic based on model type, including new conditions for "gpt-4".
- Enhanced MessageTool to include detailed documentation and examples for user interactions.
- Improved ThreadManager to handle new tool choices and reasoning efforts.
- Updated LLM service to incorporate enable_thinking and reasoning_effort parameters.
- Modified chat input component to dynamically manage model selection and thinking options.

											
										
										
											2025-04-26 19:53:09 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								#     # Generate a clear test identifier
 								#     test_prefix = f"test_{uuid4().hex[:8]}_"
 								#     logger.info(f"Created test sandbox with ID {original_sandbox_id} and test prefix {test_prefix}")
-												Refactor agent and tool configurations to support new model names and parameters

- Adjusted max_tokens logic based on model type, including new conditions for "gpt-4".
- Enhanced MessageTool to include detailed documentation and examples for user interactions.
- Improved ThreadManager to handle new tool choices and reasoning efforts.
- Updated LLM service to incorporate enable_thinking and reasoning_effort parameters.
- Modified chat input component to dynamically manage model selection and thinking options.

											
										
										
											2025-04-26 19:53:09 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								#     # Log the sandbox URL for debugging
 								#     print(f"\033[91mTest sandbox created: {str(sandbox.get_preview_link(6080))}/vnc_lite.html?password={sandbox_pass}\033[0m")
-												Refactor agent and tool configurations to support new model names and parameters

- Adjusted max_tokens logic based on model type, including new conditions for "gpt-4".
- Enhanced MessageTool to include detailed documentation and examples for user interactions.
- Improved ThreadManager to handle new tool choices and reasoning efforts.
- Updated LLM service to incorporate enable_thinking and reasoning_effort parameters.
- Modified chat input component to dynamically manage model selection and thinking options.

											
										
										
											2025-04-26 19:53:09 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								#     async for chunk in run_agent(
 								#         thread_id=thread_id,
 								#         project_id=project_id,
 								#         sandbox=sandbox,
 								#         stream=stream,
 								#         thread_manager=thread_manager,
 								#         native_max_auto_continues=25,
 								#         model_name=model_name,
 								#         enable_thinking=enable_thinking,
 								#         reasoning_effort=reasoning_effort,
 								#         enable_context_manager=enable_context_manager
 								#     ):
 								#         chunk_counter += 1
 								#         # print(f"CHUNK: {chunk}") # Uncomment for debugging
-												wip

											
										
										
											2025-04-18 18:50:39 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								#         if chunk.get('type') == 'assistant':
 								#             # Try parsing the content JSON
 								#             try:
 								#                 # Handle content as string or object
 								#                 content = chunk.get('content', '{}')
 								#                 if isinstance(content, str):
 								#                     content_json = json.loads(content)
 								#                 else:
 								#                     content_json = content
-												Refactor agent and tool configurations to support new model names and parameters

- Adjusted max_tokens logic based on model type, including new conditions for "gpt-4".
- Enhanced MessageTool to include detailed documentation and examples for user interactions.
- Improved ThreadManager to handle new tool choices and reasoning efforts.
- Updated LLM service to incorporate enable_thinking and reasoning_effort parameters.
- Modified chat input component to dynamically manage model selection and thinking options.

											
										
										
											2025-04-26 19:53:09 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								#                 actual_content = content_json.get('content', '')
 								#                 # Print the actual assistant text content as it comes
 								#                 if actual_content:
 								#                      # Check if it contains XML tool tags, if so, print the whole tag for context
 								#                     if '<' in actual_content and '>' in actual_content:
 								#                          # Avoid printing potentially huge raw content if it's not just text
 								#                          if len(actual_content) < 500: # Heuristic limit
 								#                             print(actual_content, end='', flush=True)
 								#                          else:
 								#                              # Maybe just print a summary if it's too long or contains complex XML
 								#                              if '</ask>' in actual_content: print("<ask>...</ask>", end='', flush=True)
 								#                              elif '</complete>' in actual_content: print("<complete>...</complete>", end='', flush=True)
 								#                              else: print("<tool_call>...</tool_call>", end='', flush=True) # Generic case
 								#                     else:
 								#                         # Regular text content
 								#                          print(actual_content, end='', flush=True)
 								#                     current_response += actual_content # Accumulate only text part
 								#             except json.JSONDecodeError:
 								#                  # If content is not JSON (e.g., just a string chunk), print directly
 								#                  raw_content = chunk.get('content', '')
 								#                  print(raw_content, end='', flush=True)
 								#                  current_response += raw_content
 								#             except Exception as e:
 								#                  print(f"\nError processing assistant chunk: {e}\n")
-												wip

											
										
										
											2025-04-18 18:50:39 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								#         elif chunk.get('type') == 'tool': # Updated from 'tool_result'
 								#             # Add timestamp and format tool result nicely
 								#             tool_name = "UnknownTool" # Try to get from metadata if available
 								#             result_content = "No content"
-												Refactor agent and tool configurations to support new model names and parameters

- Adjusted max_tokens logic based on model type, including new conditions for "gpt-4".
- Enhanced MessageTool to include detailed documentation and examples for user interactions.
- Improved ThreadManager to handle new tool choices and reasoning efforts.
- Updated LLM service to incorporate enable_thinking and reasoning_effort parameters.
- Modified chat input component to dynamically manage model selection and thinking options.

											
										
										
											2025-04-26 19:53:09 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								#             # Parse metadata - handle both string and dict formats
 								#             metadata = chunk.get('metadata', {})
 								#             if isinstance(metadata, str):
 								#                 try:
 								#                     metadata = json.loads(metadata)
 								#                 except json.JSONDecodeError:
 								#                     metadata = {}
-												Refactor agent and tool configurations to support new model names and parameters

- Adjusted max_tokens logic based on model type, including new conditions for "gpt-4".
- Enhanced MessageTool to include detailed documentation and examples for user interactions.
- Improved ThreadManager to handle new tool choices and reasoning efforts.
- Updated LLM service to incorporate enable_thinking and reasoning_effort parameters.
- Modified chat input component to dynamically manage model selection and thinking options.

											
										
										
											2025-04-26 19:53:09 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								#             linked_assistant_msg_id = metadata.get('assistant_message_id')
 								#             parsing_details = metadata.get('parsing_details')
 								#             if parsing_details:
 								#                 tool_name = parsing_details.get('xml_tag_name', 'UnknownTool') # Get name from parsing details
-												wip

											
										
										
											2025-04-18 18:50:39 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								#             try:
 								#                 # Content is a JSON string or object
-												Refactor agent and tool configurations to support new model names and parameters

- Adjusted max_tokens logic based on model type, including new conditions for "gpt-4".
- Enhanced MessageTool to include detailed documentation and examples for user interactions.
- Improved ThreadManager to handle new tool choices and reasoning efforts.
- Updated LLM service to incorporate enable_thinking and reasoning_effort parameters.
- Modified chat input component to dynamically manage model selection and thinking options.

											
										
										
											2025-04-26 19:53:09 +08:00
+								#                 content = chunk.get('content', '{}')
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								#                 if isinstance(content, str):
 								#                     content_json = json.loads(content)
 								#                 else:
 								#                     content_json = content
-												Refactor agent and tool configurations to support new model names and parameters

- Adjusted max_tokens logic based on model type, including new conditions for "gpt-4".
- Enhanced MessageTool to include detailed documentation and examples for user interactions.
- Improved ThreadManager to handle new tool choices and reasoning efforts.
- Updated LLM service to incorporate enable_thinking and reasoning_effort parameters.
- Modified chat input component to dynamically manage model selection and thinking options.

											
										
										
											2025-04-26 19:53:09 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								#                 # The actual tool result is nested inside content.content
 								#                 tool_result_str = content_json.get('content', '')
 								#                  # Extract the actual tool result string (remove outer <tool_result> tag if present)
 								#                 match = re.search(rf'<{tool_name}>(.*?)</{tool_name}>', tool_result_str, re.DOTALL)
 								#                 if match:
 								#                     result_content = match.group(1).strip()
 								#                     # Try to parse the result string itself as JSON for pretty printing
 								#                     try:
 								#                         result_obj = json.loads(result_content)
 								#                         result_content = json.dumps(result_obj, indent=2)
 								#                     except json.JSONDecodeError:
 								#                          # Keep as string if not JSON
 								#                          pass
 								#                 else:
 								#                      # Fallback if tag extraction fails
 								#                      result_content = tool_result_str
-												wip

											
										
										
											2025-04-18 18:50:39 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								#             except json.JSONDecodeError:
 								#                 result_content = chunk.get('content', 'Error parsing tool content')
 								#             except Exception as e:
 								#                 result_content = f"Error processing tool chunk: {e}"
-												wip

											
										
										
											2025-04-18 18:50:39 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								#             print(f"\n\n🛠️  TOOL RESULT [{tool_name}] → {result_content}")
-												wip

											
										
										
											2025-04-18 18:50:39 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								#         elif chunk.get('type') == 'status':
 								#             # Log tool status changes
 								#             try:
 								#                 # Handle content as string or object
 								#                 status_content = chunk.get('content', '{}')
 								#                 if isinstance(status_content, str):
 								#                     status_content = json.loads(status_content)
-												Refactor agent and tool configurations to support new model names and parameters

- Adjusted max_tokens logic based on model type, including new conditions for "gpt-4".
- Enhanced MessageTool to include detailed documentation and examples for user interactions.
- Improved ThreadManager to handle new tool choices and reasoning efforts.
- Updated LLM service to incorporate enable_thinking and reasoning_effort parameters.
- Modified chat input component to dynamically manage model selection and thinking options.

											
										
										
											2025-04-26 19:53:09 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								#                 status_type = status_content.get('status_type')
 								#                 function_name = status_content.get('function_name', '')
 								#                 xml_tag_name = status_content.get('xml_tag_name', '') # Get XML tag if available
 								#                 tool_name = xml_tag_name or function_name # Prefer XML tag name
-												wip

											
										
										
											2025-04-18 18:50:39 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								#                 if status_type == 'tool_started' and tool_name:
 								#                     tool_usage_counter += 1
 								#                     print(f"\n⏳ TOOL STARTING #{tool_usage_counter} [{tool_name}]")
 								#                     print("  " + "-" * 40)
 								#                     # Return to the current content display
 								#                     if current_response:
 								#                         print("\nContinuing response:", flush=True)
 								#                         print(current_response, end='', flush=True)
 								#                 elif status_type == 'tool_completed' and tool_name:
 								#                      status_emoji = "✅"
 								#                      print(f"\n{status_emoji} TOOL COMPLETED: {tool_name}")
 								#                 elif status_type == 'finish':
 								#                      finish_reason = status_content.get('finish_reason', '')
 								#                      if finish_reason:
 								#                          print(f"\n📌 Finished: {finish_reason}")
 								#                 # else: # Print other status types if needed for debugging
 								#                 #    print(f"\nℹ️ STATUS: {chunk.get('content')}")
-												wip

											
										
										
											2025-04-18 18:50:39 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								#             except json.JSONDecodeError:
 								#                  print(f"\nWarning: Could not parse status content JSON: {chunk.get('content')}")
 								#             except Exception as e:
 								#                 print(f"\nError processing status chunk: {e}")
-												wip

											
										
										
											2025-04-18 18:50:39 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								#         # Removed elif chunk.get('type') == 'tool_call': block
-												Refactor agent and tool configurations to support new model names and parameters

- Adjusted max_tokens logic based on model type, including new conditions for "gpt-4".
- Enhanced MessageTool to include detailed documentation and examples for user interactions.
- Improved ThreadManager to handle new tool choices and reasoning efforts.
- Updated LLM service to incorporate enable_thinking and reasoning_effort parameters.
- Modified chat input component to dynamically manage model selection and thinking options.

											
										
										
											2025-04-26 19:53:09 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								#     # Update final message
 								#     print(f"\n\n✅ Agent run completed with {tool_usage_counter} tool executions")
-												Refactor agent and tool configurations to support new model names and parameters

- Adjusted max_tokens logic based on model type, including new conditions for "gpt-4".
- Enhanced MessageTool to include detailed documentation and examples for user interactions.
- Improved ThreadManager to handle new tool choices and reasoning efforts.
- Updated LLM service to incorporate enable_thinking and reasoning_effort parameters.
- Modified chat input component to dynamically manage model selection and thinking options.

											
										
										
											2025-04-26 19:53:09 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								#     # Try to clean up the test sandbox if possible
 								#     try:
 								#         # Attempt to delete/archive the sandbox to clean up resources
 								#         # Note: Actual deletion may depend on the Daytona SDK's capabilities
 								#         logger.info(f"Attempting to clean up test sandbox {original_sandbox_id}")
 								#         # If there's a method to archive/delete the sandbox, call it here
 								#         # Example: daytona.archive_sandbox(sandbox.id)
 								#     except Exception as e:
 								#         logger.warning(f"Failed to clean up test sandbox {original_sandbox_id}: {str(e)}")
-												v1 full refactor LLM Response Processor  (#24)


											
										
										
											2025-04-04 23:06:49 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								# if __name__ == "__main__":
 								#     import asyncio
-												Refactor agent and tool configurations to support new model names and parameters

- Adjusted max_tokens logic based on model type, including new conditions for "gpt-4".
- Enhanced MessageTool to include detailed documentation and examples for user interactions.
- Improved ThreadManager to handle new tool choices and reasoning efforts.
- Updated LLM service to incorporate enable_thinking and reasoning_effort parameters.
- Modified chat input component to dynamically manage model selection and thinking options.

											
										
										
											2025-04-26 19:53:09 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								#     # Configure any environment variables or setup needed for testing
 								#     load_dotenv()  # Ensure environment variables are loaded
-												Refactor agent and tool configurations to support new model names and parameters

- Adjusted max_tokens logic based on model type, including new conditions for "gpt-4".
- Enhanced MessageTool to include detailed documentation and examples for user interactions.
- Improved ThreadManager to handle new tool choices and reasoning efforts.
- Updated LLM service to incorporate enable_thinking and reasoning_effort parameters.
- Modified chat input component to dynamically manage model selection and thinking options.

											
										
										
											2025-04-26 19:53:09 +08:00
-												wip

											
										
										
											2025-04-23 16:13:07 +08:00
+								#     # Run the test function
 								#     asyncio.run(test_agent())