From 700ccddbadbd6087e8773dab4c7180fe3f2b0346 Mon Sep 17 00:00:00 2001 From: marko-kraemer Date: Thu, 10 Apr 2025 18:13:12 +0100 Subject: [PATCH] fix error where it wont add messages to thread if max_xml_tool_calls x > 0 --- backend/agent/prompt.py | 25 +++------------ backend/agent/run.py | 26 ++++++++++------ backend/agent/tools/sb_files_tool.py | 22 ++++++-------- backend/agent/tools/utils/daytona_sandbox.py | 2 +- backend/agentpress/response_processor.py | 32 +++++++++++++++----- backend/utils/logger.py | 2 +- 6 files changed, 58 insertions(+), 51 deletions(-) diff --git a/backend/agent/prompt.py b/backend/agent/prompt.py index 17ff972d..635a953e 100644 --- a/backend/agent/prompt.py +++ b/backend/agent/prompt.py @@ -53,8 +53,6 @@ TODO.MD is your central planning tool and source of truth for all tasks. It driv - Begin with 5-10 major sections covering the entire task lifecycle - Include thorough preparation and research sections before implementation - Format as markdown checklist with clear, actionable items: `- [ ] Task description` - - Include current timestamp and task ID for tracking - - Add estimated completion time for each section - Build a complete roadmap before starting execution 2. SECTION-BASED PROGRESSION: Work on one complete section at a time: @@ -73,8 +71,6 @@ TODO.MD is your central planning tool and source of truth for all tasks. It driv - Carefully evaluate the results before proceeding - Mark completed items with `- [x]` using text replacement - Add new discovered subtasks as needed - - Update task progress estimates - - Add timestamps to completed items - Document observations and learnings 5. PROGRESSION GATES: Never advance to a new section until: @@ -88,7 +84,6 @@ TODO.MD is your central planning tool and source of truth for all tasks. It driv - Preserve completed tasks with their status - Add, modify or remove pending tasks - Document reason for changes in todo.md - - Re-estimate completion times - Ensure the modified plan maintains logical progression Always reference todo.md by line number when making decisions or reporting progress. @@ -115,10 +110,9 @@ You operate in a methodical, single-step agent loop guided by todo.md: - No further actions until execution completes 4. PROGRESS TRACKING: Update todo.md with detailed progress: - - Mark completed items with timestamps + - Mark completed items - Add new discovered tasks as needed - Document lessons learned and observations - - Update estimates for remaining work 5. METHODICAL ITERATION: Repeat steps 1-4 until section completion: - Choose only one tool call per iteration @@ -161,7 +155,7 @@ The planner module is responsible for initializing and organizing your todo.md w Todo.md must follow this comprehensive structured format with many sections: ``` -# Task: [Task Name] - Created [Timestamp] +# Task: [Task Name] ## 1. Task Analysis and Planning - [ ] 1.1 Understand user requirements completely @@ -169,7 +163,6 @@ Todo.md must follow this comprehensive structured format with many sections: - [ ] 1.3 Research similar existing solutions - [ ] 1.4 Define success criteria and deliverables - [ ] 1.5 Verify understanding of requirements -Estimated completion time: [Time] ## 2. Environment Setup and Preparation - [ ] 2.1 Check current environment state @@ -177,7 +170,6 @@ Estimated completion time: [Time] - [ ] 2.3 Set up project structure - [ ] 2.4 Configure development tools - [ ] 2.5 Verify environment readiness -Estimated completion time: [Time] ## 3. Research and Information Gathering - [ ] 3.1 Search for relevant documentation @@ -185,7 +177,6 @@ Estimated completion time: [Time] - [ ] 3.3 Collect reference materials - [ ] 3.4 Organize findings - [ ] 3.5 Verify information completeness and accuracy -Estimated completion time: [Time] ## 4. Design and Architecture - [ ] 4.1 Create system architecture diagram @@ -193,7 +184,6 @@ Estimated completion time: [Time] - [ ] 4.3 Design data structures - [ ] 4.4 Plan implementation approach - [ ] 4.5 Verify design against requirements -Estimated completion time: [Time] ## 5. Implementation - Component A - [ ] 5.1 Implement core functionality @@ -201,7 +191,6 @@ Estimated completion time: [Time] - [ ] 5.3 Optimize performance - [ ] 5.4 Document code - [ ] 5.5 Verify component functionality -Estimated completion time: [Time] ## 6. Implementation - Component B - [ ] 6.1 Implement core functionality @@ -209,7 +198,6 @@ Estimated completion time: [Time] - [ ] 6.3 Optimize performance - [ ] 6.4 Document code - [ ] 6.5 Verify component functionality -Estimated completion time: [Time] ## 7. Integration and Testing - [ ] 7.1 Integrate all components @@ -217,7 +205,6 @@ Estimated completion time: [Time] - [ ] 7.3 Fix identified issues - [ ] 7.4 Verify system behavior - [ ] 7.5 Document test results -Estimated completion time: [Time] ## 8. Deployment and Delivery - [ ] 8.1 Prepare deployment package @@ -225,7 +212,6 @@ Estimated completion time: [Time] - [ ] 8.3 Verify deployment success - [ ] 8.4 Document deployment process - [ ] 8.5 Prepare user documentation -Estimated completion time: [Time] ## 9. Final Verification - [ ] 9.1 Validate all deliverables against requirements @@ -233,14 +219,13 @@ Estimated completion time: [Time] - [ ] 9.3 Prepare comprehensive summary - [ ] 9.4 Compile all documentation - [ ] 9.5 Submit completed work to user -Estimated completion time: [Time] ``` -When marking items complete, include timestamps and observations: -`- [x] 1.1 Understand user requirements completely - Completed [Timestamp] - [Brief observation]` +When marking items complete, include observations: +`- [x] 1.1 Understand user requirements completely - [Brief observation]` SECTION TRANSITIONS must be documented: -`## Completed Section: [Section Name] - [Timestamp] +`## Completed Section: [Section Name] Summary: [Comprehensive summary of section achievements and insights]` diff --git a/backend/agent/run.py b/backend/agent/run.py index df7cd68b..4dd2c95d 100644 --- a/backend/agent/run.py +++ b/backend/agent/run.py @@ -9,7 +9,7 @@ from agentpress.thread_manager import ThreadManager from agentpress.response_processor import ProcessorConfig from agent.tools.sb_browse_tool import SandboxBrowseTool from agent.tools.sb_shell_tool import SandboxShellTool -from agent.tools.sb_website_tool import SandboxWebsiteTool +# from agent.tools.sb_website_tool import SandboxWebsiteTool from agent.tools.sb_files_tool import SandboxFilesTool from agent.prompt import get_system_prompt from agent.tools.utils.daytona_sandbox import daytona, create_sandbox, get_or_start_sandbox @@ -28,7 +28,7 @@ async def run_agent(thread_id: str, project_id: str, stream: bool = True, thread if project.data[0]['sandbox_id']: sandbox_id = project.data[0]['sandbox_id'] sandbox_pass = project.data[0]['sandbox_pass'] - sandbox = await get_or_start_sandbox(sandbox_id, sandbox_pass) + sandbox = await get_or_start_sandbox(sandbox_id) else: sandbox_pass = str(uuid4()) sandbox = create_sandbox(sandbox_pass) @@ -37,13 +37,13 @@ async def run_agent(thread_id: str, project_id: str, stream: bool = True, thread 'sandbox_id': sandbox_id, 'sandbox_pass': sandbox_pass }).eq('project_id', project_id).execute() - ### --- thread_manager.add_tool(SandboxBrowseTool, sandbox_id=sandbox_id, password=sandbox_pass) - thread_manager.add_tool(SandboxWebsiteTool, sandbox_id=sandbox_id, password=sandbox_pass) + # thread_manager.add_tool(SandboxWebsiteTool, sandbox_id=sandbox_id, password=sandbox_pass) thread_manager.add_tool(SandboxShellTool, sandbox_id=sandbox_id, password=sandbox_pass) thread_manager.add_tool(SandboxFilesTool, sandbox_id=sandbox_id, password=sandbox_pass) + files_tool = SandboxFilesTool(sandbox_id=sandbox_id, password=sandbox_pass) system_message = { "role": "system", "content": get_system_prompt() } @@ -56,8 +56,6 @@ async def run_agent(thread_id: str, project_id: str, stream: bool = True, thread # model_name = "bedrock/anthropic.claude-3-7-sonnet-20250219-v1:0" # model_name = "bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0" - files_tool = SandboxFilesTool(sandbox_id=sandbox_id, password=sandbox_pass) - iteration_count = 0 continue_execution = True @@ -73,13 +71,15 @@ async def run_agent(thread_id: str, project_id: str, stream: bool = True, thread state_message = { "role": "user", "content": f""" -Current development environment workspace state: +Current workspace state: {state_str} """ } + # print(f"State message: {state_message}") + response = await thread_manager.run_thread( thread_id=thread_id, system_prompt=system_message, @@ -91,8 +91,8 @@ Current development environment workspace state: tool_choice="auto", max_xml_tool_calls=1, processor_config=ProcessorConfig( - xml_tool_calling=False, - native_tool_calling=True, + xml_tool_calling=True, + native_tool_calling=False, execute_tools=True, execute_on_stream=True, tool_execution_strategy="parallel", @@ -116,6 +116,13 @@ Current development environment workspace state: function_name = tool_call.get('function', {}).get('name', '') if function_name in ['message_ask_user', 'idle']: last_tool_call = function_name + # Check for XML versions like or in content chunks + elif chunk.get('type') == 'content' and 'content' in chunk: + content = chunk.get('content', '') + if '' in content or '' in content: + xml_tool = 'message_ask_user' if '' in content else 'idle' + last_tool_call = xml_tool + print(f"Agent used XML tool: {xml_tool}") yield chunk @@ -126,7 +133,6 @@ Current development environment workspace state: - # TESTING async def test_agent(): diff --git a/backend/agent/tools/sb_files_tool.py b/backend/agent/tools/sb_files_tool.py index 05ae2ddf..1e80a6f2 100644 --- a/backend/agent/tools/sb_files_tool.py +++ b/backend/agent/tools/sb_files_tool.py @@ -94,11 +94,10 @@ class SandboxFilesTool(SandboxToolsBase): tag_name="create-file", mappings=[ {"param_name": "file_path", "node_type": "attribute", "path": "."}, - {"param_name": "file_contents", "node_type": "content", "path": "."}, - {"param_name": "permissions", "node_type": "attribute", "path": "@permissions"} + {"param_name": "file_contents", "node_type": "content", "path": "."} ], example=''' - + File contents go here ''' @@ -225,11 +224,10 @@ class SandboxFilesTool(SandboxToolsBase): tag_name="full-file-rewrite", mappings=[ {"param_name": "file_path", "node_type": "attribute", "path": "."}, - {"param_name": "file_contents", "node_type": "content", "path": "."}, - {"param_name": "permissions", "node_type": "attribute", "path": "@permissions"} + {"param_name": "file_contents", "node_type": "content", "path": "."} ], example=''' - + New file contents go here, replacing all existing content ''' @@ -316,9 +314,9 @@ class SandboxFilesTool(SandboxToolsBase): @xml_schema( tag_name="search-files", mappings=[ - {"param_name": "path", "node_type": "attribute", "path": "@path"}, - {"param_name": "pattern", "node_type": "attribute", "path": "@pattern"}, - {"param_name": "recursive", "node_type": "attribute", "path": "@recursive"} + {"param_name": "path", "node_type": "attribute", "path": "."}, + {"param_name": "pattern", "node_type": "attribute", "path": "."}, + {"param_name": "recursive", "node_type": "attribute", "path": "."} ], example=''' @@ -378,9 +376,9 @@ class SandboxFilesTool(SandboxToolsBase): @xml_schema( tag_name="replace-in-file", mappings=[ - {"param_name": "file", "node_type": "attribute", "path": "@file"}, - {"param_name": "pattern", "node_type": "element", "path": "pattern"}, - {"param_name": "new_value", "node_type": "element", "path": "new_value"} + {"param_name": "file", "node_type": "attribute", "path": "."}, + {"param_name": "pattern", "node_type": "element", "path": "."}, + {"param_name": "new_value", "node_type": "element", "path": "."} ], example=''' diff --git a/backend/agent/tools/utils/daytona_sandbox.py b/backend/agent/tools/utils/daytona_sandbox.py index 14a15319..b464c15d 100644 --- a/backend/agent/tools/utils/daytona_sandbox.py +++ b/backend/agent/tools/utils/daytona_sandbox.py @@ -270,7 +270,7 @@ def wait_for_api_ready(sandbox): return api_url -async def get_or_start_sandbox(sandbox_id: str, sandbox_pass: str): +async def get_or_start_sandbox(sandbox_id: str): """Retrieve a sandbox by ID, check its state, and start it if needed. Also ensure the sandbox_browser_api and HTTP server services are running.""" diff --git a/backend/agentpress/response_processor.py b/backend/agentpress/response_processor.py index a6e66d0b..848fb8c3 100644 --- a/backend/agentpress/response_processor.py +++ b/backend/agentpress/response_processor.py @@ -81,7 +81,10 @@ class ResponseProcessor: Args: tool_registry: Registry of available tools - add_message_callback: Callback function to add messages to the thread + add_message_callback: Callback function to add messages to the thread. + This function is used to record assistant messages, tool calls, + and tool results in the conversation history, making them + available for the LLM in subsequent interactions. """ self.tool_registry = tool_registry self.add_message = add_message_callback @@ -426,8 +429,8 @@ class ResponseProcessor: logger.info(f"Stream finished with reason: xml_tool_limit_reached after {xml_tool_call_count} XML tool calls") # After streaming completes, process any remaining content and tool calls - # Only do this if we didn't stop due to XML tool limit (already handled pending executions above) - if accumulated_content and finish_reason != "xml_tool_limit_reached": + # IMPORTANT: Always process accumulated content even when XML tool limit is reached + if accumulated_content: # Extract final complete tool calls for native format complete_native_tool_calls = [] if config.native_tool_calling: @@ -621,7 +624,7 @@ class ResponseProcessor: } }) - # Add assistant message FIRST + # Add assistant message FIRST - always do this regardless of finish_reason message_data = { "role": "assistant", "content": content, @@ -1088,7 +1091,20 @@ class ResponseProcessor: result: ToolResult, strategy: Union[XmlAddingStrategy, str] = "assistant_message" ): - """Add a tool result to the thread based on the specified format.""" + """Add a tool result to the conversation thread based on the specified format. + + This method formats tool results and adds them to the conversation history, + making them visible to the LLM in subsequent interactions. Results can be + added either as native tool messages (OpenAI format) or as XML-wrapped content + with a specified role (user or assistant). + + Args: + thread_id: ID of the conversation thread + tool_call: The original tool call that produced this result + result: The result from the tool execution + strategy: How to add XML tool results to the conversation + ("user_message", "assistant_message", or "inline_edit") + """ try: # Check if this is a native function call (has id field) if "id" in tool_call: @@ -1122,7 +1138,8 @@ class ResponseProcessor: logger.info(f"Adding native tool result for tool_call_id={tool_call['id']} with role=tool") - # Add as a tool message + # Add as a tool message to the conversation history + # This makes the result visible to the LLM in the next turn await self.add_message( thread_id=thread_id, type="tool", # Special type for tool responses @@ -1142,7 +1159,8 @@ class ResponseProcessor: # Format the content using the formatting helper content = self._format_xml_tool_result(tool_call, result) - # Add the message with the appropriate role + # Add the message with the appropriate role to the conversation history + # This allows the LLM to see the tool result in subsequent interactions result_message = { "role": result_role, "content": content diff --git a/backend/utils/logger.py b/backend/utils/logger.py index 80063207..acb4bbe8 100644 --- a/backend/utils/logger.py +++ b/backend/utils/logger.py @@ -83,7 +83,7 @@ def setup_logger(name: str = 'agentpress') -> logging.Logger: # Console handler console_handler = logging.StreamHandler(sys.stdout) - console_handler.setLevel(logging.INFO) + console_handler.setLevel(logging.DEBUG) # Create formatters file_formatter = logging.Formatter(