Fix error where messages are not added to the thread when max_xml_tool_calls > 0

2025-04-10 18:13:12 +01:00 · 2025-04-10 18:13:12 +01:00 · 700ccddbad
parent 329d635dfa
commit 700ccddbad
6 changed files with 58 additions and 51 deletions
--- a/backend/agent/prompt.py
+++ b/backend/agent/prompt.py
@ -53,8 +53,6 @@ TODO.MD is your central planning tool and source of truth for all tasks. It driv
   - Begin with 5-10 major sections covering the entire task lifecycle
   - Include thorough preparation and research sections before implementation
   - Format as markdown checklist with clear, actionable items: `- [ ] Task description`
-   - Include current timestamp and task ID for tracking
-   - Add estimated completion time for each section
   - Build a complete roadmap before starting execution

 2. SECTION-BASED PROGRESSION: Work on one complete section at a time:
@ -73,8 +71,6 @@ TODO.MD is your central planning tool and source of truth for all tasks. It driv
   - Carefully evaluate the results before proceeding
   - Mark completed items with `- [x]` using text replacement
   - Add new discovered subtasks as needed
-   - Update task progress estimates
-   - Add timestamps to completed items
   - Document observations and learnings

 5. PROGRESSION GATES: Never advance to a new section until:
@ -88,7 +84,6 @@ TODO.MD is your central planning tool and source of truth for all tasks. It driv
   - Preserve completed tasks with their status
   - Add, modify or remove pending tasks
   - Document reason for changes in todo.md
-   - Re-estimate completion times
   - Ensure the modified plan maintains logical progression

 Always reference todo.md by line number when making decisions or reporting progress.
@ -115,10 +110,9 @@ You operate in a methodical, single-step agent loop guided by todo.md:
   - No further actions until execution completes

 4. PROGRESS TRACKING: Update todo.md with detailed progress:
-   - Mark completed items with timestamps
+   - Mark completed items
   - Add new discovered tasks as needed
   - Document lessons learned and observations
-   - Update estimates for remaining work

 5. METHODICAL ITERATION: Repeat steps 1-4 until section completion:
   - Choose only one tool call per iteration
@ -161,7 +155,7 @@ The planner module is responsible for initializing and organizing your todo.md w
 <todo_format>
 Todo.md must follow this comprehensive structured format with many sections:
 ```
-# Task: [Task Name] - Created [Timestamp]
+# Task: [Task Name]

 ## 1. Task Analysis and Planning
 - [ ] 1.1 Understand user requirements completely
@ -169,7 +163,6 @@ Todo.md must follow this comprehensive structured format with many sections:
 - [ ] 1.3 Research similar existing solutions
 - [ ] 1.4 Define success criteria and deliverables
 - [ ] 1.5 Verify understanding of requirements
-Estimated completion time: [Time]

 ## 2. Environment Setup and Preparation
 - [ ] 2.1 Check current environment state
@ -177,7 +170,6 @@ Estimated completion time: [Time]
 - [ ] 2.3 Set up project structure
 - [ ] 2.4 Configure development tools
 - [ ] 2.5 Verify environment readiness
-Estimated completion time: [Time]

 ## 3. Research and Information Gathering
 - [ ] 3.1 Search for relevant documentation
@ -185,7 +177,6 @@ Estimated completion time: [Time]
 - [ ] 3.3 Collect reference materials
 - [ ] 3.4 Organize findings
 - [ ] 3.5 Verify information completeness and accuracy
-Estimated completion time: [Time]

 ## 4. Design and Architecture
 - [ ] 4.1 Create system architecture diagram
@ -193,7 +184,6 @@ Estimated completion time: [Time]
 - [ ] 4.3 Design data structures
 - [ ] 4.4 Plan implementation approach
 - [ ] 4.5 Verify design against requirements
-Estimated completion time: [Time]

 ## 5. Implementation - Component A
 - [ ] 5.1 Implement core functionality
@ -201,7 +191,6 @@ Estimated completion time: [Time]
 - [ ] 5.3 Optimize performance
 - [ ] 5.4 Document code
 - [ ] 5.5 Verify component functionality
-Estimated completion time: [Time]

 ## 6. Implementation - Component B
 - [ ] 6.1 Implement core functionality
@ -209,7 +198,6 @@ Estimated completion time: [Time]
 - [ ] 6.3 Optimize performance
 - [ ] 6.4 Document code
 - [ ] 6.5 Verify component functionality
-Estimated completion time: [Time]

 ## 7. Integration and Testing
 - [ ] 7.1 Integrate all components
@ -217,7 +205,6 @@ Estimated completion time: [Time]
 - [ ] 7.3 Fix identified issues
 - [ ] 7.4 Verify system behavior
 - [ ] 7.5 Document test results
-Estimated completion time: [Time]

 ## 8. Deployment and Delivery
 - [ ] 8.1 Prepare deployment package
@ -225,7 +212,6 @@ Estimated completion time: [Time]
 - [ ] 8.3 Verify deployment success
 - [ ] 8.4 Document deployment process
 - [ ] 8.5 Prepare user documentation
-Estimated completion time: [Time]

 ## 9. Final Verification
 - [ ] 9.1 Validate all deliverables against requirements
@ -233,14 +219,13 @@ Estimated completion time: [Time]
 - [ ] 9.3 Prepare comprehensive summary
 - [ ] 9.4 Compile all documentation
 - [ ] 9.5 Submit completed work to user
-Estimated completion time: [Time]
 ```

-When marking items complete, include timestamps and observations:
-`- [x] 1.1 Understand user requirements completely - Completed [Timestamp] - [Brief observation]`
+When marking items complete, include observations:
+`- [x] 1.1 Understand user requirements completely - [Brief observation]`

 SECTION TRANSITIONS must be documented:
-`## Completed Section: [Section Name] - [Timestamp]
+`## Completed Section: [Section Name]
 Summary: [Comprehensive summary of section achievements and insights]`
 </todo_format>

--- a/backend/agent/run.py
+++ b/backend/agent/run.py
@ -9,7 +9,7 @@ from agentpress.thread_manager import ThreadManager
 from agentpress.response_processor import ProcessorConfig
 from agent.tools.sb_browse_tool import SandboxBrowseTool
 from agent.tools.sb_shell_tool import SandboxShellTool
-from agent.tools.sb_website_tool import SandboxWebsiteTool
+# from agent.tools.sb_website_tool import SandboxWebsiteTool
 from agent.tools.sb_files_tool import SandboxFilesTool
 from agent.prompt import get_system_prompt
 from agent.tools.utils.daytona_sandbox import daytona, create_sandbox, get_or_start_sandbox
@ -28,7 +28,7 @@ async def run_agent(thread_id: str, project_id: str, stream: bool = True, thread
    if project.data[0]['sandbox_id']:
        sandbox_id = project.data[0]['sandbox_id']
        sandbox_pass = project.data[0]['sandbox_pass']
-        sandbox = await get_or_start_sandbox(sandbox_id, sandbox_pass)
+        sandbox = await get_or_start_sandbox(sandbox_id)
    else:
        sandbox_pass = str(uuid4())
        sandbox = create_sandbox(sandbox_pass)
@ -37,13 +37,13 @@ async def run_agent(thread_id: str, project_id: str, stream: bool = True, thread
            'sandbox_id': sandbox_id,
            'sandbox_pass': sandbox_pass
        }).eq('project_id', project_id).execute()
-    ### ---

    
    thread_manager.add_tool(SandboxBrowseTool, sandbox_id=sandbox_id, password=sandbox_pass)
-    thread_manager.add_tool(SandboxWebsiteTool, sandbox_id=sandbox_id, password=sandbox_pass)
+    # thread_manager.add_tool(SandboxWebsiteTool, sandbox_id=sandbox_id, password=sandbox_pass)
    thread_manager.add_tool(SandboxShellTool, sandbox_id=sandbox_id, password=sandbox_pass)
    thread_manager.add_tool(SandboxFilesTool, sandbox_id=sandbox_id, password=sandbox_pass)
+    files_tool = SandboxFilesTool(sandbox_id=sandbox_id, password=sandbox_pass)

    system_message = { "role": "system", "content": get_system_prompt() }

@ -56,8 +56,6 @@ async def run_agent(thread_id: str, project_id: str, stream: bool = True, thread
    # model_name = "bedrock/anthropic.claude-3-7-sonnet-20250219-v1:0"
    # model_name = "bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0"

-    files_tool = SandboxFilesTool(sandbox_id=sandbox_id, password=sandbox_pass)
-
    iteration_count = 0
    continue_execution = True
    
@ -73,13 +71,15 @@ async def run_agent(thread_id: str, project_id: str, stream: bool = True, thread
        state_message = {
            "role": "user",
            "content": f"""
-Current development environment workspace state:
+Current workspace state:
 <current_workspace_state>
 {state_str}
 </current_workspace_state>
            """
        }

+        # print(f"State message: {state_message}")
+
        response = await thread_manager.run_thread(
            thread_id=thread_id,
            system_prompt=system_message,
@ -91,8 +91,8 @@ Current development environment workspace state:
            tool_choice="auto",
            max_xml_tool_calls=1,
            processor_config=ProcessorConfig(
-                xml_tool_calling=False,
-                native_tool_calling=True,
+                xml_tool_calling=True,
+                native_tool_calling=False,
                execute_tools=True,
                execute_on_stream=True,
                tool_execution_strategy="parallel",
@ -116,6 +116,13 @@ Current development environment workspace state:
                function_name = tool_call.get('function', {}).get('name', '')
                if function_name in ['message_ask_user', 'idle']:
                    last_tool_call = function_name
+            # Check for XML versions like <message_ask_user> or <Idle> in content chunks
+            elif chunk.get('type') == 'content' and 'content' in chunk:
+                content = chunk.get('content', '')
+                if '<message_ask_user>' in content or '<Idle>' in content:
+                    xml_tool = 'message_ask_user' if '<message_ask_user>' in content else 'idle'
+                    last_tool_call = xml_tool
+                    print(f"Agent used XML tool: {xml_tool}")
                    
            yield chunk
        
@ -126,7 +133,6 @@ Current development environment workspace state:



-
 # TESTING

 async def test_agent():
--- a/backend/agent/tools/sb_files_tool.py
+++ b/backend/agent/tools/sb_files_tool.py
@ -94,11 +94,10 @@ class SandboxFilesTool(SandboxToolsBase):
        tag_name="create-file",
        mappings=[
            {"param_name": "file_path", "node_type": "attribute", "path": "."},
-            {"param_name": "file_contents", "node_type": "content", "path": "."},
-            {"param_name": "permissions", "node_type": "attribute", "path": "@permissions"}
+            {"param_name": "file_contents", "node_type": "content", "path": "."}
        ],
        example='''
-        <create-file file_path="path/to/file" permissions="644">
+        <create-file file_path="path/to/file">
        File contents go here
        </create-file>
        '''
@ -225,11 +224,10 @@ class SandboxFilesTool(SandboxToolsBase):
        tag_name="full-file-rewrite",
        mappings=[
            {"param_name": "file_path", "node_type": "attribute", "path": "."},
-            {"param_name": "file_contents", "node_type": "content", "path": "."},
-            {"param_name": "permissions", "node_type": "attribute", "path": "@permissions"}
+            {"param_name": "file_contents", "node_type": "content", "path": "."}
        ],
        example='''
-        <full-file-rewrite file_path="path/to/file" permissions="644">
+        <full-file-rewrite file_path="path/to/file">
        New file contents go here, replacing all existing content
        </full-file-rewrite>
        '''
@ -316,9 +314,9 @@ class SandboxFilesTool(SandboxToolsBase):
    @xml_schema(
        tag_name="search-files",
        mappings=[
-            {"param_name": "path", "node_type": "attribute", "path": "@path"},
-            {"param_name": "pattern", "node_type": "attribute", "path": "@pattern"},
-            {"param_name": "recursive", "node_type": "attribute", "path": "@recursive"}
+            {"param_name": "path", "node_type": "attribute", "path": "."},
+            {"param_name": "pattern", "node_type": "attribute", "path": "."},
+            {"param_name": "recursive", "node_type": "attribute", "path": "."}
        ],
        example='''
        <search-files path="path/to/search" pattern="text-of-interest" recursive="true">
@ -378,9 +376,9 @@ class SandboxFilesTool(SandboxToolsBase):
    @xml_schema(
        tag_name="replace-in-file",
        mappings=[
-            {"param_name": "file", "node_type": "attribute", "path": "@file"},
-            {"param_name": "pattern", "node_type": "element", "path": "pattern"},
-            {"param_name": "new_value", "node_type": "element", "path": "new_value"}
+            {"param_name": "file", "node_type": "attribute", "path": "."},
+            {"param_name": "pattern", "node_type": "element", "path": "."},
+            {"param_name": "new_value", "node_type": "element", "path": "."}
        ],
        example='''
        <replace-in-file file="path/to/file.txt">
--- a/backend/agent/tools/utils/daytona_sandbox.py
+++ b/backend/agent/tools/utils/daytona_sandbox.py
@ -270,7 +270,7 @@ def wait_for_api_ready(sandbox):
    
    return api_url

-async def get_or_start_sandbox(sandbox_id: str, sandbox_pass: str):
+async def get_or_start_sandbox(sandbox_id: str):
    """Retrieve a sandbox by ID, check its state, and start it if needed.
    Also ensure the sandbox_browser_api and HTTP server services are running."""
    
--- a/backend/agentpress/response_processor.py
+++ b/backend/agentpress/response_processor.py
@ -81,7 +81,10 @@ class ResponseProcessor:
        
        Args:
            tool_registry: Registry of available tools
-            add_message_callback: Callback function to add messages to the thread
+            add_message_callback: Callback function to add messages to the thread.
+                This function is used to record assistant messages, tool calls,
+                and tool results in the conversation history, making them
+                available for the LLM in subsequent interactions.
        """
        self.tool_registry = tool_registry
        self.add_message = add_message_callback
@ -426,8 +429,8 @@ class ResponseProcessor:
                logger.info(f"Stream finished with reason: xml_tool_limit_reached after {xml_tool_call_count} XML tool calls")
            
            # After streaming completes, process any remaining content and tool calls
-            # Only do this if we didn't stop due to XML tool limit (already handled pending executions above)
-            if accumulated_content and finish_reason != "xml_tool_limit_reached":
+            # IMPORTANT: Always process accumulated content even when XML tool limit is reached
+            if accumulated_content:
                # Extract final complete tool calls for native format
                complete_native_tool_calls = []
                if config.native_tool_calling:
@ -621,7 +624,7 @@ class ResponseProcessor:
                                    }
                                })
            
-            # Add assistant message FIRST
+            # Add assistant message FIRST - always do this regardless of finish_reason
            message_data = {
                "role": "assistant",
                "content": content,
@ -1088,7 +1091,20 @@ class ResponseProcessor:
        result: ToolResult,
        strategy: Union[XmlAddingStrategy, str] = "assistant_message"
    ):
-        """Add a tool result to the thread based on the specified format."""
+        """Add a tool result to the conversation thread based on the specified format.
+        
+        This method formats tool results and adds them to the conversation history,
+        making them visible to the LLM in subsequent interactions. Results can be 
+        added either as native tool messages (OpenAI format) or as XML-wrapped content
+        with a specified role (user or assistant).
+        
+        Args:
+            thread_id: ID of the conversation thread
+            tool_call: The original tool call that produced this result
+            result: The result from the tool execution
+            strategy: How to add XML tool results to the conversation
+                     ("user_message", "assistant_message", or "inline_edit")
+        """
        try:
            # Check if this is a native function call (has id field)
            if "id" in tool_call:
@ -1122,7 +1138,8 @@ class ResponseProcessor:
                
                logger.info(f"Adding native tool result for tool_call_id={tool_call['id']} with role=tool")
                
-                # Add as a tool message
+                # Add as a tool message to the conversation history
+                # This makes the result visible to the LLM in the next turn
                await self.add_message(
                    thread_id=thread_id,
                    type="tool",  # Special type for tool responses
@ -1142,7 +1159,8 @@ class ResponseProcessor:
            # Format the content using the formatting helper
            content = self._format_xml_tool_result(tool_call, result)
            
-            # Add the message with the appropriate role
+            # Add the message with the appropriate role to the conversation history
+            # This allows the LLM to see the tool result in subsequent interactions
            result_message = {
                "role": result_role,
                "content": content
--- a/backend/utils/logger.py
+++ b/backend/utils/logger.py
@ -83,7 +83,7 @@ def setup_logger(name: str = 'agentpress') -> logging.Logger:
    
    # Console handler
    console_handler = logging.StreamHandler(sys.stdout)
-    console_handler.setLevel(logging.INFO)
+    console_handler.setLevel(logging.DEBUG)
    
    # Create formatters
    file_formatter = logging.Formatter(