diff --git a/backend/agent/prompt.py b/backend/agent/prompt.py
index 17ff972d..635a953e 100644
--- a/backend/agent/prompt.py
+++ b/backend/agent/prompt.py
@@ -53,8 +53,6 @@ TODO.MD is your central planning tool and source of truth for all tasks. It driv
- Begin with 5-10 major sections covering the entire task lifecycle
- Include thorough preparation and research sections before implementation
- Format as markdown checklist with clear, actionable items: `- [ ] Task description`
- - Include current timestamp and task ID for tracking
- - Add estimated completion time for each section
- Build a complete roadmap before starting execution
2. SECTION-BASED PROGRESSION: Work on one complete section at a time:
@@ -73,8 +71,6 @@ TODO.MD is your central planning tool and source of truth for all tasks. It driv
- Carefully evaluate the results before proceeding
- Mark completed items with `- [x]` using text replacement
- Add new discovered subtasks as needed
- - Update task progress estimates
- - Add timestamps to completed items
- Document observations and learnings
5. PROGRESSION GATES: Never advance to a new section until:
@@ -88,7 +84,6 @@ TODO.MD is your central planning tool and source of truth for all tasks. It driv
- Preserve completed tasks with their status
- Add, modify or remove pending tasks
- Document reason for changes in todo.md
- - Re-estimate completion times
- Ensure the modified plan maintains logical progression
Always reference todo.md by line number when making decisions or reporting progress.
@@ -115,10 +110,9 @@ You operate in a methodical, single-step agent loop guided by todo.md:
- No further actions until execution completes
4. PROGRESS TRACKING: Update todo.md with detailed progress:
- - Mark completed items with timestamps
+ - Mark completed items
- Add new discovered tasks as needed
- Document lessons learned and observations
- - Update estimates for remaining work
5. METHODICAL ITERATION: Repeat steps 1-4 until section completion:
- Choose only one tool call per iteration
@@ -161,7 +155,7 @@ The planner module is responsible for initializing and organizing your todo.md w
Todo.md must follow this comprehensive structured format with many sections:
```
-# Task: [Task Name] - Created [Timestamp]
+# Task: [Task Name]
## 1. Task Analysis and Planning
- [ ] 1.1 Understand user requirements completely
@@ -169,7 +163,6 @@ Todo.md must follow this comprehensive structured format with many sections:
- [ ] 1.3 Research similar existing solutions
- [ ] 1.4 Define success criteria and deliverables
- [ ] 1.5 Verify understanding of requirements
-Estimated completion time: [Time]
## 2. Environment Setup and Preparation
- [ ] 2.1 Check current environment state
@@ -177,7 +170,6 @@ Estimated completion time: [Time]
- [ ] 2.3 Set up project structure
- [ ] 2.4 Configure development tools
- [ ] 2.5 Verify environment readiness
-Estimated completion time: [Time]
## 3. Research and Information Gathering
- [ ] 3.1 Search for relevant documentation
@@ -185,7 +177,6 @@ Estimated completion time: [Time]
- [ ] 3.3 Collect reference materials
- [ ] 3.4 Organize findings
- [ ] 3.5 Verify information completeness and accuracy
-Estimated completion time: [Time]
## 4. Design and Architecture
- [ ] 4.1 Create system architecture diagram
@@ -193,7 +184,6 @@ Estimated completion time: [Time]
- [ ] 4.3 Design data structures
- [ ] 4.4 Plan implementation approach
- [ ] 4.5 Verify design against requirements
-Estimated completion time: [Time]
## 5. Implementation - Component A
- [ ] 5.1 Implement core functionality
@@ -201,7 +191,6 @@ Estimated completion time: [Time]
- [ ] 5.3 Optimize performance
- [ ] 5.4 Document code
- [ ] 5.5 Verify component functionality
-Estimated completion time: [Time]
## 6. Implementation - Component B
- [ ] 6.1 Implement core functionality
@@ -209,7 +198,6 @@ Estimated completion time: [Time]
- [ ] 6.3 Optimize performance
- [ ] 6.4 Document code
- [ ] 6.5 Verify component functionality
-Estimated completion time: [Time]
## 7. Integration and Testing
- [ ] 7.1 Integrate all components
@@ -217,7 +205,6 @@ Estimated completion time: [Time]
- [ ] 7.3 Fix identified issues
- [ ] 7.4 Verify system behavior
- [ ] 7.5 Document test results
-Estimated completion time: [Time]
## 8. Deployment and Delivery
- [ ] 8.1 Prepare deployment package
@@ -225,7 +212,6 @@ Estimated completion time: [Time]
- [ ] 8.3 Verify deployment success
- [ ] 8.4 Document deployment process
- [ ] 8.5 Prepare user documentation
-Estimated completion time: [Time]
## 9. Final Verification
- [ ] 9.1 Validate all deliverables against requirements
@@ -233,14 +219,13 @@ Estimated completion time: [Time]
- [ ] 9.3 Prepare comprehensive summary
- [ ] 9.4 Compile all documentation
- [ ] 9.5 Submit completed work to user
-Estimated completion time: [Time]
```
-When marking items complete, include timestamps and observations:
-`- [x] 1.1 Understand user requirements completely - Completed [Timestamp] - [Brief observation]`
+When marking items complete, include observations:
+`- [x] 1.1 Understand user requirements completely - [Brief observation]`
SECTION TRANSITIONS must be documented:
-`## Completed Section: [Section Name] - [Timestamp]
+`## Completed Section: [Section Name]
Summary: [Comprehensive summary of section achievements and insights]`
diff --git a/backend/agent/run.py b/backend/agent/run.py
index df7cd68b..4dd2c95d 100644
--- a/backend/agent/run.py
+++ b/backend/agent/run.py
@@ -9,7 +9,7 @@ from agentpress.thread_manager import ThreadManager
from agentpress.response_processor import ProcessorConfig
from agent.tools.sb_browse_tool import SandboxBrowseTool
from agent.tools.sb_shell_tool import SandboxShellTool
-from agent.tools.sb_website_tool import SandboxWebsiteTool
+# from agent.tools.sb_website_tool import SandboxWebsiteTool
from agent.tools.sb_files_tool import SandboxFilesTool
from agent.prompt import get_system_prompt
from agent.tools.utils.daytona_sandbox import daytona, create_sandbox, get_or_start_sandbox
@@ -28,7 +28,7 @@ async def run_agent(thread_id: str, project_id: str, stream: bool = True, thread
if project.data[0]['sandbox_id']:
sandbox_id = project.data[0]['sandbox_id']
sandbox_pass = project.data[0]['sandbox_pass']
- sandbox = await get_or_start_sandbox(sandbox_id, sandbox_pass)
+ sandbox = await get_or_start_sandbox(sandbox_id)
else:
sandbox_pass = str(uuid4())
sandbox = create_sandbox(sandbox_pass)
@@ -37,13 +37,13 @@ async def run_agent(thread_id: str, project_id: str, stream: bool = True, thread
'sandbox_id': sandbox_id,
'sandbox_pass': sandbox_pass
}).eq('project_id', project_id).execute()
- ### ---
thread_manager.add_tool(SandboxBrowseTool, sandbox_id=sandbox_id, password=sandbox_pass)
- thread_manager.add_tool(SandboxWebsiteTool, sandbox_id=sandbox_id, password=sandbox_pass)
+ # thread_manager.add_tool(SandboxWebsiteTool, sandbox_id=sandbox_id, password=sandbox_pass)
thread_manager.add_tool(SandboxShellTool, sandbox_id=sandbox_id, password=sandbox_pass)
thread_manager.add_tool(SandboxFilesTool, sandbox_id=sandbox_id, password=sandbox_pass)
+ files_tool = SandboxFilesTool(sandbox_id=sandbox_id, password=sandbox_pass)
system_message = { "role": "system", "content": get_system_prompt() }
@@ -56,8 +56,6 @@ async def run_agent(thread_id: str, project_id: str, stream: bool = True, thread
# model_name = "bedrock/anthropic.claude-3-7-sonnet-20250219-v1:0"
# model_name = "bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0"
- files_tool = SandboxFilesTool(sandbox_id=sandbox_id, password=sandbox_pass)
-
iteration_count = 0
continue_execution = True
@@ -73,13 +71,15 @@ async def run_agent(thread_id: str, project_id: str, stream: bool = True, thread
state_message = {
"role": "user",
"content": f"""
-Current development environment workspace state:
+Current workspace state:
{state_str}
"""
}
+ # print(f"State message: {state_message}")
+
response = await thread_manager.run_thread(
thread_id=thread_id,
system_prompt=system_message,
@@ -91,8 +91,8 @@ Current development environment workspace state:
tool_choice="auto",
max_xml_tool_calls=1,
processor_config=ProcessorConfig(
- xml_tool_calling=False,
- native_tool_calling=True,
+ xml_tool_calling=True,
+ native_tool_calling=False,
execute_tools=True,
execute_on_stream=True,
tool_execution_strategy="parallel",
@@ -116,6 +116,13 @@ Current development environment workspace state:
function_name = tool_call.get('function', {}).get('name', '')
if function_name in ['message_ask_user', 'idle']:
last_tool_call = function_name
+ # Check for the XML forms of the message_ask_user / idle tool calls in content chunks
+ elif chunk.get('type') == 'content' and 'content' in chunk:
+ content = chunk.get('content', '')
+ if '' in content or '' in content:
+ xml_tool = 'message_ask_user' if '' in content else 'idle'
+ last_tool_call = xml_tool
+ print(f"Agent used XML tool: {xml_tool}")
yield chunk
@@ -126,7 +133,6 @@ Current development environment workspace state:
-
# TESTING
async def test_agent():
diff --git a/backend/agent/tools/sb_files_tool.py b/backend/agent/tools/sb_files_tool.py
index 05ae2ddf..1e80a6f2 100644
--- a/backend/agent/tools/sb_files_tool.py
+++ b/backend/agent/tools/sb_files_tool.py
@@ -94,11 +94,10 @@ class SandboxFilesTool(SandboxToolsBase):
tag_name="create-file",
mappings=[
{"param_name": "file_path", "node_type": "attribute", "path": "."},
- {"param_name": "file_contents", "node_type": "content", "path": "."},
- {"param_name": "permissions", "node_type": "attribute", "path": "@permissions"}
+ {"param_name": "file_contents", "node_type": "content", "path": "."}
],
example='''
-
+
File contents go here
'''
@@ -225,11 +224,10 @@ class SandboxFilesTool(SandboxToolsBase):
tag_name="full-file-rewrite",
mappings=[
{"param_name": "file_path", "node_type": "attribute", "path": "."},
- {"param_name": "file_contents", "node_type": "content", "path": "."},
- {"param_name": "permissions", "node_type": "attribute", "path": "@permissions"}
+ {"param_name": "file_contents", "node_type": "content", "path": "."}
],
example='''
-
+
New file contents go here, replacing all existing content
'''
@@ -316,9 +314,9 @@ class SandboxFilesTool(SandboxToolsBase):
@xml_schema(
tag_name="search-files",
mappings=[
- {"param_name": "path", "node_type": "attribute", "path": "@path"},
- {"param_name": "pattern", "node_type": "attribute", "path": "@pattern"},
- {"param_name": "recursive", "node_type": "attribute", "path": "@recursive"}
+ {"param_name": "path", "node_type": "attribute", "path": "."},
+ {"param_name": "pattern", "node_type": "attribute", "path": "."},
+ {"param_name": "recursive", "node_type": "attribute", "path": "."}
],
example='''
@@ -378,9 +376,9 @@ class SandboxFilesTool(SandboxToolsBase):
@xml_schema(
tag_name="replace-in-file",
mappings=[
- {"param_name": "file", "node_type": "attribute", "path": "@file"},
- {"param_name": "pattern", "node_type": "element", "path": "pattern"},
- {"param_name": "new_value", "node_type": "element", "path": "new_value"}
+ {"param_name": "file", "node_type": "attribute", "path": "."},
+ {"param_name": "pattern", "node_type": "element", "path": "."},
+ {"param_name": "new_value", "node_type": "element", "path": "."}
],
example='''
diff --git a/backend/agent/tools/utils/daytona_sandbox.py b/backend/agent/tools/utils/daytona_sandbox.py
index 14a15319..b464c15d 100644
--- a/backend/agent/tools/utils/daytona_sandbox.py
+++ b/backend/agent/tools/utils/daytona_sandbox.py
@@ -270,7 +270,7 @@ def wait_for_api_ready(sandbox):
return api_url
-async def get_or_start_sandbox(sandbox_id: str, sandbox_pass: str):
+async def get_or_start_sandbox(sandbox_id: str):
"""Retrieve a sandbox by ID, check its state, and start it if needed.
Also ensure the sandbox_browser_api and HTTP server services are running."""
diff --git a/backend/agentpress/response_processor.py b/backend/agentpress/response_processor.py
index a6e66d0b..848fb8c3 100644
--- a/backend/agentpress/response_processor.py
+++ b/backend/agentpress/response_processor.py
@@ -81,7 +81,10 @@ class ResponseProcessor:
Args:
tool_registry: Registry of available tools
- add_message_callback: Callback function to add messages to the thread
+ add_message_callback: Callback function to add messages to the thread.
+ This function is used to record assistant messages, tool calls,
+ and tool results in the conversation history, making them
+ available for the LLM in subsequent interactions.
"""
self.tool_registry = tool_registry
self.add_message = add_message_callback
@@ -426,8 +429,8 @@ class ResponseProcessor:
logger.info(f"Stream finished with reason: xml_tool_limit_reached after {xml_tool_call_count} XML tool calls")
# After streaming completes, process any remaining content and tool calls
- # Only do this if we didn't stop due to XML tool limit (already handled pending executions above)
- if accumulated_content and finish_reason != "xml_tool_limit_reached":
+ # IMPORTANT: Always process accumulated content, even when the XML tool limit is reached
+ if accumulated_content:
# Extract final complete tool calls for native format
complete_native_tool_calls = []
if config.native_tool_calling:
@@ -621,7 +624,7 @@ class ResponseProcessor:
}
})
- # Add assistant message FIRST
+ # Add assistant message FIRST - always do this regardless of finish_reason
message_data = {
"role": "assistant",
"content": content,
@@ -1088,7 +1091,20 @@ class ResponseProcessor:
result: ToolResult,
strategy: Union[XmlAddingStrategy, str] = "assistant_message"
):
- """Add a tool result to the thread based on the specified format."""
+ """Add a tool result to the conversation thread based on the specified format.
+
+ This method formats tool results and adds them to the conversation history,
+ making them visible to the LLM in subsequent interactions. Results can be
+ added either as native tool messages (OpenAI format) or as XML-wrapped content
+ with a specified role (user or assistant).
+
+ Args:
+ thread_id: ID of the conversation thread
+ tool_call: The original tool call that produced this result
+ result: The result from the tool execution
+ strategy: How to add XML tool results to the conversation
+ ("user_message", "assistant_message", or "inline_edit")
+ """
try:
# Check if this is a native function call (has id field)
if "id" in tool_call:
@@ -1122,7 +1138,8 @@ class ResponseProcessor:
logger.info(f"Adding native tool result for tool_call_id={tool_call['id']} with role=tool")
- # Add as a tool message
+ # Add as a tool message to the conversation history
+ # This makes the result visible to the LLM in the next turn
await self.add_message(
thread_id=thread_id,
type="tool", # Special type for tool responses
@@ -1142,7 +1159,8 @@ class ResponseProcessor:
# Format the content using the formatting helper
content = self._format_xml_tool_result(tool_call, result)
- # Add the message with the appropriate role
+ # Add the message with the appropriate role to the conversation history
+ # This allows the LLM to see the tool result in subsequent interactions
result_message = {
"role": result_role,
"content": content
diff --git a/backend/utils/logger.py b/backend/utils/logger.py
index 80063207..acb4bbe8 100644
--- a/backend/utils/logger.py
+++ b/backend/utils/logger.py
@@ -83,7 +83,7 @@ def setup_logger(name: str = 'agentpress') -> logging.Logger:
# Console handler
console_handler = logging.StreamHandler(sys.stdout)
- console_handler.setLevel(logging.INFO)
+ console_handler.setLevel(logging.DEBUG)
# Create formatters
file_formatter = logging.Formatter(