From ff7d499d052fade10ac2da29dc7e0a7f1a473ea6 Mon Sep 17 00:00:00 2001
From: marko-kraemer <markokraemer.mail@gmail.com>
Date: Fri, 11 Apr 2025 18:30:58 +0100
Subject: [PATCH] Rename message tools to notify/ask/complete and attachments to references

---
 backend/agent/prompt.py             |  18 ++--
 backend/agent/run.py                |   4 +-
 backend/agent/tools/message_tool.py | 151 ++++++++++++++++------------
 3 files changed, 98 insertions(+), 75 deletions(-)

diff --git a/backend/agent/prompt.py b/backend/agent/prompt.py
index b40c9fd5..5260cd63 100644
--- a/backend/agent/prompt.py
+++ b/backend/agent/prompt.py
@@ -50,7 +50,7 @@ The todo.md file is your primary working document and action plan:
 6. The todo.md serves as your instruction set - if a task is in todo.md, you are responsible for completing it
 7. Update the todo.md as you make progress, adding new tasks as needed and marking completed ones
 8. Never delete tasks from todo.md - instead mark them complete with [x] to maintain a record of your work
-9. Once ALL tasks in todo.md are marked complete [x], you MUST call either the 'idle' state or 'message_ask_user' tool to signal task completion. This is the ONLY way to properly terminate execution.
+9. Once ALL tasks in todo.md are marked complete [x], you MUST call either the 'complete' state or 'ask' tool to signal task completion. This is the ONLY way to properly terminate execution.
 10. SCOPE CONSTRAINT: Focus on completing existing tasks before adding new ones; avoid continuously expanding scope
 11. CAPABILITY AWARENESS: Only add tasks that are achievable with your available tools and capabilities
 12. FINALITY: After marking a section complete, do not reopen it or add new tasks to it unless explicitly directed by the user
@@ -61,12 +61,12 @@ The todo.md file is your primary working document and action plan:
 # EXECUTION PHILOSOPHY
 Your approach is deliberately methodical and persistent:
 
-1. You operate autonomously until task completion, only entering idle state when finished
+1. You operate autonomously until task completion, only entering complete state when finished
 2. You execute one step at a time, following a consistent loop: evaluate state → select tool → execute → track progress
 3. Every action is guided by your todo.md, and you consult it before selecting any tool
 4. You thoroughly verify each completed step before moving forward
 5. You provide progress updates to users without requiring their input except when essential
-6. You MUST use either 'idle' state or 'message_ask_user' tool to stop execution - no other method will halt the execution loop
+6. You MUST use either 'complete' state or 'ask' tool to stop execution - no other method will halt the execution loop
 7. CRITICALLY IMPORTANT: You MUST ALWAYS explicitly use one of these two tools when you've completed your task or need user input
 
 # TECHNICAL PROTOCOLS
@@ -229,7 +229,13 @@ Your approach is deliberately methodical and persistent:
     4. Create verification scripts for complex transformations
     5. Run verification steps and use their results
 
-- COMMUNICATION: Use message tools for updates and essential questions. Include the 'attachments' parameter with file paths or URLs when sharing resources with users.
+- COMMUNICATION TOOLS:
+  * Use 'notify' for updates and progress reports
+  * Use 'ask' for essential questions and clarifications
+  * Include the 'references' parameter with file paths or URLs when sharing resources
+  * Use 'complete' only when all tasks are finished and verified
+  * Never use 'complete' unless all todo.md items are marked [x]
+
 - TOOL RESULTS: Carefully analyze all tool execution results to inform your next actions. These results provide critical environmental information including file contents, execution outputs, and search results.
 - FILES: Create organized file structures with clear naming conventions. Store different types of data in appropriate formats.
 - PYTHON EXECUTION: Create reusable modules with proper error handling and logging. Focus on maintainability and readability.
@@ -281,8 +287,8 @@ Your approach is deliberately methodical and persistent:
 7. COMPLETION: Deliver final output with all relevant files as attachments
 
 You persist autonomously throughout this cycle until the task is fully complete. IMPORTANT: You MUST ONLY terminate execution by either:
-1. Entering 'idle' state upon task completion, or
-2. Using the 'message_ask_user' tool when user input is required
+1. Entering 'complete' state upon task completion, or
+2. Using the 'ask' tool when user input is required
 
 No other response pattern will stop the execution loop. The system will continue running you in a loop if you don't explicitly use one of these tools to signal completion or need for user input.
 """
diff --git a/backend/agent/run.py b/backend/agent/run.py
index ae8c2d22..4c1533fe 100644
--- a/backend/agent/run.py
+++ b/backend/agent/run.py
@@ -46,14 +46,14 @@ async def run_agent(thread_id: str, project_id: str, stream: bool = True, thread
 
     system_message = { "role": "system", "content": get_system_prompt() }
 
-    model_name = "anthropic/claude-3-7-sonnet-latest"
+    # model_name = "anthropic/claude-3-7-sonnet-latest"
     # model_name = "bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0"         
     # model_name = "anthropic/claude-3-5-sonnet-latest" 
     # model_name = "anthropic/claude-3-7-sonnet-latest"
     # model_name = "openai/gpt-4o"
     # model_name = "groq/deepseek-r1-distill-llama-70b"
-    # model_name = "bedrock/anthropic.claude-3-7-sonnet-20250219-v1:0"
     # model_name = "bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0"
+    model_name = "bedrock/anthropic.claude-3-7-sonnet-20250219-v1:0"
 
     iteration_count = 0
     continue_execution = True
diff --git a/backend/agent/tools/message_tool.py b/backend/agent/tools/message_tool.py
index 4d3baeac..e4fb4995 100644
--- a/backend/agent/tools/message_tool.py
+++ b/backend/agent/tools/message_tool.py
@@ -15,21 +15,21 @@ class MessageTool(Tool):
     @openapi_schema({
         "type": "function",
         "function": {
-            "name": "message_notify_user",
-            "description": "Send a message to user without requiring a response. Use for: 1) Progress updates during long-running tasks, 2) Acknowledging receipt of user instructions, 3) Reporting completion of major milestones, 4) Explaining changes in approach or strategy, 5) Summarizing findings or results without requiring input.",
+            "name": "notify",
+            "description": "Send a message to user without requiring a response. Use for: 1) Progress updates during long-running tasks, 2) Acknowledging receipt of user instructions, 3) Reporting completion of major milestones, 4) Explaining changes in approach or strategy, 5) Summarizing findings or results without requiring input. IMPORTANT: Use this tool for one-way communication only - do not use when you need user input or confirmation. Always include relevant references when sharing analysis results, generated files, or external resources. The message should be informative, actionable, and provide clear context about the current state of the task.",
             "parameters": {
                 "type": "object",
                 "properties": {
                     "text": {
                         "type": "string",
-                        "description": "Message text to display to user - should be informative and actionable"
+                        "description": "Message text to display to user - should be informative and actionable. Include: 1) Current status or progress, 2) Key findings or results, 3) Next steps or implications, 4) Any relevant context or background information."
                     },
-                    "attachments": {
+                    "references": {
                         "anyOf": [
                             {"type": "string"},
                             {"items": {"type": "string"}, "type": "array"}
                         ],
-                        "description": "(Optional) List of attachments to show to user, can be file paths or URLs. Include when referencing created files, analysis results, or external resources."
+                        "description": "(Optional) List of files or URLs to reference in the message. Include when: 1) Sharing analysis results or generated files, 2) Referencing external resources or documentation, 3) Providing supporting evidence for findings, 4) Sharing configuration files or scripts. Always use paths relative to the /workspace directory."
                     }
                 },
                 "required": ["text"]
@@ -37,39 +37,47 @@ class MessageTool(Tool):
         }
     })
     @xml_schema(
-        tag_name="message-notify-user",
+        tag_name="notify",
         mappings=[
             {"param_name": "text", "node_type": "content", "path": "."},
-            {"param_name": "attachments", "node_type": "attribute", "path": ".", "required": False}
+            {"param_name": "references", "node_type": "attribute", "path": ".", "required": False}
         ],
         example='''
-        <message-notify-user attachments="output/analysis_results.csv,output/visualization.png">
-            I've completed the data analysis and generated visualizations of the key trends. The analysis shows a 15% increase in engagement metrics over the last quarter, with the most significant growth in mobile users.
-        </message-notify-user>
+        <!-- Use notify for one-way communication when you don't need user input -->
+        <!-- Examples of when to use notify: -->
+        <!-- 1. Progress updates during long-running tasks -->
+        <!-- 2. Completion of major milestones -->
+        <!-- 3. Status changes or strategy updates -->
+        <!-- 4. Sharing analysis results or findings -->
+        <!-- 5. Providing context about next steps -->
+        
+        <notify references="output/analysis_results.csv,output/visualization.png">
+            I've completed the data analysis and generated visualizations of the key trends. The analysis shows a 15% increase in engagement metrics over the last quarter, with the most significant growth in mobile users. I've saved the detailed results in the referenced files. Next, I'll proceed with implementing the recommended optimizations for the mobile experience.
+        </notify>
         '''
     )
-    async def message_notify_user(self, text: str, attachments: Optional[Union[str, List[str]]] = None) -> ToolResult:
+    async def notify(self, text: str, references: Optional[Union[str, List[str]]] = None) -> ToolResult:
         """Send a notification message to the user without requiring a response.
         
         Args:
             text: The message to display to the user
-            attachments: Optional file paths or URLs to attach to the message
+            references: Optional file paths or URLs to reference in the message
             
         Returns:
             ToolResult indicating success or failure of the notification
         """
         try:
-            # Convert single attachment to list for consistent handling
-            if attachments and isinstance(attachments, str):
-                attachments = [attachments]
+            # Convert single reference to list for consistent handling
+            if references and isinstance(references, str):
+                references = [references]
                 
             # Format the response message
             response_text = f"NOTIFICATION: {text}"
             
-            # Add attachments information if present
-            if attachments:
-                attachment_list = "\n- ".join(attachments)
-                response_text += f"\n\nAttachments:\n- {attachment_list}"
+            # Add references information if present
+            if references:
+                reference_list = "\n- ".join(references)
+                response_text += f"\n\nReferences:\n- {reference_list}"
             
             return self.success_response(response_text)
         except Exception as e:
@@ -78,26 +86,21 @@ class MessageTool(Tool):
     @openapi_schema({
         "type": "function",
         "function": {
-            "name": "message_ask_user",
-            "description": "Ask user a question and wait for response. Use for: 1) Requesting clarification on ambiguous requirements, 2) Seeking confirmation before proceeding with high-impact changes, 3) Gathering additional information needed to complete a task, 4) Offering options and requesting user preference, 5) Validating assumptions when critical to task success.",
+            "name": "ask",
+            "description": "Ask user a question and wait for response. Use for: 1) Requesting clarification on ambiguous requirements, 2) Seeking confirmation before proceeding with high-impact changes, 3) Gathering additional information needed to complete a task, 4) Offering options and requesting user preference, 5) Validating assumptions when critical to task success. IMPORTANT: Use this tool only when user input is essential to proceed. Always provide clear context and options when applicable. Include relevant references when the question relates to specific files or resources.",
             "parameters": {
                 "type": "object",
                 "properties": {
                     "text": {
                         "type": "string",
-                        "description": "Question text to present to user - should be specific and clearly indicate what information you need"
+                        "description": "Question text to present to user - should be specific and clearly indicate what information you need. Include: 1) Clear question or request, 2) Context about why the input is needed, 3) Available options if applicable, 4) Impact of different choices, 5) Any relevant constraints or considerations."
                     },
-                    "attachments": {
+                    "references": {
                         "anyOf": [
                             {"type": "string"},
                             {"items": {"type": "string"}, "type": "array"}
                         ],
-                        "description": "(Optional) List of question-related files or reference materials. Include when the question references specific content the user needs to see."
-                    },
-                    "suggest_user_takeover": {
-                        "type": "string",
-                        "enum": ["none", "browser"],
-                        "description": "(Optional) Suggested operation for user takeover. Use 'browser' when user might need to access a website for authentication or manual interaction."
+                        "description": "(Optional) List of files or URLs to reference in the question. Include when: 1) Question relates to specific files or configurations, 2) User needs to review content before answering, 3) Options or choices are documented in files, 4) Supporting evidence or context is needed. Always use paths relative to the /workspace directory."
                     }
                 },
                 "required": ["text"]
@@ -105,46 +108,53 @@ class MessageTool(Tool):
         }
     })
     @xml_schema(
-        tag_name="message-ask-user",
+        tag_name="ask",
         mappings=[
             {"param_name": "text", "node_type": "content", "path": "."},
-            {"param_name": "attachments", "node_type": "attribute", "path": ".", "required": False},
-            {"param_name": "suggest_user_takeover", "node_type": "attribute", "path": ".", "required": False}
+            {"param_name": "references", "node_type": "attribute", "path": ".", "required": False}
         ],
         example='''
-        <message-ask-user attachments="config/database_options.json,scripts/migration_plan.md" suggest_user_takeover="none">
-            I've prepared two database migration approaches (attached). The first minimizes downtime but requires more storage temporarily, while the second has longer downtime but uses less resources. Which approach would you prefer to implement?
-        </message-ask-user>
+        <!-- Use ask when you need user input to proceed -->
+        <!-- Examples of when to use ask: -->
+        <!-- 1. Clarifying ambiguous requirements -->
+        <!-- 2. Confirming high-impact changes -->
+        <!-- 3. Choosing between implementation options -->
+        <!-- 4. Validating critical assumptions -->
+        <!-- 5. Getting missing information -->
+        
+        <ask references="recipes/chocolate_cake.txt,photos/cake_examples.jpg">
+            I'm planning to bake the chocolate cake for your birthday party. The recipe mentions "rich frosting" but doesn't specify what type. Could you clarify your preferences? For example:
+            1. Would you prefer buttercream or cream cheese frosting?
+            2. Do you want any specific flavor added to the frosting (vanilla, coffee, etc.)?
+            3. Should I add any decorative toppings like sprinkles or fruit?
+            4. Do you have any dietary restrictions I should be aware of?
+            
+            This information will help me make sure the cake meets your expectations for the celebration.
+        </ask>
         '''
     )
-    async def message_ask_user(self, text: str, attachments: Optional[Union[str, List[str]]] = None, 
-                              suggest_user_takeover: str = "none") -> ToolResult:
+    async def ask(self, text: str, references: Optional[Union[str, List[str]]] = None) -> ToolResult:
         """Ask the user a question and wait for a response.
         
         Args:
             text: The question to present to the user
-            attachments: Optional file paths or URLs to attach to the question
-            suggest_user_takeover: Optional suggestion for user takeover (none, browser)
+            references: Optional file paths or URLs to reference in the question
             
         Returns:
             ToolResult indicating the question was successfully sent
         """
         try:
-            # Convert single attachment to list for consistent handling
-            if attachments and isinstance(attachments, str):
-                attachments = [attachments]
+            # Convert single reference to list for consistent handling
+            if references and isinstance(references, str):
+                references = [references]
                 
             # Format the question message
             response_text = f"QUESTION: {text}"
             
-            # Add attachments information if present
-            if attachments:
-                attachment_list = "\n- ".join(attachments)
-                response_text += f"\n\nAttachments:\n- {attachment_list}"
-            
-            # Add user takeover suggestion if not "none"
-            if suggest_user_takeover and suggest_user_takeover != "none":
-                response_text += f"\n\nSuggested takeover: {suggest_user_takeover}"
+            # Add references information if present
+            if references:
+                reference_list = "\n- ".join(references)
+                response_text += f"\n\nReferences:\n- {reference_list}"
             
             return self.success_response(response_text, requires_response=True)
         except Exception as e:
@@ -153,33 +163,41 @@ class MessageTool(Tool):
     @openapi_schema({
         "type": "function",
         "function": {
-            "name": "idle",
-            "description": "A special tool to indicate you have completed all tasks and are about to enter idle state. Use ONLY when: 1) All tasks in todo.md are marked complete, 2) The user's original request has been fully addressed, 3) There are no pending actions or follow-ups required, 4) You've delivered all final outputs and results to the user.",
+            "name": "complete",
+            "description": "A special tool to indicate you have completed all tasks and are about to enter the complete state. Use ONLY when: 1) All tasks in todo.md are marked complete [x], 2) The user's original request has been fully addressed, 3) There are no pending actions or follow-ups required, 4) You've delivered all final outputs and results to the user. IMPORTANT: Apart from 'ask' (used when user input is required), this is the ONLY way to properly terminate execution. Never use this tool unless ALL tasks are complete and verified. Always ensure you've provided all necessary outputs and references before using this tool.",
             "parameters": {
                 "type": "object"
             }
         }
     })
     @xml_schema(
-        tag_name="idle",
+        tag_name="complete",
         mappings=[],
         example='''
-        <idle>
-        <!-- Use this tool only after completing all tasks and delivering all final outputs -->
-        <!-- All todo.md items must be marked complete [x] before using this tool -->
-        </idle>
+        <!-- Use complete ONLY when ALL tasks are finished -->
+        <!-- Prerequisites for using complete: -->
+        <!-- 1. All todo.md items marked complete [x] -->
+        <!-- 2. User's original request fully addressed -->
+        <!-- 3. All outputs and results delivered -->
+        <!-- 4. No pending actions or follow-ups -->
+        <!-- 5. All tasks verified and validated -->
+        
+        <complete>
+        <!-- This tool indicates successful completion of all tasks -->
+        <!-- The system will stop execution after this tool is used -->
+        </complete>
         '''
     )
-    async def idle(self) -> ToolResult:
-        """Indicate that the agent has completed all tasks and is entering idle state.
+    async def complete(self) -> ToolResult:
+        """Indicate that the agent has completed all tasks and is entering complete state.
         
         Returns:
-            ToolResult indicating successful transition to idle state
+            ToolResult indicating successful transition to complete state
         """
         try:
-            return self.success_response("Entering idle state")
+            return self.success_response("Entering complete state")
         except Exception as e:
-            return self.fail_response(f"Error entering idle state: {str(e)}")
+            return self.fail_response(f"Error entering complete state: {str(e)}")
 
 
 if __name__ == "__main__":
@@ -189,17 +207,16 @@ if __name__ == "__main__":
         message_tool = MessageTool()
         
         # Test notification
-        notify_result = await message_tool.message_notify_user(
+        notify_result = await message_tool.notify(
             "Processing has completed successfully!",
-            attachments=["results.txt", "output.log"]
+            references=["results.txt", "output.log"]
         )
         print("Notification result:", notify_result)
         
         # Test question
-        ask_result = await message_tool.message_ask_user(
+        ask_result = await message_tool.ask(
             "Would you like to proceed with the next phase?",
-            attachments="summary.pdf",
-            suggest_user_takeover="browser"
+            references="summary.pdf"
         )
         print("Question result:", ask_result)