From 9a8b4ace872fe73e1286f86923ff30008738b7ac Mon Sep 17 00:00:00 2001 From: dal Date: Wed, 16 Apr 2025 09:20:46 -0600 Subject: [PATCH] review agent --- .../agents/src/agents/buster_multi_agent.rs | 14 ++++- .../src/agents/modes/create_plan_prompt.rs | 1 + .../agents/src/agents/modes/review_prompt.rs | 2 +- .../file_tools/create_dashboards.rs | 13 +++- .../categories/file_tools/create_metrics.rs | 7 +++ .../file_tools/modify_dashboards.rs | 7 +++ .../categories/file_tools/modify_metrics.rs | 7 +++ .../create_plan_investigative.rs | 2 +- .../create_plan_straightforward.rs | 15 +++-- .../categories/planning_tools/review_plan.rs | 56 ++++++++++++----- .../tools/categories/response_tools/done.rs | 61 +++++++++++++++++-- .../handlers/src/chats/post_chat_handler.rs | 4 +- 12 files changed, 155 insertions(+), 34 deletions(-) diff --git a/api/libs/agents/src/agents/buster_multi_agent.rs b/api/libs/agents/src/agents/buster_multi_agent.rs index d8c6cefe0..6b66e9972 100644 --- a/api/libs/agents/src/agents/buster_multi_agent.rs +++ b/api/libs/agents/src/agents/buster_multi_agent.rs @@ -72,7 +72,7 @@ impl BusterMultiAgent { let create_dashboard_files_tool = CreateDashboardFilesTool::new(Arc::clone(&self.agent)); let modify_dashboard_files_tool = ModifyDashboardFilesTool::new(Arc::clone(&self.agent)); let message_user_clarifying_question_tool = MessageUserClarifyingQuestion::new(); - let done_tool = Done::new(); + let done_tool = Done::new(Arc::clone(&self.agent)); let no_search_needed_tool = NoSearchNeededTool::new(Arc::clone(&self.agent)); let review_tool = ReviewPlan::new(Arc::clone(&self.agent)); @@ -99,7 +99,11 @@ impl BusterMultiAgent { }); let review_condition = - Some(|state: &HashMap| -> bool { state.contains_key("review_needed") }); + Some(|state: &HashMap| -> bool { + state.get("review_needed") + .and_then(|value| value.as_bool()) + .unwrap_or(false) + }); let planning_tools_condition = Some(|state: &HashMap| -> bool { let searched_catalog = state @@ -281,7 +285,11 @@ impl BusterMultiAgent { }; let needs_review_condition = - |state: &HashMap| -> bool { state.contains_key("review_needed") }; + |state: &HashMap| -> bool { + state.get("review_needed") + .and_then(|value| value.as_bool()) + .unwrap_or(false) + }; // Add prompt rules (order matters) // The agent will use the prompt associated with the first condition that evaluates to true. diff --git a/api/libs/agents/src/agents/modes/create_plan_prompt.rs b/api/libs/agents/src/agents/modes/create_plan_prompt.rs index 6c7e7d937..1399a3fd8 100644 --- a/api/libs/agents/src/agents/modes/create_plan_prompt.rs +++ b/api/libs/agents/src/agents/modes/create_plan_prompt.rs @@ -133,6 +133,7 @@ To determine whether to use a Straightforward Plan or an Investigative Plan, con - When creating a plan that involves generating assets (visualizations and dashboards), do not include a separate step for delivering these assets, as they are automatically displayed to the user upon creation. - Assume that all datasets required for the plan are available, as their availability has already been confirmed in the previous step. - If the user's request includes aspects that are not supported (e.g., specific visualizations, forecasts, etc.), do not include these in the step-by-step plan. Instead, mention them in the note section of the plan, and specify that they should be addressed in the final response to the user. +- The tools used for creating plans include a `todos` argument. This argument is a list of short summary points. **Crucially, each step in the generated plan must correspond to exactly one item in the `todos` list.** These `todos` serve as a concise overview of the plan's execution steps. Do not include any review steps in the `todos` list, as reviews are handled separately. **Examples** diff --git a/api/libs/agents/src/agents/modes/review_prompt.rs b/api/libs/agents/src/agents/modes/review_prompt.rs index fb681ee92..01f98b8a4 100644 --- a/api/libs/agents/src/agents/modes/review_prompt.rs +++ b/api/libs/agents/src/agents/modes/review_prompt.rs @@ -20,7 +20,7 @@ Tool Calling You have two tools to do your job: review_plan: Marks a task as complete. Needs todo_item (an integer) to specify which task (starts at 1). -done: Sends the final response to the user and ends the workflow. +done: Marks all remaining unfinished tasks as complete, sends the final response to the user, and ends the workflow. Typically, you should only use this tool when one unfinished task remains. Follow these rules: diff --git a/api/libs/agents/src/tools/categories/file_tools/create_dashboards.rs b/api/libs/agents/src/tools/categories/file_tools/create_dashboards.rs index 76aeeda49..30d0f82fa 100644 --- a/api/libs/agents/src/tools/categories/file_tools/create_dashboards.rs +++ b/api/libs/agents/src/tools/categories/file_tools/create_dashboards.rs @@ -305,8 +305,19 @@ impl ToolExecutor for CreateDashboardFilesTool { let duration = start_time.elapsed().as_millis() as i64; self.agent - .set_state_value(String::from("metrics_available"), Value::Bool(true)) + .set_state_value(String::from("dashboards_available"), Value::Bool(true)) .await; + + self.agent + .set_state_value(String::from("files_available"), Value::Bool(true)) + .await; + + // Set review_needed flag if execution was successful + if failed_files.is_empty() { + self.agent + .set_state_value(String::from("review_needed"), Value::Bool(true)) + .await; + } Ok(CreateDashboardFilesOutput { message, diff --git a/api/libs/agents/src/tools/categories/file_tools/create_metrics.rs b/api/libs/agents/src/tools/categories/file_tools/create_metrics.rs index 7c5159fed..9e6c6c2e4 100644 --- a/api/libs/agents/src/tools/categories/file_tools/create_metrics.rs +++ b/api/libs/agents/src/tools/categories/file_tools/create_metrics.rs @@ -242,6 +242,13 @@ impl ToolExecutor for CreateMetricFilesTool { self.agent .set_state_value(String::from("files_available"), Value::Bool(true)) .await; + + // Set review_needed flag if execution was successful + if failed_files.is_empty() { + self.agent + .set_state_value(String::from("review_needed"), Value::Bool(true)) + .await; + } Ok(CreateMetricFilesOutput { message, diff --git a/api/libs/agents/src/tools/categories/file_tools/modify_dashboards.rs b/api/libs/agents/src/tools/categories/file_tools/modify_dashboards.rs index 863e0e4a8..3bc35ede4 100644 --- a/api/libs/agents/src/tools/categories/file_tools/modify_dashboards.rs +++ b/api/libs/agents/src/tools/categories/file_tools/modify_dashboards.rs @@ -359,6 +359,13 @@ impl ToolExecutor for ModifyDashboardFilesTool { .map(|(file_name, error)| FailedFileModification { file_name, error }), ); + // Set review_needed flag if execution was successful + if output.failed_files.is_empty() { + self.agent + .set_state_value(String::from("review_needed"), Value::Bool(true)) + .await; + } + Ok(output) } diff --git a/api/libs/agents/src/tools/categories/file_tools/modify_metrics.rs b/api/libs/agents/src/tools/categories/file_tools/modify_metrics.rs index 9f214e210..d17f6bf12 100644 --- a/api/libs/agents/src/tools/categories/file_tools/modify_metrics.rs +++ b/api/libs/agents/src/tools/categories/file_tools/modify_metrics.rs @@ -421,6 +421,13 @@ impl ToolExecutor for ModifyMetricFilesTool { .map(|(file_name, error)| FailedFileModification { file_name, error }), ); + // Set review_needed flag if execution was successful + if output.failed_files.is_empty() { + self.agent + .set_state_value(String::from("review_needed"), Value::Bool(true)) + .await; + } + Ok(output) } diff --git a/api/libs/agents/src/tools/categories/planning_tools/create_plan_investigative.rs b/api/libs/agents/src/tools/categories/planning_tools/create_plan_investigative.rs index b81a0fcb6..bef2cc4aa 100644 --- a/api/libs/agents/src/tools/categories/planning_tools/create_plan_investigative.rs +++ b/api/libs/agents/src/tools/categories/planning_tools/create_plan_investigative.rs @@ -81,7 +81,7 @@ impl ToolExecutor for CreatePlanInvestigative { }, "todos": { "type": "array", - "description": "Ordered todo points summarizing the plan. There should be one todo for each step in the plan, in order. For example, if the plan has two steps, plan_todos should have two items, each summarizing a step. Do not include review or response steps—these will be handled by a separate agent.", + "description": "Ordered todo points summarizing the steps of the plan. There should be max one todo for each step in the plan, in order. For example, if the plan has two steps, plan_todos should have two items, each summarizing a step. Do not include review or response steps—these will be handled by a separate agent.", "items": { "type": "string" }, }, }, diff --git a/api/libs/agents/src/tools/categories/planning_tools/create_plan_straightforward.rs b/api/libs/agents/src/tools/categories/planning_tools/create_plan_straightforward.rs index 3972a7c24..6f6d0f020 100644 --- a/api/libs/agents/src/tools/categories/planning_tools/create_plan_straightforward.rs +++ b/api/libs/agents/src/tools/categories/planning_tools/create_plan_straightforward.rs @@ -58,9 +58,17 @@ impl ToolExecutor for CreatePlanStraightforward { .set_state_value(String::from("todos"), Value::Array(todos_state_objects)) .await; - let todos_string = params.todos.iter().map(|item| format!("[ ] {}", item)).collect::>().join("\n"); + let todos_string = params + .todos + .iter() + .map(|item| format!("[ ] {}", item)) + .collect::>() + .join("\n"); - Ok(CreatePlanStraightforwardOutput { success: true, todos: todos_string }) + Ok(CreatePlanStraightforwardOutput { + success: true, + todos: todos_string, + }) } async fn get_schema(&self) -> Value { @@ -81,8 +89,7 @@ impl ToolExecutor for CreatePlanStraightforward { }, "todos": { "type": "array", - "description": "Ordered todo points summarizing the plan. There should be one todo for each step in the plan, in order. For example, if the plan has two steps, plan_todos should have two items, each summarizing a step. Do not include review or response steps—these will be handled by a separate agent.", - "items": { "type": "string" }, + "description": "Ordered todo points summarizing the steps of the plan. There should be max one todo for each step in the plan, in order. For example, if the plan has two steps, plan_todos should have two items, each summarizing a step. Do not include review or response steps—these will be handled by a separate agent.", "items": { "type": "string" }, }, }, "additionalProperties": false diff --git a/api/libs/agents/src/tools/categories/planning_tools/review_plan.rs b/api/libs/agents/src/tools/categories/planning_tools/review_plan.rs index 5bc8b78c2..296c07980 100644 --- a/api/libs/agents/src/tools/categories/planning_tools/review_plan.rs +++ b/api/libs/agents/src/tools/categories/planning_tools/review_plan.rs @@ -14,7 +14,7 @@ pub struct ReviewPlanOutput { #[derive(Debug, Deserialize)] pub struct ReviewPlanInput { - pub todo_item: usize, // 0-based index + pub todo_items: Vec, // 1-based index } pub struct ReviewPlan { @@ -45,16 +45,32 @@ impl ToolExecutor for ReviewPlan { } }; - let idx = params.todo_item; - if idx >= todos.len() { - return Err(anyhow::anyhow!("todo_item index {} out of range ({} todos)", idx, todos.len())); - } + let total_todos = todos.len(); - // Mark the todo at the given index as complete - if let Some(Value::Object(map)) = todos.get_mut(idx) { - map.insert("completed".to_string(), Value::Bool(true)); - } else { - return Err(anyhow::anyhow!("Todo item at index {} is not a valid object.", idx)); + for idx_one_based in ¶ms.todo_items { + // Convert 1-based index to 0-based index + if *idx_one_based == 0 { + return Err(anyhow::anyhow!("todo_item index cannot be 0, indexing starts from 1.")); + } + let idx_zero_based = *idx_one_based - 1; + + if idx_zero_based >= total_todos { + return Err(anyhow::anyhow!( + "todo_item index {} out of range ({} todos, 1-based)", + idx_one_based, + total_todos + )); + } + + // Mark the todo at the given index as complete + if let Some(Value::Object(map)) = todos.get_mut(idx_zero_based) { + map.insert("completed".to_string(), Value::Bool(true)); + } else { + return Err(anyhow::anyhow!( + "Todo item at index {} (1-based) is not a valid object.", + idx_one_based + )); + } } // Save the updated todos back to state @@ -74,23 +90,31 @@ impl ToolExecutor for ReviewPlan { .collect::>() .join("\n"); + // Set review_needed to false after review + self.agent + .set_state_value(String::from("review_needed"), Value::Bool(false)) + .await; + Ok(ReviewPlanOutput { success: true, todos: todos_string }) } async fn get_schema(&self) -> Value { serde_json::json!({ "name": self.get_name(), - "description": "Marks a task as complete by its index in the to-do list.", + "description": "Marks one or more tasks as complete by their 1-based indices in the to-do list.", "parameters": { "type": "object", "properties": { - "todo_item": { - "type": "integer", - "description": "The 0-based index of the task to mark as complete (0 is the first item).", - "minimum": 0 + "todo_items": { + "type": "array", + "items": { + "type": "integer", + "minimum": 1 + }, + "description": "A list of 1-based indices of the tasks to mark as complete (1 is the first item)." } }, - "required": ["todo_item"] + "required": ["todo_items"] } }) } diff --git a/api/libs/agents/src/tools/categories/response_tools/done.rs b/api/libs/agents/src/tools/categories/response_tools/done.rs index 064fdabc0..51a59bd4b 100644 --- a/api/libs/agents/src/tools/categories/response_tools/done.rs +++ b/api/libs/agents/src/tools/categories/response_tools/done.rs @@ -2,8 +2,9 @@ use anyhow::Result; use async_trait::async_trait; use serde::{Deserialize, Serialize}; use serde_json::Value; +use std::sync::Arc; -use crate::tools::ToolExecutor; +use crate::{agent::Agent, tools::ToolExecutor}; #[derive(Debug, Deserialize, Serialize)] pub struct DoneInput { @@ -14,13 +15,16 @@ pub struct DoneInput { #[derive(Debug, Serialize, Deserialize)] pub struct DoneOutput { pub success: bool, + pub todos: String, } -pub struct Done; +pub struct Done { + agent: Arc, +} impl Done { - pub fn new() -> Self { - Self + pub fn new(agent: Arc) -> Self { + Self { agent } } } @@ -34,15 +38,60 @@ impl ToolExecutor for Done { } async fn execute(&self, params: Self::Params, _tool_call_id: String) -> Result { + // Get the current todos from state + let mut todos = match self.agent.get_state_value("todos").await { + Some(Value::Array(arr)) => arr, + _ => { + // If no todos exist, just return success without a list + return Ok(DoneOutput { success: true, todos: "No to-do list found.".to_string() }); + } + }; + + let mut marked_by_done = vec![]; // Track items marked by this tool + + // Mark all remaining unfinished todos as complete + for (idx, todo_val) in todos.iter_mut().enumerate() { + if let Value::Object(map) = todo_val { + let is_completed = map.get("completed").and_then(Value::as_bool).unwrap_or(false); + if !is_completed { + map.insert("completed".to_string(), Value::Bool(true)); + marked_by_done.push(idx); // Track 0-based index + } + } else { + // Handle invalid item format if necessary, maybe log a warning? + eprintln!("Warning: Invalid todo item format at index {}", idx); + } + } + + // Save the updated todos back to state + self.agent.set_state_value("todos".to_string(), Value::Array(todos.clone())).await; // Clone needed for iteration below + + + // Format the output string, potentially noting items marked by 'done' + let todos_string = todos.iter().enumerate() + .map(|(idx, todo_val)| { + if let Value::Object(map) = todo_val { + let completed = map.get("completed").and_then(Value::as_bool).unwrap_or(false); // Should always be true now + let todo_text = map.get("todo").and_then(Value::as_str).unwrap_or("Invalid todo text"); + let annotation = if marked_by_done.contains(&idx) { " *Marked complete by calling the done tool" } else { "" }; + format!("[x] {}{}", todo_text, annotation) + } else { + "Invalid todo item format".to_string() + } + }) + .collect::>() + .join("\n"); + + // This tool signals the end of the workflow and provides the final response. // The actual agent termination logic resides elsewhere. - Ok(DoneOutput { success: true }) + Ok(DoneOutput { success: true, todos: todos_string }) // Include todos in output } async fn get_schema(&self) -> Value { serde_json::json!({ "name": self.get_name(), - "description": "Use when you have completed your workflow and are ready to send a final response to the user.", + "description": "Marks all remaining unfinished tasks as complete, sends a final response to the user, and ends the workflow. Use this when the workflow is finished.", "parameters": { "type": "object", "required": [ diff --git a/api/libs/handlers/src/chats/post_chat_handler.rs b/api/libs/handlers/src/chats/post_chat_handler.rs index b8d7894bd..1d073496b 100644 --- a/api/libs/handlers/src/chats/post_chat_handler.rs +++ b/api/libs/handlers/src/chats/post_chat_handler.rs @@ -1503,7 +1503,7 @@ fn transform_tool_message( "done" | "message_notify_user" | "message_user_clarifying_question" => vec![], // Add specific handling for no_search_needed to return nothing - "no_search_needed" => vec![], + "no_search_needed" | "review_plan" => vec![], // Existing tool result processing - pass duration "search_data_catalog" => tool_data_catalog_search(id.clone(), content, delta_duration)?, @@ -2157,7 +2157,7 @@ fn transform_assistant_tool_message( // parser.clear_related_chunks(tool_id.clone()); // Example hypothetical parser method } } - "no_search_needed" => { + "no_search_needed" | "review_plan" => { // Clear tracker since this tool doesn't use chunking for its reasoning output tracker.clear_chunk(tool_id.clone()); }