Merge branch 'evals' of https://github.com/buster-so/buster into evals

2025-04-10 14:09:27 -06:00 · 2025-04-10 14:09:27 -06:00 · a35d0ccbff
parent 0acc1a24b2 b0797e6f6f
commit a35d0ccbff
24 changed files with 1452 additions and 840 deletions
--- a/api/libs/agents/src/agent.rs
+++ b/api/libs/agents/src/agent.rs
--- a/api/libs/agents/src/agents/buster_cli_agent.rs
+++ b/api/libs/agents/src/agents/buster_cli_agent.rs
@ -1,18 +1,50 @@
 use anyhow::Result;
-use std::{collections::HashMap, env, sync::Arc};
+use serde::{Deserialize, Serialize};
+use serde_json::Value;
+use std::{collections::HashMap, sync::Arc};
 use tokio::sync::broadcast;
-use uuid::Uuid;
+use uuid::Uuid; // Add for Value

 use crate::{
    agent::{Agent, AgentError, AgentExt},
    models::AgentThread,
-    tools::{ // Import the CLI tools
-        EditFileContentTool, FindFilesGlobTool, ListDirectoryTool, ReadFileContentTool, RunBashCommandTool, SearchFileContentGrepTool, WriteFileContentTool
-    }, ToolExecutor,
+    tools::{
+        // Import necessary tools
+        categories::cli_tools::{
+            // Import CLI tools using correct struct names from mod.rs
+            EditFileContentTool,       // Use correct export
+            FindFilesGlobTool,         // Use correct export
+            ListDirectoryTool,         // Use correct export
+            ReadFileContentTool,       // Use correct export
+            RunBashCommandTool,        // Use correct export
+            SearchFileContentGrepTool, // Use correct export
+            WriteFileContentTool,      // Use correct export
+        },
+        IntoToolCallExecutor,
+        ToolExecutor,
+    },
 };

 use litellm::AgentMessage;

+// Type alias for the enablement condition closure
+type EnablementCondition = Box<dyn Fn(&HashMap<String, Value>) -> bool + Send + Sync>;
+
+#[derive(Debug, Serialize, Deserialize)]
+pub struct BusterCliAgentOutput {
+    pub message: String,
+    pub duration: i64,
+    pub thread_id: Uuid,
+    pub messages: Vec<AgentMessage>,
+}
+
+#[derive(Debug, Deserialize, Serialize)]
+pub struct BusterCliAgentInput {
+    pub prompt: String,
+    pub thread_id: Option<Uuid>,
+    pub message_id: Option<Uuid>,
+}
+
 #[derive(Clone)]
 pub struct BusterCliAgent {
    agent: Arc<Agent>,
@ -26,42 +58,85 @@ impl AgentExt for BusterCliAgent {

 impl BusterCliAgent {
    async fn load_tools(&self) -> Result<()> {
-        // Create tools using the shared Arc
+        // Create tools using the shared Arc and correct struct names
        let bash_tool = RunBashCommandTool::new(Arc::clone(&self.agent));
+        let edit_file_tool = EditFileContentTool::new(Arc::clone(&self.agent));
        let glob_tool = FindFilesGlobTool::new(Arc::clone(&self.agent));
        let grep_tool = SearchFileContentGrepTool::new(Arc::clone(&self.agent));
        let ls_tool = ListDirectoryTool::new(Arc::clone(&self.agent));
-        let read_tool = ReadFileContentTool::new(Arc::clone(&self.agent));
-        let edit_tool = EditFileContentTool::new(Arc::clone(&self.agent));
-        let write_tool = WriteFileContentTool::new(Arc::clone(&self.agent));
+        let read_file_tool = ReadFileContentTool::new(Arc::clone(&self.agent));
+        let write_file_tool = WriteFileContentTool::new(Arc::clone(&self.agent));

-        // Add tools to the agent
-        self.agent.add_tool(bash_tool.get_name(), bash_tool).await;
-        self.agent.add_tool(glob_tool.get_name(), glob_tool).await;
-        self.agent.add_tool(grep_tool.get_name(), grep_tool).await;
-        self.agent.add_tool(ls_tool.get_name(), ls_tool).await;
-        self.agent.add_tool(read_tool.get_name(), read_tool).await;
-        self.agent.add_tool(edit_tool.get_name(), edit_tool).await;
-        self.agent.add_tool(write_tool.get_name(), write_tool).await;
+        // Add tools - Pass None directly since these tools are always enabled
+        self.agent
+            .add_tool(
+                bash_tool.get_name(),
+                bash_tool.into_tool_call_executor(),
+                None::<EnablementCondition>,
+            )
+            .await;
+        self.agent
+            .add_tool(
+                edit_file_tool.get_name(),
+                edit_file_tool.into_tool_call_executor(),
+                None::<EnablementCondition>,
+            )
+            .await;
+        self.agent
+            .add_tool(
+                glob_tool.get_name(),
+                glob_tool.into_tool_call_executor(),
+                None::<EnablementCondition>,
+            )
+            .await;
+        self.agent
+            .add_tool(
+                grep_tool.get_name(),
+                grep_tool.into_tool_call_executor(),
+                None::<EnablementCondition>,
+            )
+            .await;
+        self.agent
+            .add_tool(
+                ls_tool.get_name(),
+                ls_tool.into_tool_call_executor(),
+                None::<EnablementCondition>,
+            )
+            .await;
+        self.agent
+            .add_tool(
+                read_file_tool.get_name(),
+                read_file_tool.into_tool_call_executor(),
+                None::<EnablementCondition>,
+            )
+            .await;
+        self.agent
+            .add_tool(
+                write_file_tool.get_name(),
+                write_file_tool.into_tool_call_executor(),
+                None::<EnablementCondition>,
+            )
+            .await;

        Ok(())
    }

    pub async fn new(
-        user_id: Uuid, 
-        session_id: Uuid, 
-        api_key: Option<String>, // Add parameter
-        base_url: Option<String> // Add parameter
+        user_id: Uuid,
+        session_id: Uuid,
+        api_key: Option<String>,  // Add parameter
+        base_url: Option<String>, // Add parameter
+        cwd: Option<String>,      // Add parameter
    ) -> Result<Self> {
        // Create agent with o3-mini model and empty tools map initially
        let agent = Arc::new(Agent::new(
            "o3-mini".to_string(), // Use o3-mini as requested
-            HashMap::new(),
            user_id,
            session_id,
            "buster_cli_agent".to_string(),
-            api_key, // Pass through
-            base_url // Pass through
+            api_key,  // Pass through
+            base_url, // Pass through
+            get_system_message(&cwd.unwrap_or_else(|| ".".to_string())),
        ));

        let cli_agent = Self { agent };
@ -69,19 +144,30 @@ impl BusterCliAgent {
        Ok(cli_agent)
    }

-    // Optional: Add from_existing if needed later, similar to BusterSuperAgent
-    // pub async fn from_existing(existing_agent: &Arc<Agent>) -> Result<Self> { ... }
+    pub async fn from_existing(existing_agent: &Arc<Agent>) -> Result<Self> {
+        let agent = Arc::new(Agent::from_existing(
+            existing_agent,
+            "buster_cli_agent".to_string(),
+            "You are a helpful CLI assistant. Use the available tools to interact with the file system and execute commands.".to_string()
+        ));
+        let manager = Self { agent };
+        manager.load_tools().await?; // Load tools with None condition
+        Ok(manager)
+    }

    pub async fn run(
        &self,
        thread: &mut AgentThread,
-        cwd: &str, // Accept current working directory
+        initialization_prompt: Option<String>, // Allow optional prompt
    ) -> Result<broadcast::Receiver<Result<AgentMessage, AgentError>>> {
-        thread.set_developer_message(get_system_message(cwd)); // Pass cwd to system message
+        if let Some(prompt) = initialization_prompt {
+            thread.set_developer_message(prompt);
+        } else {
+            // Maybe set a default CLI prompt?
+            thread.set_developer_message("You are a helpful CLI assistant. Use the available tools to interact with the file system and execute commands.".to_string());
+        }

-        // Get shutdown receiver and start processing
        let rx = self.stream_process_thread(thread).await?;
-
        Ok(rx)
    }

@ -95,7 +181,8 @@ impl BusterCliAgent {
 fn get_system_message(cwd: &str) -> String {
    // Simple fallback if Braintrust isn't configured
    // Consider adding Braintrust support similar to BusterSuperAgent if needed
-    format!(r#"
+    format!(
+        r#"
 ### Role & Task
 You are Buster CLI, a helpful AI assistant operating directly in the user's command line environment.
 Your primary goal is to assist the user with file system operations, file content manipulation, and executing shell commands based on their requests.
@ -120,5 +207,7 @@ The user is currently operating in the following directory: `{}`
 3.  **File Paths:** Assume relative paths are based on the user's *Current Working Directory* unless the user provides an absolute path.
 4.  **Conciseness:** Provide responses suitable for a terminal interface. Use markdown for code blocks when showing file content or commands.
 5.  **No Assumptions:** Don't assume files or directories exist unless you've verified with `list_directory` or `find_files_glob`.
-"#, cwd)
-} 
+"#,
+        cwd
+    )
+}
--- a/api/libs/agents/src/agents/buster_multi_agent.rs
+++ b/api/libs/agents/src/agents/buster_multi_agent.rs
--- a/api/libs/agents/src/agents/buster_super_agent.rs
+++ b/api/libs/agents/src/agents/buster_super_agent.rs
@ -1,290 +0,0 @@
-use anyhow::Result;
-use braintrust::{get_prompt_system_message, BraintrustClient};
-use serde::{Deserialize, Serialize};
-use std::sync::Arc;
-use std::{collections::HashMap, env};
-use tokio::sync::broadcast;
-use uuid::Uuid;
-
-use crate::{
-    tools::{
-        categories::{
-            file_tools::{
-                CreateDashboardFilesTool, CreateMetricFilesTool, ModifyDashboardFilesTool,
-                ModifyMetricFilesTool, SearchDataCatalogTool,
-            },
-            planning_tools::CreatePlan,
-        },
-        IntoToolCallExecutor, ToolExecutor,
-    },
-    Agent, AgentError, AgentExt, AgentThread,
-};
-
-use litellm::AgentMessage;
-
-#[derive(Debug, Serialize, Deserialize)]
-pub struct BusterSuperAgentOutput {
-    pub message: String,
-    pub duration: i64,
-    pub thread_id: Uuid,
-    pub messages: Vec<AgentMessage>,
-}
-
-#[derive(Debug, Deserialize, Serialize)]
-pub struct BusterSuperAgentInput {
-    pub prompt: String,
-    pub thread_id: Option<Uuid>,
-    pub message_id: Option<Uuid>,
-}
-
-pub struct BusterSuperAgent {
-    agent: Arc<Agent>,
-}
-
-impl AgentExt for BusterSuperAgent {
-    fn get_agent(&self) -> &Arc<Agent> {
-        &self.agent
-    }
-}
-
-impl BusterSuperAgent {
-    async fn load_tools(&self) -> Result<()> {
-        // Create tools using the shared Arc
-        let search_data_catalog_tool = SearchDataCatalogTool::new(Arc::clone(&self.agent));
-        let create_plan_tool = CreatePlan::new(Arc::clone(&self.agent));
-        let create_metric_files_tool = CreateMetricFilesTool::new(Arc::clone(&self.agent));
-        let modify_metric_files_tool = ModifyMetricFilesTool::new(Arc::clone(&self.agent));
-        let create_dashboard_files_tool = CreateDashboardFilesTool::new(Arc::clone(&self.agent));
-        let modify_dashboard_files_tool = ModifyDashboardFilesTool::new(Arc::clone(&self.agent));
-
-        // Add tools to the agent
-        self.agent
-            .add_tool(
-                search_data_catalog_tool.get_name(),
-                search_data_catalog_tool.into_tool_call_executor(),
-            )
-            .await;
-        self.agent
-            .add_tool(
-                create_metric_files_tool.get_name(),
-                create_metric_files_tool.into_tool_call_executor(),
-            )
-            .await;
-        self.agent
-            .add_tool(
-                modify_metric_files_tool.get_name(),
-                modify_metric_files_tool.into_tool_call_executor(),
-            )
-            .await;
-        self.agent
-            .add_tool(
-                create_dashboard_files_tool.get_name(),
-                create_dashboard_files_tool.into_tool_call_executor(),
-            )
-            .await;
-        self.agent
-            .add_tool(
-                modify_dashboard_files_tool.get_name(),
-                modify_dashboard_files_tool.into_tool_call_executor(),
-            )
-            .await;
-        self.agent
-            .add_tool(
-                create_plan_tool.get_name(),
-                create_plan_tool.into_tool_call_executor(),
-            )
-            .await;
-
-        Ok(())
-    }
-
-    pub async fn new(user_id: Uuid, session_id: Uuid) -> Result<Self> {
-        // Create agent with empty tools map
-        let agent = Arc::new(Agent::new(
-            "o3-mini".to_string(),
-            HashMap::new(),
-            user_id,
-            session_id,
-            "buster_super_agent".to_string(),
-            None,
-            None,
-        ));
-
-        let manager = Self { agent };
-        manager.load_tools().await?;
-        Ok(manager)
-    }
-
-    pub async fn from_existing(existing_agent: &Arc<Agent>) -> Result<Self> {
-        // Create a new agent with the same core properties and shared state/stream
-        let agent = Arc::new(Agent::from_existing(
-            existing_agent,
-            "buster_super_agent".to_string(),
-        ));
-        let manager = Self { agent };
-        manager.load_tools().await?;
-        Ok(manager)
-    }
-
-    pub async fn run(
-        &self,
-        thread: &mut AgentThread,
-    ) -> Result<broadcast::Receiver<Result<AgentMessage, AgentError>>> {
-        thread.set_developer_message(get_system_message().await);
-
-        // Get shutdown receiver
-        let rx = self.stream_process_thread(thread).await?;
-
-        Ok(rx)
-    }
-
-    /// Shutdown the manager agent and all its tools
-    pub async fn shutdown(&self) -> Result<()> {
-        self.get_agent().shutdown().await
-    }
-}
-
-async fn get_system_message() -> String {
-    if env::var("USE_BRAINTRUST_PROMPTS").is_err() {
-        return BUSTER_SUPER_AGENT_PROMPT.to_string();
-    }
-
-    let client = BraintrustClient::new(None, "96af8b2b-cf3c-494f-9092-44eb3d5b96ff").unwrap();
-    match get_prompt_system_message(&client, "12e4cf21-0b49-4de7-9c3f-a73c3e233dad").await {
-        Ok(message) => message,
-        Err(e) => {
-            eprintln!("Failed to get prompt system message: {}", e);
-            BUSTER_SUPER_AGENT_PROMPT.to_string()
-        }
-    }
-}
-
-const BUSTER_SUPER_AGENT_PROMPT: &str = r##"### Role & Task
-You are Buster, an expert analytics and data engineer. Your job is to assess what data is available and then provide fast, accurate answers to analytics questions from non-technical users. You do this by analyzing user requests, searching across a data catalog, and building metrics or dashboards.
---
-### Actions Available (Tools)
-*All actions will become available once the environment is ready and dependencies are met.*
- **search_data_catalog**  
-  - *Purpose:* Find what data is available for analysis (returns metadata, relevant datasets, documentation, and column details).  
-  - *When to use:* Before any analysis is performed or whenever you need context about the available data.  
-  - *Dependencies:* This action is always available.
- **create_plan**  
-  - *Purpose:* Define the goal and outline a plan for analysis.  
-  - *When to use:* Before starting any analysis.  
-  - *Dependencies:* This action will only be available after the `search_data_catalog` action has been called at least once.
- **create_metrics**  
-  - *Purpose:* Create new metrics.  
-  - *When to use:* For creating individual visualizations. These visualizations can either be returned to the user directly, or added to a dashboard that gets returned to the user. This tool is capable of writing SQL statements and building visualizations.  
-  - *Dependencies:* This action will only be available after the `search_data_catalog` and `create_plan` actions have been called.
- **update_metrics**  
-  - *Purpose:* Update or modify existing metrics/visualizations.  
-  - *When to use:* For updating or modifying visualizations. This tool is capable of editing SQL statements and modifying visualization configurations.  
-  - *Dependencies:* This action will only be available after the `search_data_catalog` and `create_plan` actions have been called, and at least one metric has been created (i.e., after `create_metrics` has been called at least once).
- **create_dashboards**  
-  - *Purpose:* Create dashboards and display multiple metrics in one cohesive view.  
-  - *When to use:* For creating new dashboards and adding multiple visualizations to it. For organizing several metrics together. Dashboards are sent directly to the user upon completion. You need to use `create_metrics` before you can save metrics to a dashboard.  
-  - *Dependencies:* This action will only be available after the `search_data_catalog` and `create_plan` actions have been called, and at least one metric has been created (i.e., after `create_metrics` has been called at least once).
- **update_dashboards**  
-  - *Purpose:* Update or modify existing dashboards.  
-  - *When to use:* For updating or modifying a dashboard. For rearranging the visualizations, editing the display, or adding/removing visualizations from the dashboard. This is not capable of updating the SQL or styling characteristics of individual metrics (even if they are saved to the dashboard).  
-  - *Dependencies:* This action will only be available after the `search_data_catalog` and `create_plan` actions have been called, and at least one dashboard has been created (i.e., after `create_dashboards` has been called at least once).
---
-### Key Workflow Reminders
-1. **Checking the data catalog first**  
-  - You cannot assume that any form or type of data exists prior to searching the data catalog.
-  - Prior to creating a plan or doing any kind of task/workflow, you must search the catalog to have sufficient context about the datasets you can query.
-  - If you have sufficient context (i.e. you searched the data catalog in a previous workflow) you do not need to search the data catalog again.
-2. **Answering questions about available data**  
-  - Sometimes users will ask things like "What kinds of reports can you build me?" or "What metrics can you get me about {topic_or_item}?" or "What data do you have access to?" or "How can you help me understand {topic_or_item}?. In these types of scenarios, you should search the data catalog, assess the available data, and then respond to the user.
-  - Your response should be simple, clear, and offer the user an suggestion for how you can help them or proceed.
-3. **Assessing search results from the data catalog**  
-  - Before creating a plan, you should always assess the search results from the data catalog. If the data catalog doesn't contain relevant or adequate data to answer the user request, you should respond and inform the user.
-4. **Explaining if something is impossible or not supported**  
-  - If a user requests any of the following, briefly address it and let them know that you cannot:  
-    - *Write Operations:* You can only perform read operations on the database or warehouse. You cannot perform write operations. You are only able to query existing models/tables/datasets/views. 
-    - *Forecasting & Python Analysis:* You are not currently capable of using Python or R (i.e. analyses like modeling, what-if analysis, hypothetical scenario analysis, predictive forecasting, etc). You are only capable of querying historical data using SQL. These capabilities are currently in a beta state and will be generally available in the coming months.
-    - *Unsupported Chart Types:* You are only capable of building the following visualizaitons - are table, line, multi-axis combo, bar, histogram, pie/donut, number cards, scatter plot. Other chart types are not currently supported.
-    - *Unspecified Actions:* You cannot perform any actions outside your specified capabilities (e.g. you are unable to send emails, schedule reports, integrate with other applicaitons, update data pipelines, etc).  
-    - *Web App Actions:* You are operating as a feature within a web app. You cannot control other features or aspects of the web application (i.e. adding users to the workspace, sharing things, exporting things, creating or adding metrics/dashboards to collections or folders, searching across previously built metrics/dashboards/chats/etc). These user will need to do these kind of actions manually through the UI. Inform them of this and let them know that they can contact our team, contact their system admin, or read our docs for additional help.
-    - *Non-data related requests:* You should not answer requests that aren't specifically related to data analysis. Do not address requests that are non-data related.
-  - You should finish your response to these types of requests with an open-ended offer of something that you can do to help them.
-  - If part of a request is doable, but another part is not (i.e. build a dashboard and send it to another user) you should perform the analysis/workflow, then address the aspects of the user request that you weren't able to perform in your final response (after the analysis is completed).
-5. **Starting tasks right away**  
-  - If you're going to take any action (searching the data catalog, creating a plan, building metrics or dashboards, or modifying metrics/dashboards), begin immediately without messaging the user first.  
-  - Do not immediately respond to the user unless you're planning to take no action.. You should never preface your workflow with a response or sending a message to the user.
-  - Oftentimes, you must begin your workflow by searching the data catalog to have sufficient context. Once this is accomplished, you will have access to other actions (like creating a plan).
-6. **Handling vague, nuanced, or broad requests**  
-  - The user may send requests that are extremely broad, vague, or nuanced. These are some examples of vague or broad requests you might get from users...
-    - who are our top customers
-    - how does our perfomance look lately
-    - what kind of things should we be monitoring
-    - build a report of important stuff
-    - etc
-  - In these types of vague or nuanced scenarios, you should attempt to build a dashboard of available data. You should not respond to the user immediately. Instead, your workflow should be: search the data catalog, assess the available data, and then create a plan for your analysis.
-  - You should **never ask the user to clarify** things before doing your analysis.
-7. **Handling goal, KPI or initiative focused requests**  
-  - The user may send requests that want you to help them accomplish a goal, hit a KPI, or improve in some sort of initiative. These are some examples of initiative focused requests you might get from users...
-    - how can we improve our business
-    - i want to improve X, how do I do it?
-    - what can I do to hit X goal
-    - we are trying to hit this KPI, how do we do it?
-    - i want to increase Y, how do we do it?
-    - etc
-  - In these types of initiative focused scenarios, you should attempt to build a dashboard of available data. You should not respond to the user immediately. Instead, your workflow should be: search the data catalog, assess the available data, and then create a plan for your analysis..
-  - You should **never ask the user to clarify** things before doing your analysis.
---
-### Understanding What Gets Sent to the User
- **Real-Time Visibility**: The user can observe your actions as they happen, such as searching the data catalog or creating a plan.
- **Final Output**: When you complete your task, the user will receive the metrics or dashboards you create, presented based on the following rules:
-#### For Metrics Not Added to a Dashboard
- **Single Metric**: If you create or update just one metric and do not add it to a dashboard, the user will see that metric as a standalone chart.
- **Multiple Metrics**: If you create or update multiple metrics without adding them to a dashboard, each metric will be returned as an individual chart. The user can view these charts one at a time (e.g., by navigating through a list), with the most recently created or updated chart displayed first by default.
-#### For Dashboards
- **New or Updated Dashboard**: If you create or update a dashboard, the user will see the entire dashboard, which displays all the metrics you've added to it in a unified view.
- **Updates to Dashboard Metrics**: If you update metrics that are already part of a dashboard, the user will see the dashboard with those metrics automatically reflecting the updates.
---
-### SQL Best Practices and Constraints** (when creating new metrics)  
- **Constraints**: Only join tables with explicit entity relationships.  
- **SQL Requirements**:  
-  - Use schema-qualified table names (`<SCHEMA_NAME>.<TABLE_NAME>`).  
-  - Select specific columns (avoid `SELECT *` or `COUNT(*)`).  
-  - Use CTEs instead of subqueries, and use snake_case for naming them.  
-  - Use `DISTINCT` (not `DISTINCT ON`) with matching `GROUP BY`/`SORT BY` clauses.  
-  - Show entity names rather than just IDs.  
-  - Handle date conversions appropriately.  
-  - Order dates in ascending order.
-  - Reference database identifiers for cross-database queries.  
-  - Format output for the specified visualization type.  
-  - Maintain a consistent data structure across requests unless changes are required.  
-  - Use explicit ordering for custom buckets or categories.
-  - When grouping metrics by dates, default to monthly granularity for spans over 2 months, yearly for over 3 years, weekly for under 2 months, and daily for under a week, unless the user specifies a different granularity.
---
-### Response Guidelines and Format
- Answer in simple, clear language for non-technical users, avoiding tech terms.  
- Don't mention tools, actions, or technical details in responses.  
- Explain how you completed the task after finishing.
- Your responses should be very simple.
- Your tone should not be formal.
- Do not include yml or reference file names directly.
- Do not include any SQL, Python, or other code in your final responses.
- Never ask the user to clarify anything.
- Your response should be in markdown and can use bullets or number lists whenever necessary (but you should never use headers or sub-headers)
- Respond in the first person.
- As an expert analytics and data engineer, you are capable of giving direct advice based on the analysis you perform.
-### Example of a Good Response
-[A single metric was created]
-This line chart displays the monthly sales for each sales rep. Here's a breakdown of how this is being calculated:
-1. I searched through your data catalog and found a dataset that has a log of orders. It also includes a column for the sales rep that closed the order.
-2. I took the sum of revenue generated by all of your orders from the last 12 months.
-3. I filtered the revenue by sales rep.
-It looks like Nate Kelley is one of your standout sales reps. He is consistently closing more revenue than other sales reps in most months of the year.
--- 
-### Summary & Additional Info
- If you're going to take action, begin immediately. Never respond to the user until you have completed your workflow
- Search the data catalog first, unless you have context
- **Never ask clarifying questions**
- Any assets created, modified, or referenced will automatically be shown to the user
- Under the hood, you use state of the art encryption and have rigorous security protocols and policies in place.
- Currently, you are not able to do things that require Python. You are only capable of querying historical data using SQL statements.
- Keep final responses clear, simple and concise, focusing on what was accomplished.
- You cannot assume that any form of data exists prior to searching the data catalog."##;
--- a/api/libs/agents/src/agents/mod.rs
+++ b/api/libs/agents/src/agents/mod.rs
@ -1,7 +1,5 @@
 pub mod buster_multi_agent;
-pub mod buster_super_agent;
 pub mod buster_cli_agent;

 pub use buster_multi_agent::BusterMultiAgent;
-pub use buster_super_agent::BusterSuperAgent;
 pub use buster_cli_agent::BusterCliAgent;
--- a/api/libs/agents/src/tools/categories/agents_as_tools/hand_off_tool.rs
+++ b/api/libs/agents/src/tools/categories/agents_as_tools/hand_off_tool.rs
@ -1,120 +0,0 @@
-use std::sync::Arc;
-
-use anyhow::{anyhow, Result};
-use async_trait::async_trait;
-use serde::{Deserialize, Serialize};
-use serde_json::Value;
-
-use crate::{agent::Agent, tools::ToolExecutor};
-
-/// Parameters for the HandOffTool
-#[derive(Debug, Serialize, Deserialize)]
-pub struct HandOffParams {
-    /// The ID or name of the agent to hand off to
-    pub target_agent: String,
-}
-
-/// Output from the HandOffTool
-#[derive(Debug, Serialize, Deserialize)]
-pub struct HandOffOutput {}
-
-/// Tool for handing off a conversation to another agent
-pub struct HandOffTool {
-    agent: Arc<Agent>,
-    available_target_agents: Vec<String>,
-}
-
-impl HandOffTool {
-    /// Create a new HandOffTool
-    pub fn new(agent: Arc<Agent>) -> Self {
-        Self {
-            agent,
-            available_target_agents: Vec::new(),
-        }
-    }
-
-    /// Create a new HandOffTool with a list of available target agents
-    pub fn new_with_target_agents(agent: Arc<Agent>, target_agents: Vec<String>) -> Self {
-        Self {
-            agent,
-            available_target_agents: target_agents,
-        }
-    }
-
-    /// Update the available target agents
-    pub fn set_available_target_agents(&mut self, target_agents: Vec<String>) {
-        self.available_target_agents = target_agents;
-    }
-
-    fn get_hand_off_description() -> String {
-        "Hands off the current conversation to another agent. This allows a specialized agent to take over the conversation when the current agent reaches the limits of its capabilities.".to_string()
-    }
-
-    fn get_target_agent_description() -> String {
-        "The ID or name of the agent to hand off to. This should be the identifier of an existing agent in the system.".to_string()
-    }
-
-    fn get_context_description() -> String {
-        "Optional context to provide to the target agent. This allows passing additional information about why the handoff is occurring and what the target agent should focus on.".to_string()
-    }
-
-    fn get_transfer_history_description() -> String {
-        "Optional flag to indicate whether the conversation history should be transferred to the target agent. Defaults to true if not specified.".to_string()
-    }
-}
-
-#[async_trait]
-impl ToolExecutor for HandOffTool {
-    type Output = HandOffOutput;
-    type Params = HandOffParams;
-
-    fn get_name(&self) -> String {
-        "hand_off".to_string()
-    }
-
-    async fn is_enabled(&self) -> bool {
-        // This tool should always be available when multiple agents are configured
-        true
-    }
-
-    async fn execute(&self, params: Self::Params, _tool_call_id: String) -> Result<Self::Output> {
-        let target_agent_id = params.target_agent;
-
-        // Here we would implement the actual handoff logic:
-        // 1. Notify the target agent
-        // 2. Transfer conversation history if requested
-        // 3. Update conversation state
-        // 4. Redirect user to the new agent
-
-        // TODO: Implement actual handoff logic
-        // For now, we'll return a stub response
-
-        Ok(HandOffOutput {})
-    }
-
-    async fn get_schema(&self) -> Value {
-        serde_json::json!({
-            "name": self.get_name(),
-            "description": Self::get_hand_off_description(),
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "target_agent": {
-                      "type": "string",
-                      "description": Self::get_target_agent_description(),
-                      "enum": self.available_target_agents
-                    },
-                    "context": {
-                        "type": "string",
-                        "description": Self::get_context_description(),
-                    },
-                    "transfer_history": {
-                        "type": "boolean",
-                        "description": Self::get_transfer_history_description(),
-                    },
-                },
-                "required": ["target_agent"],
-            },
-        })
-    }
-}
--- a/api/libs/agents/src/tools/categories/agents_as_tools/mod.rs
+++ b/api/libs/agents/src/tools/categories/agents_as_tools/mod.rs
@ -1,3 +0,0 @@
-pub mod hand_off_tool;
-
-pub use hand_off_tool::HandOffTool;
--- a/api/libs/agents/src/tools/categories/cli_tools/bash_tool.rs
+++ b/api/libs/agents/src/tools/categories/cli_tools/bash_tool.rs
@ -3,10 +3,8 @@ use async_trait::async_trait;
 use serde::{Deserialize, Serialize};
 use serde_json::Value;
 use std::process::Command;
-use crate::{
-    agent::Agent,
-    tools::ToolExecutor
-};
+use crate::{agent::Agent, tools::ToolExecutor};
+use anyhow::Result;

 #[derive(Serialize, Deserialize, Debug)]
 pub struct RunBashParams {
@ -22,12 +20,12 @@ pub struct RunBashOutput {
 }

 pub struct RunBashCommandTool {
-    agent: Arc<Agent>,
+    _agent: Arc<Agent>,
 }

 impl RunBashCommandTool {
    pub fn new(agent: Arc<Agent>) -> Self {
-        Self { agent }
+        Self { _agent: agent }
    }
 }

@ -37,32 +35,32 @@ impl ToolExecutor for RunBashCommandTool {
    type Params = RunBashParams;

    fn get_name(&self) -> String {
-        "run_bash_command".to_string()
+        "bash".to_string()
    }

-    async fn is_enabled(&self) -> bool {
-        true
-    }
-
-    async fn execute(&self, params: Self::Params, _tool_call_id: String) -> Result<Self::Output, anyhow::Error> {
+    async fn execute(&self, params: Self::Params, _tool_call_id: String) -> Result<Self::Output> {
        let mut command = Command::new("sh");
        command.arg("-c").arg(&params.command);

        if let Some(dir) = &params.working_directory {
-            command.current_dir(dir);
+            if std::path::Path::new(dir).is_dir() {
+                command.current_dir(dir);
+            } else {
+                return Err(anyhow::anyhow!("Working directory '{}' not found or is not a directory.", dir));
+            }
        }

        match command.output() {
-            Ok(output) => {
-                Ok(RunBashOutput {
-                    stdout: String::from_utf8_lossy(&output.stdout).to_string(),
-                    stderr: String::from_utf8_lossy(&output.stderr).to_string(),
-                    exit_code: output.status.code(),
-                })
-            }
-            Err(e) => {
-                Err(anyhow::anyhow!("Failed to execute command '{}': {}", params.command, e))
-            }
+            Ok(output) => Ok(RunBashOutput {
+                stdout: String::from_utf8_lossy(&output.stdout).to_string(),
+                stderr: String::from_utf8_lossy(&output.stderr).to_string(),
+                exit_code: output.status.code(),
+            }),
+            Err(e) => Err(anyhow::anyhow!(
+                "Failed to execute command '{}': {}",
+                params.command,
+                e
+            )),
        }
    }

--- a/api/libs/agents/src/tools/categories/cli_tools/edit_file_tool.rs
+++ b/api/libs/agents/src/tools/categories/cli_tools/edit_file_tool.rs
@ -74,10 +74,6 @@ impl ToolExecutor for EditFileContentTool {
        "edit_file_content".to_string()
    }

-    async fn is_enabled(&self) -> bool {
-        true
-    }
-
    async fn execute(&self, params: Self::Params, _tool_call_id: String) -> Result<Self::Output, anyhow::Error> {
        let file_path = Path::new(&params.file_path);
        if !file_path.exists() {
--- a/api/libs/agents/src/tools/categories/cli_tools/glob_tool.rs
+++ b/api/libs/agents/src/tools/categories/cli_tools/glob_tool.rs
@ -39,10 +39,6 @@ impl ToolExecutor for FindFilesGlobTool {
        "find_files_glob".to_string()
    }

-    async fn is_enabled(&self) -> bool {
-        true
-    }
-
    async fn execute(&self, params: Self::Params, _tool_call_id: String) -> Result<Self::Output, anyhow::Error> {
        let base_path = match params.base_directory {
            Some(dir) => PathBuf::from(dir),
--- a/api/libs/agents/src/tools/categories/cli_tools/grep_tool.rs
+++ b/api/libs/agents/src/tools/categories/cli_tools/grep_tool.rs
@ -49,10 +49,6 @@ impl ToolExecutor for SearchFileContentGrepTool {
        "search_file_content_grep".to_string()
    }

-    async fn is_enabled(&self) -> bool {
-        true
-    }
-
    async fn execute(&self, params: Self::Params, _tool_call_id: String) -> Result<Self::Output, anyhow::Error> {
        let mut matches = Vec::new();
        let use_regex = params.use_regex.unwrap_or(false);
--- a/api/libs/agents/src/tools/categories/cli_tools/ls_tool.rs
+++ b/api/libs/agents/src/tools/categories/cli_tools/ls_tool.rs
@ -82,10 +82,6 @@ impl ToolExecutor for ListDirectoryTool {
        "list_directory".to_string()
    }

-    async fn is_enabled(&self) -> bool {
-        true
-    }
-
    async fn execute(&self, params: Self::Params, _tool_call_id: String) -> Result<Self::Output, anyhow::Error> {
        let path = PathBuf::from(&params.path);
        if !path.exists() {
--- a/api/libs/agents/src/tools/categories/cli_tools/read_file_tool.rs
+++ b/api/libs/agents/src/tools/categories/cli_tools/read_file_tool.rs
@ -42,10 +42,6 @@ impl ToolExecutor for ReadFileContentTool {
        "read_file_content".to_string()
    }

-    async fn is_enabled(&self) -> bool {
-        true
-    }
-
    async fn execute(&self, params: Self::Params, _tool_call_id: String) -> Result<Self::Output, anyhow::Error> {
        let file_path = Path::new(&params.file_path);
        if !file_path.exists() {
--- a/api/libs/agents/src/tools/categories/cli_tools/write_file_tool.rs
+++ b/api/libs/agents/src/tools/categories/cli_tools/write_file_tool.rs
@ -41,10 +41,6 @@ impl ToolExecutor for WriteFileContentTool {
        "write_file_content".to_string()
    }

-    async fn is_enabled(&self) -> bool {
-        true
-    }
-
    async fn execute(&self, params: Self::Params, _tool_call_id: String) -> Result<Self::Output, anyhow::Error> {
        let file_path = Path::new(&params.file_path);
        let overwrite = params.overwrite.unwrap_or(false);
--- a/api/libs/agents/src/tools/categories/file_tools/create_dashboards.rs
+++ b/api/libs/agents/src/tools/categories/file_tools/create_dashboards.rs
@ -130,13 +130,6 @@ impl ToolExecutor for CreateDashboardFilesTool {
        "create_dashboards".to_string()
    }

-    async fn is_enabled(&self) -> bool {
-        matches!((
-            self.agent.get_state_value("metrics_available").await,
-            self.agent.get_state_value("plan_available").await,
-        ), (Some(_), Some(_)))
-    }
-
    async fn execute(&self, params: Self::Params, tool_call_id: String) -> Result<Self::Output> {
        let start_time = Instant::now();

--- a/api/libs/agents/src/tools/categories/file_tools/create_metrics.rs
+++ b/api/libs/agents/src/tools/categories/file_tools/create_metrics.rs
@ -76,13 +76,6 @@ impl ToolExecutor for CreateMetricFilesTool {
        "create_metrics".to_string()
    }

-    async fn is_enabled(&self) -> bool {
-        matches!((
-            self.agent.get_state_value("data_context").await,
-            self.agent.get_state_value("plan_available").await,
-        ), (Some(_), Some(_)))
-    }
-
    async fn execute(&self, params: Self::Params, tool_call_id: String) -> Result<Self::Output> {
        let start_time = Instant::now();

--- a/api/libs/agents/src/tools/categories/file_tools/modify_dashboards.rs
+++ b/api/libs/agents/src/tools/categories/file_tools/modify_dashboards.rs
@ -58,16 +58,6 @@ impl ToolExecutor for ModifyDashboardFilesTool {
        "update_dashboards".to_string()
    }

-    async fn is_enabled(&self) -> bool {
-        matches!(
-            (
-                self.agent.get_state_value("dashboards_available").await,
-                self.agent.get_state_value("plan_available").await,
-            ),
-            (Some(_), Some(_))
-        )
-    }
-
    async fn execute(&self, params: Self::Params, _tool_call_id: String) -> Result<Self::Output> {
        let start_time = Instant::now();

--- a/api/libs/agents/src/tools/categories/file_tools/modify_metrics.rs
+++ b/api/libs/agents/src/tools/categories/file_tools/modify_metrics.rs
@ -61,16 +61,6 @@ impl ToolExecutor for ModifyMetricFilesTool {
        "update_metrics".to_string()
    }

-    async fn is_enabled(&self) -> bool {
-        matches!(
-            (
-                self.agent.get_state_value("metrics_available").await,
-                self.agent.get_state_value("plan_available").await,
-            ),
-            (Some(_), Some(_))
-        )
-    }
-
    async fn execute(&self, params: Self::Params, _tool_call_id: String) -> Result<Self::Output> {
        let start_time = Instant::now();

--- a/api/libs/agents/src/tools/categories/file_tools/search_data_catalog.rs
+++ b/api/libs/agents/src/tools/categories/file_tools/search_data_catalog.rs
@ -366,10 +366,6 @@ impl ToolExecutor for SearchDataCatalogTool {
        })
    }

-    async fn is_enabled(&self) -> bool {
-        true
-    }
-
    fn get_name(&self) -> String {
        "search_data_catalog".to_string()
    }
--- a/api/libs/agents/src/tools/categories/mod.rs
+++ b/api/libs/agents/src/tools/categories/mod.rs
@ -7,7 +7,6 @@
 //! - interaction_tools: Tools for user interaction and UI manipulation
 //! - planning_tools: Tools for planning and scheduling

-pub mod agents_as_tools;
 pub mod file_tools;
 pub mod planning_tools;
 pub mod cli_tools; 
--- a/api/libs/agents/src/tools/categories/planning_tools/create_plan.rs
+++ b/api/libs/agents/src/tools/categories/planning_tools/create_plan.rs
@ -5,6 +5,7 @@ use serde::{Deserialize, Serialize};
 use serde_json::Value;
 use std::env;
 use std::sync::Arc;
+use std::time::Instant;

 use crate::{agent::Agent, tools::ToolExecutor};

@ -39,6 +40,7 @@ impl ToolExecutor for CreatePlan {
    }

    async fn execute(&self, params: Self::Params, _tool_call_id: String) -> Result<Self::Output> {
+        let start_time = Instant::now();
        self.agent
            .set_state_value(String::from("plan_available"), Value::Bool(true))
            .await;
@ -49,10 +51,6 @@ impl ToolExecutor for CreatePlan {
        })
    }

-    async fn is_enabled(&self) -> bool {
-        self.agent.get_state_value("data_context").await.is_some()
-    }
-
    async fn get_schema(&self) -> Value {
        serde_json::json!({
            "name": self.get_name(),
--- a/api/libs/agents/src/tools/executor.rs
+++ b/api/libs/agents/src/tools/executor.rs
@ -21,9 +21,6 @@ pub trait ToolExecutor: Send + Sync {
    /// Get the name of this tool
    fn get_name(&self) -> String;

-    /// Check if this tool is currently enabled
-    async fn is_enabled(&self) -> bool;
-
    /// Handle shutdown signal. Default implementation does nothing.
    /// Tools should override this if they need to perform cleanup on shutdown.
    async fn handle_shutdown(&self) -> Result<()> {
@ -66,10 +63,6 @@ where
    fn get_name(&self) -> String {
        self.inner.get_name()
    }
-
-    async fn is_enabled(&self) -> bool {
-        self.inner.is_enabled().await
-    }
 }

 /// Implementation for Box<T> to enable dynamic dispatch
@ -89,10 +82,6 @@ impl<T: ToolExecutor<Output = Value, Params = Value> + Send + Sync> ToolExecutor
    fn get_name(&self) -> String {
        (**self).get_name()
    }
-
-    async fn is_enabled(&self) -> bool {
-        (**self).is_enabled().await
-    }
 }

 /// A trait to convert any ToolExecutor to a ToolCallExecutor
--- a/api/libs/agents/src/tools/mod.rs
+++ b/api/libs/agents/src/tools/mod.rs
@ -11,7 +11,6 @@ pub use executor::{ToolExecutor, ToolCallExecutor, IntoToolCallExecutor};
 // Re-export commonly used tool categories
 pub use categories::file_tools;
 pub use categories::planning_tools;
-pub use categories::agents_as_tools;

 // Re-export specific tools or entire categories
 pub use categories::cli_tools::{ 
--- a/api/libs/handlers/src/chats/post_chat_handler.rs
+++ b/api/libs/handlers/src/chats/post_chat_handler.rs
@ -11,7 +11,7 @@ use agents::{
        },
        planning_tools::CreatePlanOutput,
    },
-    AgentExt, AgentMessage, AgentThread, BusterSuperAgent,
+    AgentExt, AgentMessage, AgentThread, BusterMultiAgent,
 };

 use anyhow::{anyhow, Result};
@ -337,7 +337,9 @@ pub async fn post_chat_handler(
    }

    let mut initial_messages = vec![];
-    let agent = BusterSuperAgent::new(user.id, chat_id).await?;
+    // Determine if this is a follow-up message based on chat_id presence
+    let is_follow_up = request.chat_id.is_some();
+    let agent = BusterMultiAgent::new(user.id, chat_id, is_follow_up).await?;

    // Load context if provided (combines both legacy and new asset references)
    if let Some(existing_chat_id) = request.chat_id {