Merge branch 'evals' of https://github.com/buster-so/buster into evals

This commit is contained in:
Nate Kelley 2025-04-16 13:49:45 -06:00
commit 7b9ca21062
No known key found for this signature in database
GPG Key ID: FD90372AB8D98B4F
6 changed files with 97 additions and 154 deletions

View File

@ -16,9 +16,9 @@ use crate::tools::{
categories::{
file_tools::{
CreateDashboardFilesTool, CreateMetricFilesTool, ModifyDashboardFilesTool,
ModifyMetricFilesTool,
ModifyMetricFilesTool, SearchDataCatalogTool,
},
response_tools::{Done, MessageUserClarifyingQuestion},
response_tools::Done,
},
IntoToolCallExecutor,
};
@ -47,6 +47,7 @@ pub fn get_configuration(agent_data: &ModeAgentData) -> ModeConfiguration {
let create_dashboard_files_tool = CreateDashboardFilesTool::new(agent_clone.clone());
let modify_dashboard_files_tool = ModifyDashboardFilesTool::new(agent_clone.clone());
let done_tool = Done::new(agent_clone.clone());
let search_data_catalog_tool = SearchDataCatalogTool::new(agent_clone.clone());
// --- Define Conditions based on Agent State (as per original load_tools) ---
// Base condition: Plan and context must exist (implicitly true if we are in this mode)
@ -87,6 +88,9 @@ pub fn get_configuration(agent_data: &ModeAgentData) -> ModeConfiguration {
review_needed || all_todos_complete
});
// Condition for search tool (always available)
let always_available = Some(|_state: &HashMap<String, Value>| -> bool { true });
// Add tools to the agent with conditions
agent_clone
.add_tool(
@ -123,6 +127,13 @@ pub fn get_configuration(agent_data: &ModeAgentData) -> ModeConfiguration {
done_condition,
)
.await;
agent_clone
.add_tool(
search_data_catalog_tool.get_name(),
search_data_catalog_tool.into_tool_call_executor(),
always_available,
)
.await;
Ok(())
})
@ -142,7 +153,9 @@ pub fn get_configuration(agent_data: &ModeAgentData) -> ModeConfiguration {
// Keep the prompt constant, but it's no longer pub
const PROMPT: &str = r##"### Role & Task
You are Buster, an expert analytics and data engineer. Your job is to assess what data is available and then provide fast, accurate answers to analytics questions from non-technical users. You do this by analyzing user requests, searching across a data catalog, and building metrics or dashboards.
You are Buster, an expert analytics and data engineer. Your job is to assess what data is available (provided via search results) and then provide fast, accurate answers to analytics questions from non-technical users. You do this by analyzing user requests, using the provided data context, and building metrics or dashboards.
**Crucially, you MUST only reference datasets, tables, columns, and values that have been explicitly provided to you through the results of data catalog searches in the conversation history or current context. Do not assume or invent data structures or content. Base all data operations strictly on the provided context.**
Today's date is {TODAYS_DATE}.
@ -150,14 +163,15 @@ Today's date is {TODAYS_DATE}.
## Workflow Summary
1. **Search the data catalog** to locate relevant data.
2. **Assess the adequacy** of the search results:
3. **Create a plan** using the appropriate create plan tool.
1. **Review the provided data context** from previous search steps.
2. **Assess the adequacy** of the *available* data context for the current request.
3. **Create a plan** using the appropriate create plan tool, based *only* on the available data.
4. **Execute the plan** by creating assets such as metrics or dashboards.
- Execute the plan to the best of your ability.
- If only certain aspects of the plan are possible, proceed to do whatever is possible.
- Execute the plan to the best of your ability using *only* the available data.
- If you encounter errors or realize data is missing *during* execution, use the appropriate search tool to find the necessary data *before* continuing or resorting to the `finish_and_respond` tool.
- If only certain aspects of the plan are possible with the available data (even after searching again), proceed to do whatever is possible.
5. **Send a final response to the user** with the `finish_and_respond` tool.
- If you were not able to accomplish all aspects of the user request, address the things that were not possible in your final response.
- If you were not able to accomplish all aspects of the user request (due to missing data that couldn't be found), address the things that were not possible in your final response.
---
@ -169,8 +183,9 @@ You have access to a set of tools to perform actions and deliver results. Adhere
2. **Follow the tool call schema precisely**, including all required parameters.
3. **Only use provided tools**, as availability may vary dynamically based on the task.
4. **Avoid mentioning tool names** in explanations or outputs (e.g., say "I searched the data catalog" instead of naming the tool).
5. **If the data required is not available**, use the `finish_and_respond` tool to inform the user (do not ask the user to provide you with the required data), signaling the end of your workflow.
6. **Do not ask clarifying questions.** If the user's request is ambiguous, do not ask clarifying questions. Make reasonable assumptions and proceed to accomplish the task.
5. **If the data required is not available** in your current context, first use the search tool to attempt to find it. If the necessary data *still* cannot be found after a reasonable search attempt, *then* use the `finish_and_respond` tool to inform the user, signaling the end of your workflow for that request.
6. **Do not ask clarifying questions.** If the user's request is ambiguous, make reasonable assumptions based on the *available data context* and proceed to accomplish the task.
7. **Strictly Adhere to Available Data**: Reiterate: NEVER reference datasets, tables, columns, or values not present in the data context provided by search tools. Do not hallucinate or invent data.
---
@ -247,140 +262,3 @@ pub const MODEL: Option<&str> = None;
/// Build this mode's system prompt for the given date by filling the
/// `{TODAYS_DATE}` placeholder in the prompt template.
pub fn get_prompt(todays_date: &str) -> String {
    let template = PROMPT;
    template.replace("{TODAYS_DATE}", todays_date)
}
// Load tools relevant to the analysis/execution mode
/// Registers the analysis/execution tool set on `agent`, gating each tool
/// behind a condition evaluated against the agent's state map.
///
/// Availability rules (as encoded by the conditions below):
/// - create metric: requires `"data_context"` and `"plan_available"`
/// - modify metric: additionally requires `"metrics_available"`
/// - create dashboard: requires `"data_context"`, `"plan_available"`,
///   and `"metrics_available"` (dashboards are built from metrics)
/// - modify dashboard: additionally requires `"dashboards_available"`
/// - clarifying question: always available
/// - done: available once `"review_needed"` is true or every entry in
///   `"todos"` has `"completed": true`
pub async fn load_tools(agent: &Arc<Agent>) -> Result<()> {
    use crate::tools::{
        categories::{
            file_tools::{
                // File tools are the core of this mode
                CreateDashboardFilesTool,
                CreateMetricFilesTool,
                ModifyDashboardFilesTool,
                ModifyMetricFilesTool,
            },
            response_tools::{
                // Response tools for completion or issues
                Done,
                MessageUserClarifyingQuestion,
            },
        },
        IntoToolCallExecutor,
    };

    let create_metric_files_tool = CreateMetricFilesTool::new(Arc::clone(agent));
    let modify_metric_files_tool = ModifyMetricFilesTool::new(Arc::clone(agent));
    let create_dashboard_files_tool = CreateDashboardFilesTool::new(Arc::clone(agent));
    let modify_dashboard_files_tool = ModifyDashboardFilesTool::new(Arc::clone(agent));
    let message_user_clarifying_question_tool = MessageUserClarifyingQuestion::new();
    let done_tool = Done::new(Arc::clone(agent));

    // --- Define Conditions based on Agent State ---
    // These conditions depend on the plan and available assets; refinement may be
    // needed based on the actual state keys written elsewhere.
    // NOTE(review): key names ("data_context", "plan_available", etc.) are
    // assumed to match what other modes write into the state map — confirm.

    // Create Metric: Plan exists, context exists
    let create_metric_condition = Some(|state: &HashMap<String, Value>| -> bool {
        state.contains_key("data_context") && state.contains_key("plan_available")
    });

    // Modify Metric: Plan, context, and *some* metrics must exist
    let modify_metric_condition = Some(|state: &HashMap<String, Value>| -> bool {
        state.contains_key("data_context")
            && state.contains_key("plan_available")
            && state.contains_key("metrics_available") // Check if state includes metrics
    });

    // Create Dashboard: Plan, context, and metrics must exist
    let create_dashboard_condition = Some(|state: &HashMap<String, Value>| -> bool {
        state.contains_key("data_context")
            && state.contains_key("plan_available")
            && state.contains_key("metrics_available")
    });

    // Modify Dashboard: Plan, context, and *some* dashboards must exist
    let modify_dashboard_condition = Some(|state: &HashMap<String, Value>| -> bool {
        state.contains_key("data_context")
            && state.contains_key("plan_available")
            && state.contains_key("dashboards_available") // Check if state includes dashboards
    });

    // Done tool: Available when plan execution might be complete or needs review
    // Using the complex condition from follow-up, assuming it's relevant here too
    let done_condition = Some(|state: &HashMap<String, Value>| -> bool {
        let review_needed = state
            .get("review_needed")
            .and_then(Value::as_bool)
            .unwrap_or(false);
        let all_todos_complete = state
            .get("todos") // Assuming plan execution updates 'todos'
            .and_then(Value::as_array)
            .map(|todos| {
                todos.iter().all(|todo| {
                    todo.get("completed")
                        .and_then(Value::as_bool)
                        .unwrap_or(false)
                })
            })
            .unwrap_or(false);
        review_needed || all_todos_complete
    });

    // Clarifying question: Always available
    let always_available = Some(|_state: &HashMap<String, Value>| -> bool { true });

    // --- Add Tools with Conditions ---
    // `get_name()` is called on each tool immediately before it is consumed by
    // `into_tool_call_executor()`; no separate name bindings are needed.
    agent
        .add_tool(
            create_metric_files_tool.get_name(),
            create_metric_files_tool.into_tool_call_executor(),
            create_metric_condition,
        )
        .await;
    agent
        .add_tool(
            modify_metric_files_tool.get_name(),
            modify_metric_files_tool.into_tool_call_executor(),
            modify_metric_condition,
        )
        .await;
    agent
        .add_tool(
            create_dashboard_files_tool.get_name(),
            create_dashboard_files_tool.into_tool_call_executor(),
            create_dashboard_condition,
        )
        .await;
    agent
        .add_tool(
            modify_dashboard_files_tool.get_name(),
            modify_dashboard_files_tool.into_tool_call_executor(),
            modify_dashboard_condition,
        )
        .await;
    agent
        .add_tool(
            message_user_clarifying_question_tool.get_name(),
            message_user_clarifying_question_tool.into_tool_call_executor(),
            always_available,
        )
        .await;
    agent
        .add_tool(
            done_tool.get_name(),
            done_tool.into_tool_call_executor(),
            done_condition,
        )
        .await;
    Ok(())
}

View File

@ -478,6 +478,7 @@ definitions:
- metric
metricColumnId:
type: string
description: The column ID to use for the metric value
metricValueAggregate:
type: string
enum:
@ -488,7 +489,65 @@ definitions:
- min
- count
- first
description: Optional - only used when the user specifically requests it, otherwise leave blank
description: Optional aggregation method for the metric value, defaults to sum if not specified
metricHeader:
oneOf:
- type: string
description: Simple string title for the metric header
- type: object
properties:
columnId:
type: string
description: Which column to use for the header
useValue:
type: boolean
description: Whether to display the key or the value in the chart
aggregate:
type: string
enum:
- sum
- average
- median
- max
- min
- count
- first
description: Optional aggregation method, defaults to sum
required:
- columnId
- useValue
description: Configuration for a derived metric header
metricSubHeader:
oneOf:
- type: string
description: Simple string title for the metric sub-header
- type: object
properties:
columnId:
type: string
description: Which column to use for the sub-header
useValue:
type: boolean
description: Whether to display the key or the value in the chart
aggregate:
type: string
enum:
- sum
- average
- median
- max
- min
- count
- first
description: Optional aggregation method, defaults to sum
required:
- columnId
- useValue
description: Configuration for a derived metric sub-header
metricValueLabel:
oneOf:
- type: string
description: Custom label to display with the metric value
required:
- selectedChartType
- metricColumnId

View File

@ -218,7 +218,7 @@ impl ToolExecutor for CreateMetricFilesTool {
let failures: Vec<String> = failed_files
.iter()
.map(|(name, error)| format!("Failed to create '{}': {}. Please recreate the metric from scratch rather than attempting to modify.", name, error))
.map(|(name, error)| format!("Failed to create '{}': {}.\n\nPlease recreate the metric from scratch rather than attempting to modify. This error could be due to:\n- Using a dataset that doesn't exist (please reevaluate the available datasets in the chat conversation)\n- Invalid configuration in the metric file\n- Special characters in the metric name or SQL query\n- Syntax errors in the SQL query", name, error))
.collect();
if failures.len() == 1 {

View File

@ -418,7 +418,13 @@ impl ToolExecutor for ModifyMetricFilesTool {
batch
.failed_updates
.into_iter()
.map(|(file_name, error)| FailedFileModification { file_name, error }),
.map(|(file_name, error)| {
let error_message = format!("Failed to modify '{}': {}.\n\nPlease attempt to modify the metric again. This error could be due to:\n- Using a dataset that doesn't exist (please reevaluate the available datasets in the chat conversation)\n- Invalid configuration in the metric file\n- Special characters in the metric name or SQL query\n- Syntax errors in the SQL query", file_name, error);
FailedFileModification {
file_name,
error: error_message,
}
}),
);
// Set review_needed flag if execution was successful

View File

@ -363,7 +363,7 @@ async fn rerank_datasets(
query,
documents,
model: ReRankModel::EnglishV3,
top_n: Some(30),
top_n: Some(50),
..Default::default()
};

View File

@ -456,10 +456,10 @@ pub struct MetricChartConfig {
pub metric_value_aggregate: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(alias = "metric_header")]
pub metric_header: Option<String>,
pub metric_header: Option<serde_json::Value>,
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(alias = "metric_sub_header")]
pub metric_sub_header: Option<String>,
pub metric_sub_header: Option<serde_json::Value>,
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(alias = "metric_value_label")]
pub metric_value_label: Option<String>,