mirror of https://github.com/buster-so/buster.git
improvements for markdown, formatting, schemas, models, etc.
This commit is contained in:
parent
437d3c1624
commit
081cabc4c6
|
@ -618,7 +618,7 @@ impl Agent {
|
||||||
session_id: thread.id.to_string(),
|
session_id: thread.id.to_string(),
|
||||||
trace_id: Uuid::new_v4().to_string(),
|
trace_id: Uuid::new_v4().to_string(),
|
||||||
}),
|
}),
|
||||||
reasoning_effort: Some("medium".to_string()),
|
reasoning_effort: Some("low".to_string()),
|
||||||
..Default::default()
|
..Default::default()
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -303,11 +303,11 @@ impl BusterMultiAgent {
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
// Add dynamic model rule: Use gpt-4.1-mini when searching the data catalog
|
// Add dynamic model rule: Use gpt-4.1 when searching the data catalog
|
||||||
agent
|
agent
|
||||||
.add_dynamic_model_rule(
|
.add_dynamic_model_rule(
|
||||||
needs_data_catalog_search_condition, // Reuse the same condition
|
needs_data_catalog_search_condition, // Reuse the same condition
|
||||||
"gpt-4.1-mini".to_string(),
|
"gpt-4.1".to_string(),
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
|
@ -1304,6 +1304,11 @@ You are a Search Agent, an AI assistant designed to analyze the conversation his
|
||||||
- Determine the data requirements for the *current* user request.
|
- Determine the data requirements for the *current* user request.
|
||||||
|
|
||||||
2. **Decision Logic**:
|
2. **Decision Logic**:
|
||||||
|
- **If the request is ONLY about visualization/charting aspects**: Use the `no_search_needed` tool. These requests typically don't require new data assets:
|
||||||
|
- Changing chart colors or styles (e.g., "make the charts blue")
|
||||||
|
- Adding existing data to dashboards (e.g., "put these on a dashboard")
|
||||||
|
- Adjusting visualization parameters (e.g., "make this a bar chart instead of a line chart")
|
||||||
|
- Formatting or layout changes (e.g., "resize these charts")
|
||||||
- **If NO dataset context (detailed models) exists from previous searches**: Use `search_data_catalog` by default to gather initial context.
|
- **If NO dataset context (detailed models) exists from previous searches**: Use `search_data_catalog` by default to gather initial context.
|
||||||
- **If existing dataset context (detailed models) IS available**: Evaluate if this context provides sufficient information (relevant datasets, columns, documentation) to formulate a plan or perform analysis for the *current* user request.
|
- **If existing dataset context (detailed models) IS available**: Evaluate if this context provides sufficient information (relevant datasets, columns, documentation) to formulate a plan or perform analysis for the *current* user request.
|
||||||
- **If sufficient**: Use the `no_search_needed` tool. Provide a reason indicating that the necessary data context (models) is already available from previous steps.
|
- **If sufficient**: Use the `no_search_needed` tool. Provide a reason indicating that the necessary data context (models) is already available from previous steps.
|
||||||
|
@ -1317,6 +1322,7 @@ You are a Search Agent, an AI assistant designed to analyze the conversation his
|
||||||
- For `no_search_needed`, provide a concise explanation referencing the existing sufficient context (e.g., "Necessary dataset models identified in previous turn cover the current request").
|
- For `no_search_needed`, provide a concise explanation referencing the existing sufficient context (e.g., "Necessary dataset models identified in previous turn cover the current request").
|
||||||
|
|
||||||
**Rules**
|
**Rules**
|
||||||
|
- **Skip search for pure visualization requests**: If the user is ONLY asking about charting, visualization, or dashboard layout aspects (not requesting new data), use `no_search_needed` with a reason indicating the request is about visualization only.
|
||||||
- **Default to search if no context**: If no detailed dataset models are available from previous turns, always use `search_data_catalog` first.
|
- **Default to search if no context**: If no detailed dataset models are available from previous turns, always use `search_data_catalog` first.
|
||||||
- **Leverage existing context**: Before searching (if context exists), exhaustively evaluate if previously identified dataset models are sufficient to address the current user request's data needs for planning or analysis. Use `no_search_needed` only if the existing models suffice.
|
- **Leverage existing context**: Before searching (if context exists), exhaustively evaluate if previously identified dataset models are sufficient to address the current user request's data needs for planning or analysis. Use `no_search_needed` only if the existing models suffice.
|
||||||
- **Search only for missing information**: If existing context is insufficient, use `search_data_catalog` strategically only to fill the specific gaps in the agent's context (missing datasets, columns, details), not to re-discover information already known.
|
- **Search only for missing information**: If existing context is insufficient, use `search_data_catalog` strategically only to fill the specific gaps in the agent's context (missing datasets, columns, details), not to re-discover information already known.
|
||||||
|
@ -1340,6 +1346,12 @@ You are a Search Agent, an AI assistant designed to analyze the conversation his
|
||||||
- User asks in Turn 2: "Show me the lifetime value and recent orders for our top customer by revenue."
|
- User asks in Turn 2: "Show me the lifetime value and recent orders for our top customer by revenue."
|
||||||
- Tool: `no_search_needed`
|
- Tool: `no_search_needed`
|
||||||
- Reason: "The necessary dataset models (`customers`, `orders`) identified previously contain the required columns (`ltv`, `order_date`, `total_amount`) to fulfill this request."
|
- Reason: "The necessary dataset models (`customers`, `orders`) identified previously contain the required columns (`ltv`, `order_date`, `total_amount`) to fulfill this request."
|
||||||
|
- **Visualization-Only Request (No Search Needed)**: User asks, "Make all the charts blue and add them to a dashboard."
|
||||||
|
- Tool: `no_search_needed`
|
||||||
|
- Reason: "The request is only about chart styling and dashboard placement, not requiring any new data assets."
|
||||||
|
- **Data Discovery with Visualization (Needs Search)**: User asks, "Find other interesting metrics related to customer engagement and add those to the dashboard."
|
||||||
|
- Tool: `search_data_catalog`
|
||||||
|
- Query: "I need datasets containing customer engagement metrics that might be relevant for dashboard visualization."
|
||||||
- **Satisfied Request (Existing Context Sufficient -> No Search Needed)**: Context includes models for revenue datasets for Q1 2024, and user asks, "Can you confirm the Q1 revenue data?"
|
- **Satisfied Request (Existing Context Sufficient -> No Search Needed)**: Context includes models for revenue datasets for Q1 2024, and user asks, "Can you confirm the Q1 revenue data?"
|
||||||
- Tool: `no_search_needed`
|
- Tool: `no_search_needed`
|
||||||
- Reason: "The request pertains to Q1 2024 revenue data, for which detailed models were located in the prior search results."
|
- Reason: "The request pertains to Q1 2024 revenue data, for which detailed models were located in the prior search results."
|
||||||
|
@ -1352,8 +1364,10 @@ You are a Search Agent, an AI assistant designed to analyze the conversation his
|
||||||
- Implied data needs from analytical questions.
|
- Implied data needs from analytical questions.
|
||||||
- Vague or exploratory requests requiring initial data discovery.
|
- Vague or exploratory requests requiring initial data discovery.
|
||||||
- Follow-up requests building on established context.
|
- Follow-up requests building on established context.
|
||||||
|
- Visualization-only requests (no search needed).
|
||||||
|
|
||||||
**Request Interpretation**
|
**Request Interpretation**
|
||||||
|
- Evaluate if the request is ONLY about visualization, charting or dashboard layout (no search needed).
|
||||||
- Derive data needs from the user request *and* the current context (existing detailed dataset models).
|
- Derive data needs from the user request *and* the current context (existing detailed dataset models).
|
||||||
- If no models exist, search.
|
- If no models exist, search.
|
||||||
- If models exist, evaluate their sufficiency for the current request. If sufficient, use `no_search_needed`.
|
- If models exist, evaluate their sufficiency for the current request. If sufficient, use `no_search_needed`.
|
||||||
|
|
|
@ -107,11 +107,11 @@ pub const METRIC_YML_SCHEMA: &str = r##"
|
||||||
# -------------------------------------
|
# -------------------------------------
|
||||||
# Required top-level fields:
|
# Required top-level fields:
|
||||||
#
|
#
|
||||||
# name: Your Metric Title
|
# name: Your Metric Title # Do NOT use quotes for string values
|
||||||
# description: A detailed description of what this metric measures and how it should be interpreted # Optional
|
# description: A detailed description of what this metric measures and how it should be interpreted # Optional, NO quotes
|
||||||
# datasetIds:
|
# datasetIds:
|
||||||
# - 123e4567-e89b-12d3-a456-426614174000 # Dataset UUIDs (not names)
|
# - 123e4567-e89b-12d3-a456-426614174000 # Dataset UUIDs (not names) - do NOT wrap in quotes
|
||||||
# timeFrame: Last 30 days # Human-readable time period covered by the query
|
# timeFrame: Last 30 days # Human-readable time period covered by the query, NO quotes
|
||||||
# sql: |
|
# sql: |
|
||||||
# SELECT
|
# SELECT
|
||||||
# date,
|
# date,
|
||||||
|
@ -139,6 +139,9 @@ pub const METRIC_YML_SCHEMA: &str = r##"
|
||||||
#
|
#
|
||||||
# RULES:
|
# RULES:
|
||||||
# 1. All arrays should follow the YML array syntax using `-` not `[` and `]`
|
# 1. All arrays should follow the YML array syntax using `-` not `[` and `]`
|
||||||
|
# 2. Do not use quotes for ANY string fields, including names, descriptions, UUIDs, etc.
|
||||||
|
# 3. Avoid special characters in all string fields except within the SQL query
|
||||||
|
# 4. All fields must use standard YAML syntax - strings without quotes, arrays with `-`
|
||||||
# -------------------------------------
|
# -------------------------------------
|
||||||
|
|
||||||
type: object
|
type: object
|
||||||
|
@ -149,7 +152,7 @@ properties:
|
||||||
# NAME
|
# NAME
|
||||||
name:
|
name:
|
||||||
type: string
|
type: string
|
||||||
description: Human-readable title (e.g., Total Sales)
|
description: Human-readable title (e.g., Total Sales) - do NOT use quotes
|
||||||
|
|
||||||
# DESCRIPTION
|
# DESCRIPTION
|
||||||
description:
|
description:
|
||||||
|
@ -163,7 +166,7 @@ properties:
|
||||||
items:
|
items:
|
||||||
type: string
|
type: string
|
||||||
format: uuid
|
format: uuid
|
||||||
description: UUID string of the dataset (not the dataset name)
|
description: UUID of the dataset (not the dataset name) - do NOT use quotes
|
||||||
|
|
||||||
# TIME FRAME
|
# TIME FRAME
|
||||||
timeFrame:
|
timeFrame:
|
||||||
|
@ -543,12 +546,12 @@ pub const DASHBOARD_YML_SCHEMA: &str = r##"
|
||||||
# ----------------------------------------
|
# ----------------------------------------
|
||||||
# Required fields:
|
# Required fields:
|
||||||
#
|
#
|
||||||
# name: Your Dashboard Title
|
# name: Your Dashboard Title # Do NOT use quotes for string values
|
||||||
# description: A description of the dashboard, its metrics, and its purpose.
|
# description: A description of the dashboard, its metrics, and its purpose. # NO quotes
|
||||||
# rows:
|
# rows:
|
||||||
# - id: 1 # Required row ID (integer)
|
# - id: 1 # Required row ID (integer)
|
||||||
# items:
|
# items:
|
||||||
# - id: metric-uuid-1 # UUIDv4 of an existing metric
|
# - id: metric-uuid-1 # UUIDv4 of an existing metric, NO quotes
|
||||||
# columnSizes: [12] # Required - must sum to exactly 12
|
# columnSizes: [12] # Required - must sum to exactly 12
|
||||||
# - id: 2 # REQUIRED
|
# - id: 2 # REQUIRED
|
||||||
# items:
|
# items:
|
||||||
|
@ -566,7 +569,9 @@ pub const DASHBOARD_YML_SCHEMA: &str = r##"
|
||||||
# 5. Each column size must be at least 3
|
# 5. Each column size must be at least 3
|
||||||
# 6. All arrays should follow the YML array syntax using `-`
|
# 6. All arrays should follow the YML array syntax using `-`
|
||||||
# 7. All arrays should NOT USE `[]` formatting.
|
# 7. All arrays should NOT USE `[]` formatting.
|
||||||
# 8. don't use comments. the ones in the example are just for explanation
|
# 8. Don't use comments. The ones in the example are just for explanation
|
||||||
|
# 9. Do NOT use quotes for ANY string values, including names, descriptions and UUIDs
|
||||||
|
# 10. Avoid special characters in all fields
|
||||||
# ----------------------------------------
|
# ----------------------------------------
|
||||||
|
|
||||||
type: object
|
type: object
|
||||||
|
@ -575,7 +580,7 @@ description: Specifies the structure and constraints of a dashboard config file.
|
||||||
properties:
|
properties:
|
||||||
name:
|
name:
|
||||||
type: string
|
type: string
|
||||||
description: The title of the dashboard (e.g. Sales & Marketing Dashboard)
|
description: The title of the dashboard (e.g. Sales & Marketing Dashboard) - do NOT use quotes
|
||||||
description:
|
description:
|
||||||
type: string
|
type: string
|
||||||
description: A description of the dashboard, its metrics, and its purpose
|
description: A description of the dashboard, its metrics, and its purpose
|
||||||
|
|
|
@ -51,7 +51,7 @@ impl ToolExecutor for Done {
|
||||||
"properties": {
|
"properties": {
|
||||||
"final_response": {
|
"final_response": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "The final response done in markdown format. No headers though. Bullet points in markdown please."
|
"description": "The final response message to the user. **MUST** be formatted in Markdown. Use bullet points or other appropriate Markdown formatting. Do not include headers."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false
|
"additionalProperties": false
|
||||||
|
|
|
@ -42,7 +42,7 @@ impl ToolExecutor for MessageUserClarifyingQuestion {
|
||||||
async fn get_schema(&self) -> Value {
|
async fn get_schema(&self) -> Value {
|
||||||
serde_json::json!({
|
serde_json::json!({
|
||||||
"name": self.get_name(),
|
"name": self.get_name(),
|
||||||
"description": "Use if you need to send a clarifying question to the user. You should only use this is the user request is so vague or ambiguous that you cannot determine what data to search for.",
|
"description": "Use if you need to send a clarifying question to the user. You should only use this if the user request is so vague or ambiguous that you cannot determine what data to search for.",
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"required": [
|
"required": [
|
||||||
|
@ -51,7 +51,7 @@ impl ToolExecutor for MessageUserClarifyingQuestion {
|
||||||
"properties": {
|
"properties": {
|
||||||
"text": {
|
"text": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "Message text to display to user."
|
"description": "Message text to display to user. **Supports markdown formatting**."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false
|
"additionalProperties": false
|
||||||
|
|
|
@ -775,6 +775,36 @@ pub async fn post_chat_handler(
|
||||||
// Create the final response message list: Start with filtered files, then add text/other messages
|
// Create the final response message list: Start with filtered files, then add text/other messages
|
||||||
// Use the file messages that were generated and sent early
|
// Use the file messages that were generated and sent early
|
||||||
let mut final_response_messages = early_sent_file_messages; // Use early sent files
|
let mut final_response_messages = early_sent_file_messages; // Use early sent files
|
||||||
|
|
||||||
|
// Check if any file messages were sent during streaming - if not, generate them now
|
||||||
|
if final_response_messages.is_empty() {
|
||||||
|
// Collect completed files from reasoning messages
|
||||||
|
let completed_files = collect_completed_files(&all_transformed_containers);
|
||||||
|
|
||||||
|
// Only proceed if there are files to process
|
||||||
|
if !completed_files.is_empty() {
|
||||||
|
// Apply filtering rules to determine which files to show
|
||||||
|
match apply_file_filtering_rules(
|
||||||
|
&completed_files,
|
||||||
|
context_dashboard_id,
|
||||||
|
&get_pg_pool(),
|
||||||
|
).await {
|
||||||
|
Ok(filtered_files) => {
|
||||||
|
// Generate file response values and add them to final_response_messages
|
||||||
|
final_response_messages = generate_file_response_values(&filtered_files);
|
||||||
|
tracing::info!(
|
||||||
|
"Added {} file responses to final state because no files were sent during streaming",
|
||||||
|
final_response_messages.len()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
tracing::error!("Error applying file filtering rules for final state: {}", e);
|
||||||
|
// Continue with empty file messages list if filtering fails
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
final_response_messages.append(&mut text_and_other_response_messages);
|
final_response_messages.append(&mut text_and_other_response_messages);
|
||||||
|
|
||||||
// Update chat_with_messages with final state (now including filtered files first)
|
// Update chat_with_messages with final state (now including filtered files first)
|
||||||
|
@ -1621,7 +1651,7 @@ fn tool_modify_metrics(id: String, content: String, delta_duration: Duration) ->
|
||||||
let modify_metrics_result = match serde_json::from_str::<ModifyFilesOutput>(&content) {
|
let modify_metrics_result = match serde_json::from_str::<ModifyFilesOutput>(&content) {
|
||||||
Ok(result) => result,
|
Ok(result) => result,
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
println!("Failed to parse ModifyFilesOutput: {:?}", e);
|
tracing::error!("Failed to parse ModifyFilesOutput: {:?}", e);
|
||||||
// Return an error reasoning message
|
// Return an error reasoning message
|
||||||
return Ok(vec![BusterReasoningMessage::Text(BusterReasoningText {
|
return Ok(vec![BusterReasoningMessage::Text(BusterReasoningText {
|
||||||
id,
|
id,
|
||||||
|
@ -1635,8 +1665,6 @@ fn tool_modify_metrics(id: String, content: String, delta_duration: Duration) ->
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Remove internal duration calculation
|
|
||||||
// let duration = (modify_metrics_result.duration as f64 / 1000.0 * 10.0).round() / 10.0;
|
|
||||||
let files_count = modify_metrics_result.files.len();
|
let files_count = modify_metrics_result.files.len();
|
||||||
|
|
||||||
// Create a map of files
|
// Create a map of files
|
||||||
|
@ -1651,7 +1679,7 @@ fn tool_modify_metrics(id: String, content: String, delta_duration: Duration) ->
|
||||||
let buster_file = BusterFile {
|
let buster_file = BusterFile {
|
||||||
id: file_id.clone(),
|
id: file_id.clone(),
|
||||||
file_type: "metric".to_string(),
|
file_type: "metric".to_string(),
|
||||||
file_name: file.name.clone(), // Use the updated name from the file
|
file_name: file.name.clone(),
|
||||||
version_number: file.version_number,
|
version_number: file.version_number,
|
||||||
status: "completed".to_string(),
|
status: "completed".to_string(),
|
||||||
file: BusterFileContent {
|
file: BusterFileContent {
|
||||||
|
@ -1669,8 +1697,8 @@ fn tool_modify_metrics(id: String, content: String, delta_duration: Duration) ->
|
||||||
let buster_file = BusterReasoningMessage::File(BusterReasoningFile {
|
let buster_file = BusterReasoningMessage::File(BusterReasoningFile {
|
||||||
id,
|
id,
|
||||||
message_type: "files".to_string(),
|
message_type: "files".to_string(),
|
||||||
title: if files_count == 1 { "Modified 1 metric file".to_string() } else { format!("Modified {} metric files", files_count) },
|
title: format!("Modified {} metric file{}", files_count, if files_count == 1 { "" } else { "s" }),
|
||||||
secondary_title: format!("{} seconds", delta_duration.as_secs()), // Use delta_duration
|
secondary_title: format!("{} seconds", delta_duration.as_secs()),
|
||||||
status: "completed".to_string(),
|
status: "completed".to_string(),
|
||||||
file_ids,
|
file_ids,
|
||||||
files: files_map,
|
files: files_map,
|
||||||
|
@ -1750,7 +1778,7 @@ fn tool_modify_dashboards(id: String, content: String, delta_duration: Duration)
|
||||||
let modify_dashboards_result = match serde_json::from_str::<ModifyFilesOutput>(&content) {
|
let modify_dashboards_result = match serde_json::from_str::<ModifyFilesOutput>(&content) {
|
||||||
Ok(result) => result,
|
Ok(result) => result,
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
println!("Failed to parse ModifyFilesOutput: {:?}", e);
|
tracing::error!("Failed to parse ModifyFilesOutput: {:?}", e);
|
||||||
// Return an error reasoning message
|
// Return an error reasoning message
|
||||||
return Ok(vec![BusterReasoningMessage::Text(BusterReasoningText {
|
return Ok(vec![BusterReasoningMessage::Text(BusterReasoningText {
|
||||||
id,
|
id,
|
||||||
|
@ -1764,8 +1792,6 @@ fn tool_modify_dashboards(id: String, content: String, delta_duration: Duration)
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Remove internal duration calculation
|
|
||||||
// let duration = (modify_dashboards_result.duration as f64 / 1000.0 * 10.0).round() / 10.0;
|
|
||||||
let files_count = modify_dashboards_result.files.len();
|
let files_count = modify_dashboards_result.files.len();
|
||||||
|
|
||||||
// Create a map of files
|
// Create a map of files
|
||||||
|
@ -1799,7 +1825,7 @@ fn tool_modify_dashboards(id: String, content: String, delta_duration: Duration)
|
||||||
id,
|
id,
|
||||||
message_type: "files".to_string(),
|
message_type: "files".to_string(),
|
||||||
title: format!("Modified {} dashboard file{}", files_count, if files_count == 1 { "" } else { "s" }),
|
title: format!("Modified {} dashboard file{}", files_count, if files_count == 1 { "" } else { "s" }),
|
||||||
secondary_title: format!("{} seconds", delta_duration.as_secs()), // Use delta_duration
|
secondary_title: format!("{} seconds", delta_duration.as_secs()),
|
||||||
status: "completed".to_string(),
|
status: "completed".to_string(),
|
||||||
file_ids,
|
file_ids,
|
||||||
files: files_map,
|
files: files_map,
|
||||||
|
|
Loading…
Reference in New Issue