Merge branch 'evals' of https://github.com/buster-so/buster into evals

This commit is contained in:
Nate Kelley 2025-04-11 09:56:04 -06:00
commit 24d785616c
No known key found for this signature in database
GPG Key ID: FD90372AB8D98B4F
4 changed files with 150 additions and 98 deletions

View File

@ -107,10 +107,11 @@ pub const METRIC_YML_SCHEMA: &str = r##"
# ------------------------------------- # -------------------------------------
# Required top-level fields: # Required top-level fields:
# #
# name: "Your Metric Title" # name: Your Metric Title
# description: "A detailed description of what this metric measures and how it should be interpreted" # Optional # description: A detailed description of what this metric measures and how it should be interpreted # Optional
# datasetIds: ["123e4567-e89b-12d3-a456-426614174000"] # Dataset UUIDs (not names) # datasetIds:
# timeFrame: "Last 30 days" # Human-readable time period covered by the query # - 123e4567-e89b-12d3-a456-426614174000 # Dataset UUIDs (not names)
# timeFrame: Last 30 days # Human-readable time period covered by the query
# sql: | # sql: |
# SELECT # SELECT
# date, # date,
@ -120,24 +121,21 @@ pub const METRIC_YML_SCHEMA: &str = r##"
# #
# chartConfig: # chartConfig:
# selectedChartType: "bar" # One of: bar, line, scatter, pie, combo, metric, table # selectedChartType: "bar" # One of: bar, line, scatter, pie, combo, metric, table
# columnLabelFormats: { # REQUIRED - Must define formatting for all columns # columnLabelFormats: # REQUIRED - Must define formatting for all columns
# "date": { # date:
# "columnType": "date", # columnType: date
# "style": "date", # style: date
# "dateFormat": "MMM DD, YYYY" # dateFormat: MMM DD, YYYY
# }, # total:
# "total": { # columnType: number
# "columnType": "number", # style: currency
# "style": "currency", # currency: USD
# "currency": "USD", # minimumFractionDigits: 2
# "minimumFractionDigits": 2
# }
# }
# barAndLineAxis: {...} # Required for bar and line charts OR # barAndLineAxis: {...} # Required for bar and line charts OR
# scatterAxis: {...} # Required for scatter charts OR # scatterAxis: {...} # Required for scatter charts OR
# pieChartAxis: {...} # Required for pie charts OR # pieChartAxis: {...} # Required for pie charts OR
# comboChartAxis: {...} # Required for combo charts OR # comboChartAxis: {...} # Required for combo charts OR
# metricColumnId: "column_id" # Required for metric charts # metricColumnId: column_id # Required for metric charts
# ------------------------------------- # -------------------------------------
type: object type: object
@ -491,17 +489,17 @@ pub const DASHBOARD_YML_SCHEMA: &str = r##"
# ---------------------------------------- # ----------------------------------------
# Required fields: # Required fields:
# #
# name: "Your Dashboard Title" # name: Your Dashboard Title
# description: "A description of the dashboard, it's metrics, and its purpose." # description: A description of the dashboard, its metrics, and its purpose.
# rows: # rows:
# - id: 1 # Required row ID (integer) # - id: 1 # Required row ID (integer)
# items: # items:
# - id: "metric-uuid-1" # UUIDv4 of an existing metric # - id: metric-uuid-1 # UUIDv4 of an existing metric
# columnSizes: [12] # Required - must sum to exactly 12 # columnSizes: [12] # Required - must sum to exactly 12
# - id: 2 # REQUIRED # - id: 2 # REQUIRED
# items: # items:
# - id: "metric-uuid-2" # - id: metric-uuid-2
# - id: "metric-uuid-3" # - id: metric-uuid-3
# columnSizes: [6, 6] # Required - must sum to exactly 12 # columnSizes: [6, 6] # Required - must sum to exactly 12
# #
# Rules: # Rules:
@ -1047,6 +1045,13 @@ pub struct ModifyFilesOutput {
pub message: String, pub message: String,
pub duration: i64, pub duration: i64,
pub files: Vec<FileWithId>, pub files: Vec<FileWithId>,
pub failed_files: Vec<FailedFileModification>,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct FailedFileModification {
pub file_name: String,
pub error: String,
} }
#[derive(Debug)] #[derive(Debug)]

View File

@ -17,7 +17,7 @@ use tracing::{debug, error, info};
use super::{ use super::{
common::{ common::{
process_dashboard_file_modification, ModificationResult, ModifyFilesOutput, process_dashboard_file_modification, ModificationResult, ModifyFilesOutput,
ModifyFilesParams, ModifyFilesParams, FailedFileModification,
}, },
file_types::file::FileWithId, file_types::file::FileWithId,
FileModificationTool, FileModificationTool,
@ -178,14 +178,16 @@ impl ToolExecutor for ModifyDashboardFilesTool {
let duration = start_time.elapsed().as_millis() as i64; let duration = start_time.elapsed().as_millis() as i64;
let mut output = ModifyFilesOutput { let mut output = ModifyFilesOutput {
message: format!( message: format!(
"Modified {} dashboard files and created new versions", "Modified {} dashboard files and created new versions. {} failures.",
batch.files.len() batch.files.len(),
batch.failed_modifications.len()
), ),
duration, duration,
files: Vec::new(), files: Vec::new(),
failed_files: Vec::new(),
}; };
// Add files to output // Add successful files to output
output output
.files .files
.extend(batch.files.iter().enumerate().map(|(i, file)| { .extend(batch.files.iter().enumerate().map(|(i, file)| {
@ -203,6 +205,14 @@ impl ToolExecutor for ModifyDashboardFilesTool {
} }
})); }));
// Add failed modifications to output
output.failed_files.extend(
batch
.failed_modifications
.into_iter()
.map(|(file_name, error)| FailedFileModification { file_name, error }),
);
Ok(output) Ok(output)
} }
@ -352,7 +362,7 @@ async fn get_modify_dashboards_new_content_description() -> String {
async fn get_modify_dashboards_content_to_replace_description() -> String { async fn get_modify_dashboards_content_to_replace_description() -> String {
if env::var("USE_BRAINTRUST_PROMPTS").is_err() { if env::var("USE_BRAINTRUST_PROMPTS").is_err() {
return "The exact content in the file that should be replaced. Must match exactly and be specific enough to only match once. Use an empty string to append the new content to the end of the file." return "The exact content in the file that should be replaced. Precise matching is crucial: the provided content must match exactly and be specific enough to target only the intended section, avoiding unintended replacements. This should typically be a small, specific snippet; replacing the entire file content is usually not intended unless the entire dashboard definition needs a complete overhaul. Use an empty string to append the new content to the end of the file."
.to_string(); .to_string();
} }
@ -361,7 +371,7 @@ async fn get_modify_dashboards_content_to_replace_description() -> String {
Ok(message) => message, Ok(message) => message,
Err(e) => { Err(e) => {
eprintln!("Failed to get prompt system message: {}", e); eprintln!("Failed to get prompt system message: {}", e);
"The exact content in the file that should be replaced. Must match exactly and be specific enough to only match once. Use an empty string to append the new content to the end of the file.".to_string() "The exact content in the file that should be replaced. Precise matching is crucial: the provided content must match exactly and be specific enough to target only the intended section, avoiding unintended replacements. This should typically be a small, specific snippet; replacing the entire file content is usually not intended unless the entire dashboard definition needs a complete overhaul. Use an empty string to append the new content to the end of the file.".to_string()
} }
} }
} }

View File

@ -21,6 +21,7 @@ use uuid::Uuid;
use super::{ use super::{
common::{ common::{
process_metric_file_modification, ModificationResult, ModifyFilesOutput, ModifyFilesParams, process_metric_file_modification, ModificationResult, ModifyFilesOutput, ModifyFilesParams,
FailedFileModification,
}, },
file_types::file::FileWithId, file_types::file::FileWithId,
FileModificationTool, FileModificationTool,
@ -91,6 +92,7 @@ impl ToolExecutor for ModifyMetricFilesTool {
return Ok(ModifyFilesOutput { return Ok(ModifyFilesOutput {
message: format!("Failed to connect to database: {}", e), message: format!("Failed to connect to database: {}", e),
files: Vec::new(), files: Vec::new(),
failed_files: Vec::new(),
duration, duration,
}); });
} }
@ -211,6 +213,7 @@ impl ToolExecutor for ModifyMetricFilesTool {
return Ok(ModifyFilesOutput { return Ok(ModifyFilesOutput {
message: format!("Failed to fetch metric files: {}", e), message: format!("Failed to fetch metric files: {}", e),
files: Vec::new(), files: Vec::new(),
failed_files: Vec::new(),
duration, duration,
}); });
} }
@ -219,11 +222,6 @@ impl ToolExecutor for ModifyMetricFilesTool {
// Process results and generate output message // Process results and generate output message
let duration = start_time.elapsed().as_millis() as i64; let duration = start_time.elapsed().as_millis() as i64;
let _output = ModifyFilesOutput {
message: String::new(),
files: Vec::new(),
duration,
};
// Update metric files in database with version history and metadata // Update metric files in database with version history and metadata
if !batch.files.is_empty() { if !batch.files.is_empty() {
@ -256,18 +254,19 @@ impl ToolExecutor for ModifyMetricFilesTool {
} }
} }
// Generate output // Construct final output
let duration = start_time.elapsed().as_millis() as i64;
let mut output = ModifyFilesOutput { let mut output = ModifyFilesOutput {
message: format!( message: format!(
"Modified {} metric files and created new versions", "Modified {} metric files and created new versions. {} failures.",
batch.files.len() batch.files.len(),
batch.failed_modifications.len()
), ),
duration, duration,
files: Vec::new(), files: Vec::new(),
failed_files: Vec::new(),
}; };
// Add files to output // Add successful files to output
output output
.files .files
.extend(batch.files.iter().enumerate().map(|(i, file)| { .extend(batch.files.iter().enumerate().map(|(i, file)| {
@ -285,6 +284,14 @@ impl ToolExecutor for ModifyMetricFilesTool {
} }
})); }));
// Add failed modifications to output
output.failed_files.extend(
batch
.failed_modifications
.into_iter()
.map(|(file_name, error)| FailedFileModification { file_name, error }),
);
Ok(output) Ok(output)
} }

View File

@ -25,48 +25,28 @@ fn parse_input_for_completion<'a>(input: &'a str, cwd: &Path) -> Option<PathComp
let token_start_index = input.rfind(' ').map_or(0, |i| i + 1); let token_start_index = input.rfind(' ').map_or(0, |i| i + 1);
let potential_fragment = &input[token_start_index..]; let potential_fragment = &input[token_start_index..];
if potential_fragment.is_empty() && !input.ends_with(' ') { // Handle empty input or input ending with space
// If the whole input is empty, or the last non-empty part doesn't look like a path start if potential_fragment.is_empty() {
// Let's offer completions from the CWD if the *entire* input is empty OR ends with space if input.is_empty() || input.ends_with(' ') {
if input.is_empty() || input.ends_with(' '){ // Offer completions from CWD for empty input or space-terminated input
return Some(PathCompletionTarget { return Some(PathCompletionTarget {
fragment: "", fragment: "",
base_dir_str: "", base_dir_str: "",
prefix: "", prefix: "",
search_dir: cwd.to_path_buf(), search_dir: cwd.to_path_buf(),
fragment_start_index: input.len(), // At the end fragment_start_index: input.len(), // At the end
}); });
} else { } else {
// Don't autocomplete mid-word *unless* it looks like a path // Don't autocomplete mid-word *unless* it looks like a path
if !potential_fragment.contains('/') && !potential_fragment.starts_with('.'){ if !potential_fragment.contains('/') && !potential_fragment.starts_with('.') {
return None; return None;
} }
// If it looks like a path fragment even without spaces, continue parsing below.
} }
} else if potential_fragment.is_empty() && input.ends_with(' ') {
// Explicitly trigger completion from CWD if input ends with space
return Some(PathCompletionTarget {
fragment: "",
base_dir_str: "",
prefix: "",
search_dir: cwd.to_path_buf(),
fragment_start_index: input.len(), // At the end
});
} }
// For any non-empty fragment, we'll try to offer completions
// Basic check: Does it look like a path at all? (./ ../ / or contains /) // Whether or not it contains path separators
// Or is it just a potential file/dir name in the CWD?
let is_path_like = potential_fragment.starts_with("./")
|| potential_fragment.starts_with("../")
// || potential_fragment.starts_with('/') // Avoid absolute paths for now
|| potential_fragment.contains('/');
// If it's not explicitly path-like, assume it's a potential file/dir in CWD
if !is_path_like && potential_fragment.contains(std::path::is_separator) {
return None; // Avoid completing things like "some command with/slashes"
}
// Find the last path separator to determine base directory and prefix // Find the last path separator to determine base directory and prefix
let (fragment_base_dir_str, prefix) = match potential_fragment.rfind('/') { let (fragment_base_dir_str, prefix) = match potential_fragment.rfind('/') {
Some(sep_index) => { Some(sep_index) => {
@ -75,7 +55,8 @@ fn parse_input_for_completion<'a>(input: &'a str, cwd: &Path) -> Option<PathComp
(&potential_fragment[..=sep_index], &potential_fragment[sep_index + 1..]) (&potential_fragment[..=sep_index], &potential_fragment[sep_index + 1..])
} }
None => { None => {
// Example: "READ" -> base="", prefix="READ" // Example: "chat" -> base="", prefix="chat"
// Treat any word as a potential filename or directory name
("", potential_fragment) ("", potential_fragment)
} }
}; };
@ -87,17 +68,33 @@ fn parse_input_for_completion<'a>(input: &'a str, cwd: &Path) -> Option<PathComp
// Basic security/sanity check: prevent "escaping" the CWD with excessive "../" // Basic security/sanity check: prevent "escaping" the CWD with excessive "../"
let canonical_search = match search_dir_abs.canonicalize() { let canonical_search = match search_dir_abs.canonicalize() {
Ok(p) => p, Ok(p) => p,
Err(_) => return None, // Invalid base path derived from fragment Err(_) => {
// If the base directory doesn't exist, default to CWD for searching
return Some(PathCompletionTarget {
fragment: potential_fragment,
base_dir_str: "",
prefix: potential_fragment, // Use the whole fragment as prefix
search_dir: cwd.to_path_buf(),
fragment_start_index: token_start_index,
});
}
}; };
let canonical_cwd = match cwd.canonicalize() {
Ok(p) => p,
Err(_) => return None, // Should not happen normally
};
if !canonical_search.starts_with(&canonical_cwd) {
// eprintln!("Warning: Completion attempted outside of CWD.");
return None; // Don't allow completions outside the initial CWD
}
let canonical_cwd = match cwd.canonicalize() {
Ok(p) => p,
Err(_) => return None, // Should not happen normally
};
if !canonical_search.starts_with(&canonical_cwd) {
// Fall back to CWD for security
return Some(PathCompletionTarget {
fragment: potential_fragment,
base_dir_str: "",
prefix: potential_fragment, // Use the whole fragment as prefix
search_dir: cwd.to_path_buf(),
fragment_start_index: token_start_index,
});
}
Some(PathCompletionTarget { Some(PathCompletionTarget {
fragment: potential_fragment, fragment: potential_fragment,
@ -140,27 +137,25 @@ fn find_completions_recursive(
} }
let entry_path_relative = relative_path.join(name); let entry_path_relative = relative_path.join(name);
let entry_path_str = entry_path_relative.to_string_lossy().to_lowercase();
let name_lower = name.to_lowercase();
// Case-insensitive check against the *full relative path* // Check if this entry should be included in completions:
if entry_path_relative.to_string_lossy().to_lowercase().starts_with(base_prefix_lower) { // 1. Empty prefix means include everything from starting directory
let mut completion_path = entry_path_relative.clone(); // Path relative to initial search // 2. Name starts with prefix (traditional completion)
// 3. Any part of path contains the prefix (fuzzy search)
let matches_path = entry_path_str.contains(base_prefix_lower);
let matches_name = name_lower.contains(base_prefix_lower);
let should_include = base_prefix_lower.is_empty() || matches_name || matches_path;
if should_include {
let completion_path = entry_path_relative.clone(); // Path relative to initial search
match entry.file_type() { match entry.file_type() {
Ok(ft) => { Ok(ft) => {
if ft.is_dir() { if ft.is_dir() {
// Add the directory itself to completions (with a slash) // Add the directory itself to completions (with a slash)
// Check if the match is the directory name itself or something inside it completions.push(completion_path);
if name.to_lowercase().starts_with(base_prefix_lower) || base_prefix_lower.is_empty() || base_prefix_lower.starts_with(&name.to_lowercase()) {
completions.push(completion_path);
// Recurse into subdirectory
let _ = find_completions_recursive(
&entry.path(),
base_prefix_lower,
&entry_path_relative,
completions,
max_depth - 1,
);
}
} else if ft.is_file() { } else if ft.is_file() {
// Add the file // Add the file
completions.push(completion_path); completions.push(completion_path);
@ -169,6 +164,23 @@ fn find_completions_recursive(
Err(_) => { /* Ignore file type errors */ } Err(_) => { /* Ignore file type errors */ }
} }
} }
// Always recurse into directories, even if they don't match,
// as they might contain matching files or subdirectories
match entry.file_type() {
Ok(ft) => {
if ft.is_dir() {
let _ = find_completions_recursive(
&entry.path(),
base_prefix_lower,
&entry_path_relative,
completions,
max_depth - 1,
);
}
}
Err(_) => { /* Ignore file type errors */ }
}
} }
} }
} }
@ -197,7 +209,7 @@ pub fn get_completions<'a>(input: &'a str, cwd_str: &str) -> (Vec<String>, Optio
let mut raw_completions = Vec::new(); let mut raw_completions = Vec::new();
let prefix_lower = target.prefix.to_lowercase(); let prefix_lower = target.prefix.to_lowercase();
let max_depth = 5; // Limit recursion depth let max_depth = 10; // Increased recursion depth for better deep directory search
// Start recursive search from the absolute search_dir identified by parse_input // Start recursive search from the absolute search_dir identified by parse_input
// Pass an empty initial relative path // Pass an empty initial relative path
@ -225,14 +237,24 @@ pub fn get_completions<'a>(input: &'a str, cwd_str: &str) -> (Vec<String>, Optio
final_completions.push(completion_str); final_completions.push(completion_str);
} }
// Deduplicate before sorting - some paths might show up multiple times
// due to the more aggressive recursive search
final_completions.sort();
final_completions.dedup();
// Now sort with directories first
final_completions.sort_by(|a, b| { final_completions.sort_by(|a, b| {
let a_is_dir = a.ends_with('/'); let a_is_dir = a.ends_with('/');
let b_is_dir = b.ends_with('/'); let b_is_dir = b.ends_with('/');
// Sort directories first, then files, then alphabetically // Sort directories first, then files, then alphabetically
b_is_dir.cmp(&a_is_dir).then_with(|| a.cmp(b)) b_is_dir.cmp(&a_is_dir).then_with(|| a.cmp(b))
}); });
final_completions.dedup(); // Remove duplicates that might arise
// Limit results to a reasonable number to avoid overwhelming the UI
let max_completions = 100;
if final_completions.len() > max_completions {
final_completions.truncate(max_completions);
}
(final_completions, Some(target)) (final_completions, Some(target))
} }
@ -336,10 +358,18 @@ mod tests {
let (completions, _) = get_completions("src/sub", cwd_str); let (completions, _) = get_completions("src/sub", cwd_str);
assert_eq!(completions, vec!["src/subdir/", "src/subdir/deep_file.txt"]); assert_eq!(completions, vec!["src/subdir/", "src/subdir/deep_file.txt"]);
// Complete file in deep path // Complete file in deep path
let (completions, _) = get_completions("src/subdir/d", cwd_str); let (completions, _) = get_completions("src/subdir/d", cwd_str);
assert_eq!(completions, vec!["src/subdir/deep_file.txt"]); assert_eq!(completions, vec!["src/subdir/deep_file.txt"]);
// Test flexible search (no path structure)
// This is the key new functionality - finds matches anywhere in the directory tree
let (completions, _) = get_completions("deep", cwd_str);
assert_eq!(completions, vec!["src/subdir/deep_file.txt"]);
let (completions, _) = get_completions("lib", cwd_str);
assert_eq!(completions, vec!["src/lib.rs"]);
// Empty input -> complete from CWD // Empty input -> complete from CWD
let (completions, _) = get_completions("", cwd_str); let (completions, _) = get_completions("", cwd_str);
assert_eq!(completions, vec!["src/", "LICENSE", "README.md"]); // Only top level for empty assert_eq!(completions, vec!["src/", "LICENSE", "README.md"]); // Only top level for empty