Merge branch 'evals' of https://github.com/buster-so/buster into evals

2025-04-11 09:56:04 -06:00 · 2025-04-11 09:56:04 -06:00 · 24d785616c
parent 1fdd46fd43 34dabe8122
commit 24d785616c
4 changed files with 150 additions and 98 deletions
--- a/api/libs/agents/src/tools/categories/file_tools/common.rs
+++ b/api/libs/agents/src/tools/categories/file_tools/common.rs
@ -107,10 +107,11 @@ pub const METRIC_YML_SCHEMA: &str = r##"
 # -------------------------------------
 # Required top-level fields:
 #
-# name: "Your Metric Title"
+# name: Your Metric Title
-# description: "A detailed description of what this metric measures and how it should be interpreted"  # Optional
+# description: A detailed description of what this metric measures and how it should be interpreted  # Optional
-# datasetIds: ["123e4567-e89b-12d3-a456-426614174000"]  # Dataset UUIDs (not names)
+# datasetIds: 
-# timeFrame: "Last 30 days"  # Human-readable time period covered by the query
+#   - 123e4567-e89b-12d3-a456-426614174000  # Dataset UUIDs (not names)
 # timeFrame: Last 30 days  # Human-readable time period covered by the query
 # sql: |
 #   SELECT 
 #     date,
@ -120,24 +121,21 @@ pub const METRIC_YML_SCHEMA: &str = r##"
 # 
 # chartConfig:
 #   selectedChartType: "bar"  # One of: bar, line, scatter, pie, combo, metric, table
-#   columnLabelFormats: {     # REQUIRED - Must define formatting for all columns
+#   columnLabelFormats:      # REQUIRED - Must define formatting for all columns
-#     "date": {
+#     date:
-#       "columnType": "date",
+#       columnType: date
-#       "style": "date",
+#       style: date
-#       "dateFormat": "MMM DD, YYYY"
+#       dateFormat: MMM DD, YYYY
-#     },
+#     total:
-#     "total": {
+#       columnType: number
-#       "columnType": "number",
+#       style: currency
-#       "style": "currency",
+#       currency: USD
-#       "currency": "USD",
+#       minimumFractionDigits: 2
 #       "minimumFractionDigits": 2
 #     }
 #   }
 #   barAndLineAxis: {...}  # Required for bar and line charts OR
 #   scatterAxis: {...}  # Required for scatter charts OR
 #   pieChartAxis: {...}  # Required for pie charts OR
 #   comboChartAxis: {...}  # Required for combo charts OR
-#   metricColumnId: "column_id"  # Required for metric charts
+#   metricColumnId: column_id  # Required for metric charts
 # -------------------------------------
 type: object
@ -491,17 +489,17 @@ pub const DASHBOARD_YML_SCHEMA: &str = r##"
 # ----------------------------------------
 # Required fields:
 #
-# name: "Your Dashboard Title"
+# name: Your Dashboard Title
-# description: "A description of the dashboard, it's metrics, and its purpose."
+# description: A description of the dashboard, its metrics, and its purpose.
 # rows: 
 #   - id: 1               # Required row ID (integer)
 #     items:
-#       - id: "metric-uuid-1"  # UUIDv4 of an existing metric
+#       - id: metric-uuid-1  # UUIDv4 of an existing metric
 #     columnSizes: [12]   # Required - must sum to exactly 12
 #   - id: 2 # REQUIRED
 #     items:
-#       - id: "metric-uuid-2"
+#       - id: metric-uuid-2
-#       - id: "metric-uuid-3"
+#       - id: metric-uuid-3
 #     columnSizes: [6, 6] # Required - must sum to exactly 12
 #
 # Rules:
@ -1047,6 +1045,13 @@ pub struct ModifyFilesOutput {
    pub message: String,
    pub duration: i64,
    pub files: Vec<FileWithId>,
    pub failed_files: Vec<FailedFileModification>,
 }
 #[derive(Debug, Serialize, Deserialize)]
 pub struct FailedFileModification {
    pub file_name: String,
    pub error: String,
 }
 #[derive(Debug)]
--- a/api/libs/agents/src/tools/categories/file_tools/modify_dashboards.rs
+++ b/api/libs/agents/src/tools/categories/file_tools/modify_dashboards.rs
@ -17,7 +17,7 @@ use tracing::{debug, error, info};
 use super::{
    common::{
        process_dashboard_file_modification, ModificationResult, ModifyFilesOutput,
-        ModifyFilesParams,
+        ModifyFilesParams, FailedFileModification,
    },
    file_types::file::FileWithId,
    FileModificationTool,
@ -178,14 +178,16 @@ impl ToolExecutor for ModifyDashboardFilesTool {
        let duration = start_time.elapsed().as_millis() as i64;
        let mut output = ModifyFilesOutput {
            message: format!(
-                "Modified {} dashboard files and created new versions",
+                "Modified {} dashboard files and created new versions. {} failures.",
-                batch.files.len()
+                batch.files.len(),
                batch.failed_modifications.len()
            ),
            duration,
            files: Vec::new(),
            failed_files: Vec::new(),
        };
-        // Add files to output
+        // Add successful files to output
        output
            .files
            .extend(batch.files.iter().enumerate().map(|(i, file)| {
@ -203,6 +205,14 @@ impl ToolExecutor for ModifyDashboardFilesTool {
                }
            }));
        // Add failed modifications to output
        output.failed_files.extend(
            batch
                .failed_modifications
                .into_iter()
                .map(|(file_name, error)| FailedFileModification { file_name, error }),
        );
        Ok(output)
    }
@ -352,7 +362,7 @@ async fn get_modify_dashboards_new_content_description() -> String {
 async fn get_modify_dashboards_content_to_replace_description() -> String {
    if env::var("USE_BRAINTRUST_PROMPTS").is_err() {
-        return "The exact content in the file that should be replaced. Must match exactly and be specific enough to only match once. Use an empty string to append the new content to the end of the file."
+        return "The exact content in the file that should be replaced. Precise matching is crucial: the provided content must match exactly and be specific enough to target only the intended section, avoiding unintended replacements. This should typically be a small, specific snippet; replacing the entire file content is usually not intended unless the entire dashboard definition needs a complete overhaul. Use an empty string to append the new content to the end of the file."
            .to_string();
    }
@ -361,7 +371,7 @@ async fn get_modify_dashboards_content_to_replace_description() -> String {
        Ok(message) => message,
        Err(e) => {
            eprintln!("Failed to get prompt system message: {}", e);
-            "The exact content in the file that should be replaced. Must match exactly and be specific enough to only match once. Use an empty string to append the new content to the end of the file.".to_string()
+            "The exact content in the file that should be replaced. Precise matching is crucial: the provided content must match exactly and be specific enough to target only the intended section, avoiding unintended replacements. This should typically be a small, specific snippet; replacing the entire file content is usually not intended unless the entire dashboard definition needs a complete overhaul. Use an empty string to append the new content to the end of the file.".to_string()
        }
    }
 }
--- a/api/libs/agents/src/tools/categories/file_tools/modify_metrics.rs
+++ b/api/libs/agents/src/tools/categories/file_tools/modify_metrics.rs
@ -21,6 +21,7 @@ use uuid::Uuid;
 use super::{
    common::{
        process_metric_file_modification, ModificationResult, ModifyFilesOutput, ModifyFilesParams,
        FailedFileModification,
    },
    file_types::file::FileWithId,
    FileModificationTool,
@ -91,6 +92,7 @@ impl ToolExecutor for ModifyMetricFilesTool {
                return Ok(ModifyFilesOutput {
                    message: format!("Failed to connect to database: {}", e),
                    files: Vec::new(),
                    failed_files: Vec::new(),
                    duration,
                });
            }
@ -211,6 +213,7 @@ impl ToolExecutor for ModifyMetricFilesTool {
                    return Ok(ModifyFilesOutput {
                        message: format!("Failed to fetch metric files: {}", e),
                        files: Vec::new(),
                        failed_files: Vec::new(),
                        duration,
                    });
                }
@ -219,11 +222,6 @@ impl ToolExecutor for ModifyMetricFilesTool {
        // Process results and generate output message
        let duration = start_time.elapsed().as_millis() as i64;
        let _output = ModifyFilesOutput {
            message: String::new(),
            files: Vec::new(),
            duration,
        };
        // Update metric files in database with version history and metadata
        if !batch.files.is_empty() {
@ -256,18 +254,19 @@ impl ToolExecutor for ModifyMetricFilesTool {
            }
        }
-        // Generate output
+        // Construct final output
        let duration = start_time.elapsed().as_millis() as i64;
        let mut output = ModifyFilesOutput {
            message: format!(
-                "Modified {} metric files and created new versions",
+                "Modified {} metric files and created new versions. {} failures.",
-                batch.files.len()
+                batch.files.len(),
                batch.failed_modifications.len()
            ),
            duration,
            files: Vec::new(),
            failed_files: Vec::new(),
        };
-        // Add files to output
+        // Add successful files to output
        output
            .files
            .extend(batch.files.iter().enumerate().map(|(i, file)| {
@ -285,6 +284,14 @@ impl ToolExecutor for ModifyMetricFilesTool {
                }
            }));
        // Add failed modifications to output
        output.failed_files.extend(
            batch
                .failed_modifications
                .into_iter()
                .map(|(file_name, error)| FailedFileModification { file_name, error }),
        );
        Ok(output)
    }
--- a/cli/cli/src/commands/chat/completion.rs
+++ b/cli/cli/src/commands/chat/completion.rs
@ -25,48 +25,28 @@ fn parse_input_for_completion<'a>(input: &'a str, cwd: &Path) -> Option<PathComp
    let token_start_index = input.rfind(' ').map_or(0, |i| i + 1);
    let potential_fragment = &input[token_start_index..];
-    if potential_fragment.is_empty() && !input.ends_with(' ') {
+    // Handle empty input or input ending with space
-        // If the whole input is empty, or the last non-empty part doesn't look like a path start
+    if potential_fragment.is_empty() {
-        // Let's offer completions from the CWD if the *entire* input is empty OR ends with space
+        if input.is_empty() || input.ends_with(' ') {
-         if input.is_empty() || input.ends_with(' '){
+            // Offer completions from CWD for empty input or space-terminated input
-             return Some(PathCompletionTarget {
+            return Some(PathCompletionTarget {
                fragment: "",
                base_dir_str: "",
                prefix: "",
                search_dir: cwd.to_path_buf(),
                fragment_start_index: input.len(), // At the end
            });
-         } else {
+        } else {
            // Don't autocomplete mid-word *unless* it looks like a path
-             if !potential_fragment.contains('/') && !potential_fragment.starts_with('.'){
+            if !potential_fragment.contains('/') && !potential_fragment.starts_with('.') {
                return None;
-             }
+            }
             // If it looks like a path fragment even without spaces, continue parsing below.
        }
    } else if potential_fragment.is_empty() && input.ends_with(' ') {
         // Explicitly trigger completion from CWD if input ends with space
         return Some(PathCompletionTarget {
                fragment: "",
                base_dir_str: "",
                prefix: "",
                search_dir: cwd.to_path_buf(),
                fragment_start_index: input.len(), // At the end
            });
    }
-
+    // For any non-empty fragment, we'll try to offer completions
-    // Basic check: Does it look like a path at all? (./ ../ / or contains /)
+    // Whether or not it contains path separators
-    // Or is it just a potential file/dir name in the CWD?
+    
    let is_path_like = potential_fragment.starts_with("./")
        || potential_fragment.starts_with("../")
        // || potential_fragment.starts_with('/') // Avoid absolute paths for now
        || potential_fragment.contains('/');
    // If it's not explicitly path-like, assume it's a potential file/dir in CWD
     if !is_path_like && potential_fragment.contains(std::path::is_separator) {
         return None; // Avoid completing things like "some command with/slashes"
     }
    // Find the last path separator to determine base directory and prefix
    let (fragment_base_dir_str, prefix) = match potential_fragment.rfind('/') {
        Some(sep_index) => {
@ -75,7 +55,8 @@ fn parse_input_for_completion<'a>(input: &'a str, cwd: &Path) -> Option<PathComp
            (&potential_fragment[..=sep_index], &potential_fragment[sep_index + 1..])
        }
        None => {
-            // Example: "READ"       -> base="", prefix="READ"
+            // Example: "chat"       -> base="", prefix="chat"
            // Treat any word as a potential filename or directory name
            ("", potential_fragment)
        }
    };
@ -87,17 +68,33 @@ fn parse_input_for_completion<'a>(input: &'a str, cwd: &Path) -> Option<PathComp
    // Basic security/sanity check: prevent "escaping" the CWD with excessive "../"
    let canonical_search = match search_dir_abs.canonicalize() {
        Ok(p) => p,
-        Err(_) => return None, // Invalid base path derived from fragment
+        Err(_) => {
            // If the base directory doesn't exist, default to CWD for searching
            return Some(PathCompletionTarget {
                fragment: potential_fragment,
                base_dir_str: "",
                prefix: potential_fragment, // Use the whole fragment as prefix
                search_dir: cwd.to_path_buf(),
                fragment_start_index: token_start_index,
            });
        }
    };
    let canonical_cwd = match cwd.canonicalize() {
         Ok(p) => p,
         Err(_) => return None, // Should not happen normally
    };
    if !canonical_search.starts_with(&canonical_cwd) {
        // eprintln!("Warning: Completion attempted outside of CWD.");
        return None; // Don't allow completions outside the initial CWD
    }
    let canonical_cwd = match cwd.canonicalize() {
        Ok(p) => p,
        Err(_) => return None, // Should not happen normally
    };
    if !canonical_search.starts_with(&canonical_cwd) {
        // Fall back to CWD for security
        return Some(PathCompletionTarget {
            fragment: potential_fragment,
            base_dir_str: "",
            prefix: potential_fragment, // Use the whole fragment as prefix
            search_dir: cwd.to_path_buf(),
            fragment_start_index: token_start_index,
        });
    }
    Some(PathCompletionTarget {
        fragment: potential_fragment,
@ -140,27 +137,25 @@ fn find_completions_recursive(
                }
                let entry_path_relative = relative_path.join(name);
                let entry_path_str = entry_path_relative.to_string_lossy().to_lowercase();
                let name_lower = name.to_lowercase();
-                // Case-insensitive check against the *full relative path*
+                // Check if this entry should be included in completions:
-                if entry_path_relative.to_string_lossy().to_lowercase().starts_with(base_prefix_lower) {
+                // 1. Empty prefix means include everything from starting directory
-                    let mut completion_path = entry_path_relative.clone(); // Path relative to initial search
+                // 2. Name starts with prefix (traditional completion)
                // 3. Any part of path contains the prefix (fuzzy search)
                let matches_path = entry_path_str.contains(base_prefix_lower);
                let matches_name = name_lower.contains(base_prefix_lower);
                let should_include = base_prefix_lower.is_empty() || matches_name || matches_path;
                if should_include {
                    let completion_path = entry_path_relative.clone(); // Path relative to initial search
                    match entry.file_type() {
                         Ok(ft) => {
                            if ft.is_dir() {
                                // Add the directory itself to completions (with a slash)
-                                // Check if the match is the directory name itself or something inside it
+                                completions.push(completion_path);
                                if name.to_lowercase().starts_with(base_prefix_lower) || base_prefix_lower.is_empty() || base_prefix_lower.starts_with(&name.to_lowercase()) {
                                    completions.push(completion_path);
                                    // Recurse into subdirectory
                                    let _ = find_completions_recursive(
                                        &entry.path(),
                                        base_prefix_lower,
                                        &entry_path_relative,
                                        completions,
                                        max_depth - 1,
                                    );
                                }
                            } else if ft.is_file() {
                                // Add the file
                                completions.push(completion_path);
@ -169,6 +164,23 @@ fn find_completions_recursive(
                         Err(_) => { /* Ignore file type errors */ }
                    }
                }
                // Always recurse into directories, even if they don't match,
                // as they might contain matching files or subdirectories
                match entry.file_type() {
                    Ok(ft) => {
                        if ft.is_dir() {
                            let _ = find_completions_recursive(
                                &entry.path(),
                                base_prefix_lower,
                                &entry_path_relative,
                                completions,
                                max_depth - 1,
                            );
                        }
                    }
                    Err(_) => { /* Ignore file type errors */ }
                }
            }
        }
    }
@ -197,7 +209,7 @@ pub fn get_completions<'a>(input: &'a str, cwd_str: &str) -> (Vec<String>, Optio
    let mut raw_completions = Vec::new();
    let prefix_lower = target.prefix.to_lowercase();
-    let max_depth = 5; // Limit recursion depth
+    let max_depth = 10; // Increased recursion depth for better deep directory search
    // Start recursive search from the absolute search_dir identified by parse_input
    // Pass an empty initial relative path
@ -225,14 +237,24 @@ pub fn get_completions<'a>(input: &'a str, cwd_str: &str) -> (Vec<String>, Optio
        final_completions.push(completion_str);
    }
    // Deduplicate before sorting - some paths might show up multiple times
    // due to the more aggressive recursive search
    final_completions.sort();
    final_completions.dedup();
    // Now sort with directories first
    final_completions.sort_by(|a, b| {
        let a_is_dir = a.ends_with('/');
        let b_is_dir = b.ends_with('/');
        // Sort directories first, then files, then alphabetically
        b_is_dir.cmp(&a_is_dir).then_with(|| a.cmp(b))
    });
-    final_completions.dedup(); // Remove duplicates that might arise
+
    // Limit results to a reasonable number to avoid overwhelming the UI
    let max_completions = 100;
    if final_completions.len() > max_completions {
        final_completions.truncate(max_completions);
    }
    (final_completions, Some(target))
 }
@ -336,10 +358,18 @@ mod tests {
        let (completions, _) = get_completions("src/sub", cwd_str);
        assert_eq!(completions, vec!["src/subdir/", "src/subdir/deep_file.txt"]);
-         // Complete file in deep path
+        // Complete file in deep path
        let (completions, _) = get_completions("src/subdir/d", cwd_str);
        assert_eq!(completions, vec!["src/subdir/deep_file.txt"]);
        // Test flexible search (no path structure)
        // This is the key new functionality - finds matches anywhere in the directory tree
        let (completions, _) = get_completions("deep", cwd_str);
        assert_eq!(completions, vec!["src/subdir/deep_file.txt"]);
        let (completions, _) = get_completions("lib", cwd_str);
        assert_eq!(completions, vec!["src/lib.rs"]);
        // Empty input -> complete from CWD
        let (completions, _) = get_completions("", cwd_str);
        assert_eq!(completions, vec!["src/", "LICENSE", "README.md"]); // Only top level for empty