From 692e28a75e1adf594641fda8e94068b56789762b Mon Sep 17 00:00:00 2001
From: dal <dallin@buster.so>
Date: Fri, 18 Apr 2025 09:08:14 -0600
Subject: [PATCH] bulk creation and deduplications

---
 api/libs/agents/src/agents/modes/analysis.rs         |  8 +++++---
 .../agents/src/tools/categories/file_tools/common.rs | 11 +++--------
 .../tools/categories/file_tools/create_metrics.rs    |  8 ++++----
 .../tools/categories/file_tools/modify_metrics.rs    | 12 ++++++------
 api/libs/database/src/types/metric_yml.rs            |  7 ++-----
 5 files changed, 20 insertions(+), 26 deletions(-)
diff --git a/api/libs/agents/src/agents/modes/analysis.rs b/api/libs/agents/src/agents/modes/analysis.rs
index 4b22c8be6..93e701e1e 100644
--- a/api/libs/agents/src/agents/modes/analysis.rs
+++ b/api/libs/agents/src/agents/modes/analysis.rs
@@ -201,7 +201,7 @@ You can create, update, or modify the following assets, which are automatically
   
   **Key Features**:
   - **Simultaneous Creation (or Updates)**: When creating a metric, you write the SQL statement (or specify a data frame) and the chart configuration at the same time within the YAML file.
-  - **Bulk Creation (or Updates)**: You can generate multiple YAML files in a single operation, enabling the rapid creation of dozens of metrics — each with its own data source and chart configuration—to efficiently fulfill complex requests.
+  - **Bulk Creation (or Updates)**: You can generate multiple YAML files in a single operation, enabling the rapid creation of dozens of metrics — each with its own data source and chart configuration — to efficiently fulfill complex requests. **You should strongly prefer creating or modifying multiple metrics at once in bulk rather than one by one.**
   - **Review and Update**: After creation, metrics can be reviewed and updated individually or in bulk as needed.
   - **Use in Dashboards**: Metrics can be saved to dashboards for further use.
 
@@ -212,7 +212,7 @@ You can create, update, or modify the following assets, which are automatically
 ### Creating vs Updating Asssets
 
 - If the user asks for something that hasn't been created yet (e.g. a chart or dashboard), create a new asset. 
-- If the user wants to change something you've already built — like switching a chart from monthly to weekly data or rearraging a dashboard — just update the existing asset, don't create a new one.
+- If the user wants to change something you've already built — like switching a chart from monthly to weekly data or rearranging a dashboard — just update the existing asset, don't create a new one. **When creating or updating multiple assets, perform these operations in bulk within a single tool call whenever possible.**
 
 ### Finish With the `finish_and_respond` Tool
 
@@ -236,7 +236,8 @@ To conclude your worklow, you use the `finish_and_respond` tool to send a final
 ## SQL Best Practices and Constraints** (when creating new metrics)  
 - **Constraints**: Only join tables with explicit entity relationships.  
 - **SQL Requirements**:  
-  - Use schema-qualified table names (`<SCHEMA_NAME>.<TABLE_NAME>`).  
+  - Use fully qualified table names that include both the database and the schema (`<DATABASE_NAME>.<SCHEMA_NAME>.<TABLE_NAME>`).  
+  - Use fully qualified column names with table aliases (e.g., `<table_alias>.<column>`).
   - Select specific columns (avoid `SELECT *` or `COUNT(*)`).  
   - Use CTEs instead of subqueries, and use snake_case for naming them.  
   - Use `DISTINCT` (not `DISTINCT ON`) with matching `GROUP BY`/`SORT BY` clauses.  
@@ -248,6 +249,7 @@ To conclude your worklow, you use the `finish_and_respond` tool to send a final
   - Maintain a consistent data structure across requests unless changes are required.  
   - Use explicit ordering for custom buckets or categories.
   - Avoid division by zero errors by using NULLIF() or CASE statements (e.g., `SELECT amount / NULLIF(quantity, 0)` or `CASE WHEN quantity = 0 THEN NULL ELSE amount / quantity END`).
+  - Consider potential data duplication and apply deduplication techniques (e.g., `DISTINCT`, `GROUP BY`) where necessary.
 ---
 
 You are an agent - please keep going until the user's query is completely resolved, before ending your turn and yielding back to the user. Only terminate your turn when you are sure that the problem is solved.
diff --git a/api/libs/agents/src/tools/categories/file_tools/common.rs b/api/libs/agents/src/tools/categories/file_tools/common.rs
index f9fe68ef2..7391ac003 100644
--- a/api/libs/agents/src/tools/categories/file_tools/common.rs
+++ b/api/libs/agents/src/tools/categories/file_tools/common.rs
@@ -192,20 +192,15 @@ properties:
   ### SQL Best Practices and Constraints** (when creating new metrics)  
   #  - **Constraints**: Only join tables with explicit entity relationships.  
   #  - **SQL Requirements**:  
-  #    - Use schema-qualified table names (`<SCHEMA_NAME>.<TABLE_NAME>`).  
+  #    - Use fully qualified table names that include both the database and the schema (`<DATABASE_NAME>.<SCHEMA_NAME>.<TABLE_NAME>`).  
+  #    - Use fully qualified column names with table aliases (e.g., `<table_alias>.<column>`).
   #    - Select specific columns (avoid `SELECT *` or `COUNT(*)`).  
   #    - Use CTEs instead of subqueries, and use snake_case for naming them.  
   #    - Use `DISTINCT` (not `DISTINCT ON`) with matching `GROUP BY`/`SORT BY` clauses.  
   #    - Show entity names rather than just IDs.  
   #    - Handle date conversions appropriately.  
   #    - Order dates in ascending order.
-  #    - Reference database identifiers for cross-database queries.  
-  #    - Format output for the specified visualization type.  
-  #    - Maintain a consistent data structure across requests unless changes are required.  
-  #    - Use explicit ordering for custom buckets or categories.
-  #    - When grouping metrics by dates, default to monthly granularity for spans over 2 months, yearly for over 3 years, weekly for under 2 months, and daily for under a week, unless the user specifies a different granularity.
-  #    - Avoid division by zero errors by using NULLIF() or CASE statements (e.g., `SELECT amount / NULLIF(quantity, 0)` or `CASE WHEN quantity = 0 THEN NULL ELSE amount / quantity END`).
-  ###
+  #    - Consider potential data duplication and apply deduplication techniques (e.g., `DISTINCT`, `GROUP BY`) where necessary.
   sql:
     required: true
     type: string
diff --git a/api/libs/agents/src/tools/categories/file_tools/create_metrics.rs b/api/libs/agents/src/tools/categories/file_tools/create_metrics.rs
index 22de2fa8b..df2efe126 100644
--- a/api/libs/agents/src/tools/categories/file_tools/create_metrics.rs
+++ b/api/libs/agents/src/tools/categories/file_tools/create_metrics.rs
@@ -297,7 +297,7 @@ impl ToolExecutor for CreateMetricFilesTool {
 
 async fn get_create_metrics_description() -> String {
     if env::var("USE_BRAINTRUST_PROMPTS").is_err() {
-        return "Creates metric configuration files with YAML content following the metric schema specification. Before using this tool, carefully consider the appropriate visualization type (bar, line, scatter, pie, combo, metric, table) and its specific configuration requirements. Each visualization has unique axis settings, formatting options, and data structure needs that must be thoroughly planned to create effective metrics.".to_string();
+        return "Creates metric configuration files with YAML content following the metric schema specification. Before using this tool, carefully consider the appropriate visualization type (bar, line, scatter, pie, combo, metric, table) and its specific configuration requirements. Each visualization has unique axis settings, formatting options, and data structure needs that must be thoroughly planned to create effective metrics. **This tool supports creating multiple metrics in a single call; prefer using bulk creation over creating metrics one by one.**".to_string();
     }
 
     let client = BraintrustClient::new(None, "96af8b2b-cf3c-494f-9092-44eb3d5b96ff").unwrap();
@@ -305,7 +305,7 @@ async fn get_create_metrics_description() -> String {
         Ok(message) => message,
         Err(e) => {
             eprintln!("Failed to get prompt system message: {}", e);
-            "Creates metric configuration files with YAML content following the metric schema specification. Before using this tool, carefully consider the appropriate visualization type (bar, line, scatter, pie, combo, metric, table) and its specific configuration requirements. Each visualization has unique axis settings, formatting options, and data structure needs that must be thoroughly planned to create effective metrics.".to_string()
+            "Creates metric configuration files with YAML content following the metric schema specification. Before using this tool, carefully consider the appropriate visualization type (bar, line, scatter, pie, combo, metric, table) and its specific configuration requirements. Each visualization has unique axis settings, formatting options, and data structure needs that must be thoroughly planned to create effective metrics. **This tool supports creating multiple metrics in a single call; prefer using bulk creation over creating metrics one by one.**".to_string()
         }
     }
 }
@@ -328,7 +328,7 @@ async fn get_metric_name_description() -> String {
 async fn get_metric_yml_description() -> String {
     if env::var("USE_BRAINTRUST_PROMPTS").is_err() {
         // Revert to just returning the schema string
-        return METRIC_YML_SCHEMA.to_string();
+        return format!("The YAML content for a single metric, adhering to the schema below. Multiple metrics can be created in one call by providing multiple entries in the 'files' array. **Prefer creating metrics in bulk.**\n\n{}", METRIC_YML_SCHEMA);
     }
 
     let client = BraintrustClient::new(None, "96af8b2b-cf3c-494f-9092-44eb3d5b96ff").unwrap();
@@ -337,7 +337,7 @@ async fn get_metric_yml_description() -> String {
         Err(e) => {
             eprintln!("Failed to get prompt system message: {}", e);
             // Revert to just returning the schema string on error
-            METRIC_YML_SCHEMA.to_string()
+            format!("The YAML content for a single metric, adhering to the schema below. Multiple metrics can be created in one call by providing multiple entries in the 'files' array. **Prefer creating metrics in bulk.**\n\n{}", METRIC_YML_SCHEMA)
         }
     }
 }
diff --git a/api/libs/agents/src/tools/categories/file_tools/modify_metrics.rs b/api/libs/agents/src/tools/categories/file_tools/modify_metrics.rs
index 312704d49..915150176 100644
--- a/api/libs/agents/src/tools/categories/file_tools/modify_metrics.rs
+++ b/api/libs/agents/src/tools/categories/file_tools/modify_metrics.rs
@@ -474,7 +474,7 @@ impl ToolExecutor for ModifyMetricFilesTool {
 
 async fn get_modify_metrics_description() -> String {
     if env::var("USE_BRAINTRUST_PROMPTS").is_err() {
-        return "Updates existing metric configuration files with new YAML content. Provide the complete YAML content for each metric, replacing the entire existing file. This tool is ideal for bulk modifications when you need to update multiple metrics simultaneously. The system will preserve version history and perform all necessary validations on the new content. For each metric, you need its UUID and the complete updated YAML content.".to_string();
+        return "Updates existing metric configuration files with new YAML content. Provide the complete YAML content for each metric, replacing the entire existing file. This tool is ideal for bulk modifications when you need to update multiple metrics simultaneously. The system will preserve version history and perform all necessary validations on the new content. For each metric, you need its UUID and the complete updated YAML content. **Prefer modifying metrics in bulk using this tool rather than one by one.**".to_string();
     }
 
     let client = BraintrustClient::new(None, "96af8b2b-cf3c-494f-9092-44eb3d5b96ff").unwrap();
@@ -482,14 +482,14 @@ async fn get_modify_metrics_description() -> String {
         Ok(message) => message,
         Err(e) => {
             eprintln!("Failed to get prompt system message: {}", e);
-            "Updates existing metric configuration files with new YAML content. Provide the complete YAML content for each metric, replacing the entire existing file. This tool is ideal for bulk modifications when you need to update multiple metrics simultaneously. The system will preserve version history and perform all necessary validations on the new content. For each metric, you need its UUID and the complete updated YAML content.".to_string()
+            "Updates existing metric configuration files with new YAML content. Provide the complete YAML content for each metric, replacing the entire existing file. This tool is ideal for bulk modifications when you need to update multiple metrics simultaneously. The system will preserve version history and perform all necessary validations on the new content. For each metric, you need its UUID and the complete updated YAML content. **Prefer modifying metrics in bulk using this tool rather than one by one.**".to_string()
         }
     }
 }
 
 async fn get_modify_metrics_yml_description() -> String {
     if env::var("USE_BRAINTRUST_PROMPTS").is_err() {
-        return "Array of metrics to update. Each item requires an 'id' (UUID of the existing metric) and 'yml_content' (complete new YAML content that follows the specification below). You can update multiple metrics in a single operation, making this ideal for bulk updates.".to_string();
+        return "Array of metrics to update. Each item requires an 'id' (UUID of the existing metric) and 'yml_content' (complete new YAML content that follows the specification below). You can update multiple metrics in a single operation, making this ideal for bulk updates. **Prefer using this for bulk updates rather than modifying metrics individually.**".to_string();
     }
 
     let client = BraintrustClient::new(None, "96af8b2b-cf3c-494f-9092-44eb3d5b96ff").unwrap();
@@ -497,7 +497,7 @@ async fn get_modify_metrics_yml_description() -> String {
         Ok(message) => message,
         Err(e) => {
             eprintln!("Failed to get prompt system message: {}", e);
-            "Array of metrics to update. Each item requires an 'id' (UUID of the existing metric) and 'yml_content' (complete new YAML content that follows the specification below). You can update multiple metrics in a single operation, making this ideal for bulk updates.".to_string()
+            "Array of metrics to update. Each item requires an 'id' (UUID of the existing metric) and 'yml_content' (complete new YAML content that follows the specification below). You can update multiple metrics in a single operation, making this ideal for bulk updates. **Prefer using this for bulk updates rather than modifying metrics individually.**".to_string()
         }
     }
 }
@@ -505,7 +505,7 @@ async fn get_modify_metrics_yml_description() -> String {
 async fn get_metric_yml_description() -> String {
     if env::var("USE_BRAINTRUST_PROMPTS").is_err() {
         // Revert to just returning the schema string plus basic instruction
-        return format!("The complete new YAML content for the metric, following the metric schema specification. This will replace the entire existing content of the file. Ensure all required fields are present and properly formatted according to the schema.\n\n{}", METRIC_YML_SCHEMA);
+        return format!("The complete new YAML content for the metric, following the metric schema specification. This will replace the entire existing content of the file. Ensure all required fields are present and properly formatted according to the schema. When modifying multiple metrics, provide each in the 'files' array. **Prefer bulk modifications.**\n\n{}", METRIC_YML_SCHEMA);
     }
 
     let client = BraintrustClient::new(None, "96af8b2b-cf3c-494f-9092-44eb3d5b96ff").unwrap();
@@ -514,7 +514,7 @@ async fn get_metric_yml_description() -> String {
         Err(e) => {
             eprintln!("Failed to get prompt system message: {}", e);
             // Revert to just returning the schema string plus basic instruction on error
-            format!("The complete new YAML content for the metric, following the metric schema specification. This will replace the entire existing content of the file. Ensure all required fields are present and properly formatted according to the schema.\n\n{}", METRIC_YML_SCHEMA)
+            format!("The complete new YAML content for the metric, following the metric schema specification. This will replace the entire existing content of the file. Ensure all required fields are present and properly formatted according to the schema. When modifying multiple metrics, provide each in the 'files' array. **Prefer bulk modifications.**\n\n{}", METRIC_YML_SCHEMA)
         }
     }
 }
diff --git a/api/libs/database/src/types/metric_yml.rs b/api/libs/database/src/types/metric_yml.rs
index 9dc06ba19..756b465ce 100644
--- a/api/libs/database/src/types/metric_yml.rs
+++ b/api/libs/database/src/types/metric_yml.rs
@@ -22,6 +22,7 @@ fn sanitize_yaml_string(value: &str) -> String {
         .replace('\"', "") // Remove double quotes
         .replace('\n', " ") // Replace newlines with spaces
         .replace('\t', " ") // Replace tabs with spaces
+        .replace('%', "Percent") // Replace % with Percent
         .trim() // Trim leading/trailing whitespace
         .to_string()
 }
@@ -575,13 +576,9 @@ impl MetricYml {
                 }
 
                 let sanitized_value = sanitize_yaml_string(value);
-                // Reconstruct line, potentially quoting if value was empty after sanitizing?
-                // For now, just place the sanitized value. YAML might handle empty strings okay.
-                // If the value needs quotes (e.g., contains special chars AFTER sanitization, though unlikely now)
-                // we might need more complex logic. Let's assume simple value placement is fine.
                 processed_lines.push(format!("{}{}: {}", indent, key, sanitized_value));
             } else {
-                // Add lines that don't match any processing rules
+                // Add lines that don't match any processing rules (like the sql block)
                 processed_lines.push(line.to_string());
             }
         }