diff --git a/api/libs/agents/src/tools/categories/file_tools/common.rs b/api/libs/agents/src/tools/categories/file_tools/common.rs index ac96af7cc..446413dd7 100644 --- a/api/libs/agents/src/tools/categories/file_tools/common.rs +++ b/api/libs/agents/src/tools/categories/file_tools/common.rs @@ -137,7 +137,7 @@ pub const METRIC_YML_SCHEMA: &str = r##" # --- GENERAL YAML RULES --- # 1. Use standard YAML syntax (indentation, colons for key-value, `-` for arrays). # 2. Quoting: Generally avoid quotes for simple strings. Use double quotes (`"...") ONLY if a string contains special characters (like :, {, }, [, ], ,, &, *, #, ?, |, -, <, >, =, !, %, @, `) or needs to preserve leading/trailing whitespace. -# 3. Metric name should not contain `:` +# 3. Metric name or description should not contain `:` # ------------------------------------- # --- FORMAL SCHEMA --- (Used for validation, reflects rules above) diff --git a/api/libs/database/Cargo.toml b/api/libs/database/Cargo.toml index 88fd28b00..89e0ac47d 100644 --- a/api/libs/database/Cargo.toml +++ b/api/libs/database/Cargo.toml @@ -9,13 +9,17 @@ anyhow = { workspace = true } chrono = { workspace = true } diesel = { workspace = true } diesel-async = { workspace = true } +diesel_migrations = { workspace = true } +futures-util = { workspace = true } +indexmap = { workspace = true } +regex = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } +serde_yaml = { workspace = true } tokio = { workspace = true } tracing = { workspace = true } uuid = { workspace = true } futures = { workspace = true } -indexmap = { workspace = true } once_cell = { workspace = true } rustls = { workspace = true } rustls-native-certs = { workspace = true } @@ -23,12 +27,11 @@ sqlx = { workspace = true } tokio-postgres = { workspace = true } tokio-postgres-rustls = { workspace = true } bb8-redis = { workspace = true } -serde_yaml = { workspace = true } dotenv = { workspace = true } reqwest = { workspace = true } +lazy_static = { workspace = true } [dev-dependencies] tokio-test = { workspace = true } -lazy_static = { workspace = true } ctor = "0.4.1" \ No newline at end of file diff --git a/api/libs/database/src/types/dashboard_yml.rs b/api/libs/database/src/types/dashboard_yml.rs index bc54bf597..38d25f0aa 100644 --- a/api/libs/database/src/types/dashboard_yml.rs +++ b/api/libs/database/src/types/dashboard_yml.rs @@ -10,6 +10,12 @@ use serde::{Deserialize, Serialize}; use serde_json::Value; use std::io::Write; use uuid::Uuid; +use regex::Regex; +use lazy_static::lazy_static; + +lazy_static! { + static ref DASHBOARD_NAME_DESC_RE: Regex = Regex::new(r#"^(\s*(?:name|description):\s*)(.*)$"#).unwrap(); +} #[derive(Debug, Serialize, Deserialize, Clone, FromSqlRow, AsExpression)] #[diesel(sql_type = Jsonb)] @@ -51,41 +57,48 @@ pub struct RowItem { impl DashboardYml { pub fn new(yml_content: String) -> Result { - // Oftentimes if the llm is creating a new dashboard, it will not include the id. - let mut file: DashboardYml = match serde_yaml::from_str(&yml_content) { + let processed_yml_content = yml_content + .lines() + .map(|line| { + if let Some(captures) = DASHBOARD_NAME_DESC_RE.captures(line) { + let key_part = captures.get(1).map_or("", |m| m.as_str()); + let value_part = captures.get(2).map_or("", |m| m.as_str()); + let sanitized_value = value_part.replace(':', ""); + format!("{}{}", key_part, sanitized_value) + } else { + line.to_string() + } + }) + .collect::>() + .join("\n"); + + let mut file: DashboardYml = match serde_yaml::from_str(&processed_yml_content) { Ok(file) => file, Err(e) => return Err(anyhow::anyhow!("Error parsing YAML: {}", e)), }; - // If the name is not provided, we will use a default name. if file.name.is_empty() { file.name = String::from("New Dashboard"); } - // Add row IDs if they don't exist for (index, row) in file.rows.iter_mut().enumerate() { if row.id == 0 { row.id = (index + 1) as u32; } } - // Validate the file match file.validate() { Ok(_) => Ok(file), Err(e) => Err(anyhow::anyhow!("Error compiling file: {}", e)), } } - // TODO: The validate of the dashboard should also be whether metrics exist? pub fn validate(&self) -> Result<()> { - // Validate the id and name if self.name.is_empty() { return Err(anyhow::anyhow!("Dashboard file name is required")); } - // Validate each row for row in &self.rows { - // Check row height constraints if let Some(row_height) = row.row_height { if !(320..=550).contains(&row_height) { return Err(anyhow::anyhow!( @@ -95,7 +108,6 @@ impl DashboardYml { } } - // Check number of items constraint if row.items.is_empty() || row.items.len() > 4 { return Err(anyhow::anyhow!( "Number of items in row must be between 1 and 4, got {}", @@ -103,7 +115,6 @@ impl DashboardYml { )); } - // Check column sizes sum to valid amount if row.column_sizes.is_empty() || row.column_sizes.len() > 4 { return Err(anyhow::anyhow!( "Number of column sizes must be between 1 and 4, got {}", @@ -111,7 +122,6 @@ impl DashboardYml { )); } - // Check column sizes match number of items if row.column_sizes.len() != row.items.len() { return Err(anyhow::anyhow!( "Number of column sizes ({}) must match number of items ({})", @@ -120,7 +130,6 @@ impl DashboardYml { )); } - // Validate sum of column_sizes is exactly 12 let sum: u32 = row.column_sizes.iter().sum(); if sum != 12 { return Err(anyhow::anyhow!( @@ -129,7 +138,6 @@ impl DashboardYml { )); } - // Check individual column sizes are at least 3 for &size in &row.column_sizes { if size < 3 { return Err(anyhow::anyhow!( @@ -148,7 +156,6 @@ impl DashboardYml { .map_err(|e| anyhow::anyhow!("Failed to serialize dashboard yml: {}", e)) } - /// Returns the next available row ID based on existing rows pub fn get_next_row_id(&self) -> u32 { self.rows .iter() @@ -157,7 +164,6 @@ impl DashboardYml { .map_or(1, |max_id| max_id + 1) } - /// Adds a new row with provided items and auto-generates the row ID pub fn add_row(&mut self, items: Vec, row_height: Option, column_sizes: Vec) { let next_id = self.get_next_row_id(); self.rows.push(Row { @@ -178,7 +184,7 @@ impl FromSql for DashboardYml { impl ToSql for DashboardYml { fn to_sql<'b>(&'b self, out: &mut Output<'b, '_, Pg>) -> diesel::serialize::Result { - out.write_all(&[1])?; // JSONB version 1 header + out.write_all(&[1])?; out.write_all(&serde_json::to_vec(self)?)?; Ok(IsNull::No) } @@ -191,10 +197,6 @@ mod tests { #[test] fn test_dashboard_yml_camel_case_serialization() { - // Test case: Verify that DashboardYml serializes to camelCase - // Expected: JSON fields should be in camelCase format - - // Create a dashboard with one row let dashboard = DashboardYml { name: "Test Dashboard".to_string(), description: Some("This is a test dashboard".to_string()), @@ -212,32 +214,24 @@ mod tests { ], }; - // Serialize to JSON let json = serde_json::to_value(&dashboard).unwrap(); - // Verify camelCase field names in the output assert!(json.get("name").is_some()); assert!(json.get("description").is_some()); assert!(json.get("rows").is_some()); - // Check row fields are in camelCase let row = &json["rows"][0]; assert!(row.get("items").is_some()); assert!(row.get("rowHeight").is_some()); assert!(row.get("columnSizes").is_some()); assert!(row.get("id").is_some()); - // Verify snake_case field names are NOT present assert!(row.get("row_height").is_none()); assert!(row.get("column_sizes").is_none()); } #[test] fn test_dashboard_yml_snake_case_deserialization() { - // Test case: Verify that DashboardYml deserializes from snake_case - // Expected: Both snake_case and camelCase fields should be accepted - - // Create JSON with snake_case fields let json = json!({ "name": "Test Dashboard", "description": "This is a test dashboard", @@ -255,28 +249,21 @@ mod tests { ] }); - // Convert to YAML and use the new method to assign IDs let yaml = serde_yaml::to_string(&json).unwrap(); let dashboard = DashboardYml::new(yaml).unwrap(); - // Verify fields were properly deserialized assert_eq!(dashboard.name, "Test Dashboard"); assert_eq!(dashboard.description, Some("This is a test dashboard".to_string())); assert_eq!(dashboard.rows.len(), 1); assert_eq!(dashboard.rows[0].row_height, Some(400)); assert_eq!(dashboard.rows[0].column_sizes, vec![12]); - // Check that a row ID was assigned assert_eq!(dashboard.rows[0].id, 1); } #[test] fn test_dashboard_yml_camel_case_deserialization() { - // Test case: Verify that DashboardYml deserializes from camelCase - // Expected: camelCase fields should be properly deserialized - - // Create JSON with camelCase fields let json = json!({ "name": "Test Dashboard", "description": "This is a test dashboard", @@ -294,52 +281,43 @@ mod tests { ] }); - // Convert to YAML and use the new method to assign IDs let yaml = serde_yaml::to_string(&json).unwrap(); let dashboard = DashboardYml::new(yaml).unwrap(); - // Verify fields were properly deserialized assert_eq!(dashboard.name, "Test Dashboard"); assert_eq!(dashboard.description, Some("This is a test dashboard".to_string())); assert_eq!(dashboard.rows.len(), 1); assert_eq!(dashboard.rows[0].row_height, Some(400)); assert_eq!(dashboard.rows[0].column_sizes, vec![12]); - // Check that a row ID was assigned assert_eq!(dashboard.rows[0].id, 1); } #[test] fn test_row_id_generation() { - // Test case: Verify that row IDs are properly generated - // Expected: Row IDs should increment by 1 for each row - - // Create a dashboard from YAML without row IDs let yaml = r#" name: Test Dashboard description: This is a test dashboard rows: - - id: 0 # using 0 as a signal to generate a new ID + - id: 0 items: - id: 00000000-0000-0000-0000-000000000001 rowHeight: 400 columnSizes: [12] - - id: 0 # using 0 as a signal to generate a new ID + - id: 0 items: - id: 00000000-0000-0000-0000-000000000002 rowHeight: 320 columnSizes: [12] - - id: 0 # using 0 as a signal to generate a new ID + - id: 0 items: - id: 00000000-0000-0000-0000-000000000003 rowHeight: 550 columnSizes: [12] "#; - // Create dashboard using the new method (which should assign row IDs) let dashboard = DashboardYml::new(yaml.to_string()).unwrap(); - // Verify that row IDs were assigned in sequence assert_eq!(dashboard.rows[0].id, 1); assert_eq!(dashboard.rows[1].id, 2); assert_eq!(dashboard.rows[2].id, 3); @@ -347,10 +325,6 @@ rows: #[test] fn test_add_row_method() { - // Test case: Verify that the add_row method assigns the next available ID - // Expected: New rows get the next sequential ID - - // Create a dashboard with one row let mut dashboard = DashboardYml { name: "Test Dashboard".to_string(), description: None, @@ -358,41 +332,33 @@ rows: Row { items: vec![RowItem { id: Uuid::new_v4() }], row_height: None, - column_sizes: vec![12], // Must sum to 12 + column_sizes: vec![12], id: 1, } ], }; - // Add a second row using the add_row method dashboard.add_row( vec![RowItem { id: Uuid::new_v4() }], Some(400), - vec![12], // Must sum to 12 + vec![12], ); - // Add a third row dashboard.add_row( vec![RowItem { id: Uuid::new_v4() }], Some(320), - vec![12], // Must sum to 12 + vec![12], ); - // Verify that row IDs were assigned in sequence assert_eq!(dashboard.rows[0].id, 1); assert_eq!(dashboard.rows[1].id, 2); assert_eq!(dashboard.rows[2].id, 3); - // Verify that get_next_row_id returns the expected value assert_eq!(dashboard.get_next_row_id(), 4); } #[test] fn test_non_sequential_row_ids() { - // Test case: Verify that get_next_row_id works with non-sequential IDs - // Expected: Next ID should be max(id) + 1 - - // Create a dashboard with rows that have non-sequential IDs let dashboard = DashboardYml { name: "Test Dashboard".to_string(), description: None, @@ -400,34 +366,29 @@ rows: Row { items: vec![RowItem { id: Uuid::new_v4() }], row_height: None, - column_sizes: vec![12], // Must sum to 12 + column_sizes: vec![12], id: 1, }, Row { items: vec![RowItem { id: Uuid::new_v4() }], row_height: None, - column_sizes: vec![12], // Must sum to 12 - id: 5, // Intentionally out of sequence + column_sizes: vec![12], + id: 5, }, Row { items: vec![RowItem { id: Uuid::new_v4() }], row_height: None, - column_sizes: vec![12], // Must sum to 12 + column_sizes: vec![12], id: 3, } ], }; - // Verify that get_next_row_id returns max(id) + 1 assert_eq!(dashboard.get_next_row_id(), 6); } #[test] fn test_explicitly_provided_id() { - // Test case: Verify that explicitly provided IDs are preserved during deserialization - // Expected: Row ID should match the provided value - - // Create JSON with an explicit ID field let json = json!({ "name": "Test Dashboard", "description": "This is a test dashboard", @@ -440,16 +401,14 @@ rows: ], "rowHeight": 400, "columnSizes": [12], - "id": 42 // Explicitly set ID + "id": 42 } ] }); - // Convert to YAML and use the new method let yaml = serde_yaml::to_string(&json).unwrap(); let dashboard = DashboardYml::new(yaml).unwrap(); - // Verify the explicit ID was preserved assert_eq!(dashboard.rows[0].id, 42); } } diff --git a/api/libs/database/src/types/metric_yml.rs b/api/libs/database/src/types/metric_yml.rs index f67263a30..d3bc04100 100644 --- a/api/libs/database/src/types/metric_yml.rs +++ b/api/libs/database/src/types/metric_yml.rs @@ -11,6 +11,35 @@ use serde::{Deserialize, Serialize}; use serde_json::Value; use std::io::Write; use uuid::Uuid; +use regex::Regex; +use lazy_static::lazy_static; + +// Helper function to sanitize string values for YAML +fn sanitize_yaml_string(value: &str) -> String { + value + .replace(':', "") // Remove colons + .replace('\'', "") // Remove single quotes + .replace('\"', "") // Remove double quotes + .replace('\n', " ") // Replace newlines with spaces + .replace('\t', " ") // Replace tabs with spaces + .trim() // Trim leading/trailing whitespace + .to_string() +} + +lazy_static! { + // Combined regex for keys whose string values need sanitization + static ref SANITIZE_KEYS_RE: Regex = Regex::new( + r#"^(?P\s*)(?Pname|description|timeFrame|displayName|prefix|suffix|goalLineLabel|trendlineLabel|pieInnerLabelTitle|metricValueLabel):\s*(?P.*)$"# + ).unwrap(); + // Regex to find the start of the colors block and capture indentation + static ref COLORS_START_RE: Regex = Regex::new(r#"^(\s*)colors:\s*$"#).unwrap(); + // Regex to find unquoted hex color list items and capture indent/marker and color value + static ref COLOR_LINE_RE: Regex = Regex::new(r#"^(\s*-\s+)(#[0-9a-fA-F]+)\s*$"#).unwrap(); + // Regex to capture the indentation of a line (used for colors block and timeFrame insertion) + static ref INDENT_RE: Regex = Regex::new(r#"^(\s*)\S"#).unwrap(); + // Regex to find the sql key to determine insertion point for timeFrame + static ref SQL_KEY_RE: Regex = Regex::new(r#"^(\s*)sql:\s*.*$"#).unwrap(); +} #[derive(Debug, Serialize, Deserialize, Clone, FromSqlRow, AsExpression)] #[diesel(sql_type = Jsonb)] @@ -489,13 +518,97 @@ pub struct TableChartConfig { impl MetricYml { pub fn new(yml_content: String) -> Result { - let file: MetricYml = match serde_yaml::from_str(&yml_content) { + let mut processed_lines = Vec::new(); + let mut in_colors_block = false; + let mut colors_indent: Option = None; + let mut time_frame_found = false; + let mut sql_line_index: Option = None; + let mut sql_line_indent: Option = None; + + for (index, line) in yml_content.lines().enumerate() { + // Store SQL line info for potential timeFrame insertion + if sql_line_index.is_none() { + if let Some(caps) = SQL_KEY_RE.captures(line) { + sql_line_index = Some(index); + sql_line_indent = Some(caps.get(1).map_or("", |m| m.as_str()).to_string()); + } + } + + let current_indent = INDENT_RE.captures(line).map_or(0, |caps| { + caps.get(1).map_or(0, |m| m.as_str().len()) + }); + + // --- Colors Block Logic --- + if in_colors_block && colors_indent.map_or(false, |indent| current_indent <= indent) { + in_colors_block = false; + colors_indent = None; + } + if !in_colors_block { // Only check for start if not already in block + if let Some(caps) = COLORS_START_RE.captures(line) { + in_colors_block = true; + colors_indent = Some(caps.get(1).map_or(0, |m| m.as_str().len())); + processed_lines.push(line.to_string()); + continue; + } + } + if in_colors_block { + if let Some(caps) = COLOR_LINE_RE.captures(line) { + let marker_part = caps.get(1).map_or("", |m| m.as_str()); + let color_part = caps.get(2).map_or("", |m| m.as_str()); + processed_lines.push(format!("{}'{}'", marker_part, color_part)); + continue; + } else { + processed_lines.push(line.to_string()); // Add line within color block as is if not a hex item + continue; + } + } + // --- End Colors Block Logic --- + + // --- String Sanitization & timeFrame Check --- + if let Some(caps) = SANITIZE_KEYS_RE.captures(line) { + let indent = caps.name("indent").map_or("", |m| m.as_str()); + let key = caps.name("key").map_or("", |m| m.as_str()); + let value = caps.name("value").map_or("", |m| m.as_str()); + + if key == "timeFrame" { + time_frame_found = true; + } + + let sanitized_value = sanitize_yaml_string(value); + // Reconstruct line, potentially quoting if value was empty after sanitizing? + // For now, just place the sanitized value. YAML might handle empty strings okay. + // If the value needs quotes (e.g., contains special chars AFTER sanitization, though unlikely now) + // we might need more complex logic. Let's assume simple value placement is fine. + processed_lines.push(format!("{}{}: {}", indent, key, sanitized_value)); + } else { + // Add lines that don't match any processing rules + processed_lines.push(line.to_string()); + } + } + + // Insert default timeFrame if not found + if !time_frame_found { + if let Some(index) = sql_line_index { + // Use the indent captured from the sql line + let indent = sql_line_indent.unwrap_or_else(|| " ".to_string()); // Default indent if sql indent capture failed + processed_lines.insert(index, format!("{}timeFrame: 'all_time'", indent)); + } else { + // Fallback: append if sql key wasn't found (shouldn't happen for valid metric) + // Or maybe error out? + eprintln!("Warning: sql key not found in metric YAML, cannot insert default timeFrame correctly."); + // Append at end with default indent - might break YAML structure + processed_lines.push(" timeFrame: 'all_time'".to_string()); + } + } + + let processed_yml_content = processed_lines.join("\n"); + + // Parse the processed YAML content + let file: MetricYml = match serde_yaml::from_str(&processed_yml_content) { Ok(file) => file, Err(e) => { - // Extract field information from error message if possible let error_message = format!("Error parsing YAML: {}", e); - // Try to extract field path from YAML error let yaml_error_str = e.to_string(); let detailed_error = if yaml_error_str.contains("at line") && yaml_error_str.contains("column") { @@ -514,7 +627,6 @@ impl MetricYml { } } - //TODO: Need to validate a metric deeply. pub fn validate(&self) -> Result<()> { Ok(()) } @@ -529,7 +641,7 @@ impl FromSql for MetricYml { impl ToSql for MetricYml { fn to_sql<'b>(&'b self, out: &mut Output<'b, '_, Pg>) -> diesel::serialize::Result { - out.write_all(&[1])?; // JSONB version 1 header + out.write_all(&[1])?; out.write_all(&serde_json::to_vec(self)?)?; Ok(IsNull::No) } @@ -553,15 +665,12 @@ mod tests { let metric = MetricYml::new(yml_content.to_string())?; - // Verify the basic fields assert_eq!(metric.name, "Average Time to Close by Rep"); - // Compare SQL with normalized whitespace let expected_sql = normalize_whitespace("SELECT rep_id, AVG(time_to_close) AS average_time_to_close FROM deal_metrics GROUP BY rep_id ORDER BY average_time_to_close DESC"); let actual_sql = normalize_whitespace(&metric.sql); assert_eq!(actual_sql, expected_sql); - // Verify chart config match metric.chart_config { ChartConfig::Bar(config) => { assert!(config.base.column_label_formats.is_empty()); @@ -591,7 +700,6 @@ mod tests { #[test] fn test_column_label_format_constructors() { - // Test number format let number_format = ColumnLabelFormat::new_number(); assert_eq!(number_format.column_type, "number"); assert_eq!(number_format.style, "number"); @@ -599,20 +707,16 @@ mod tests { assert_eq!(number_format.minimum_fraction_digits, Some(0)); assert_eq!(number_format.maximum_fraction_digits, Some(2)); - // Test string format let string_format = ColumnLabelFormat::new_string(); assert_eq!(string_format.column_type, "string"); assert_eq!(string_format.style, "string"); - assert_eq!(string_format.number_separator_style, None); - // Test date format let date_format = ColumnLabelFormat::new_date(); assert_eq!(date_format.column_type, "date"); assert_eq!(date_format.style, "date"); assert_eq!(date_format.date_format, Some("auto".to_string())); assert_eq!(date_format.is_utc, Some(false)); - // Test boolean format - should be same as string let boolean_format = ColumnLabelFormat::new_boolean(); assert_eq!(boolean_format.column_type, "string"); assert_eq!(boolean_format.style, "string"); @@ -620,7 +724,6 @@ mod tests { #[test] fn test_generate_formats_from_metadata() { - // Create test metadata let metadata = DataMetadata { column_count: 3, row_count: 10, @@ -652,13 +755,10 @@ mod tests { ], }; - // Generate formats let formats = ColumnLabelFormat::generate_formats_from_metadata(&metadata); - // Check we have formats for all columns assert_eq!(formats.len(), 3); - // Check individual formats let id_format = formats.get("id").unwrap(); assert_eq!(id_format.column_type, "number");