metric yml sanitization

This commit is contained in:
dal 2025-04-17 07:23:11 -06:00
parent 32dd1891a4
commit 2749469eb8
No known key found for this signature in database
GPG Key ID: 16F4B0E1E9F61122
4 changed files with 158 additions and 96 deletions

View File

@ -137,7 +137,7 @@ pub const METRIC_YML_SCHEMA: &str = r##"
# --- GENERAL YAML RULES --- # --- GENERAL YAML RULES ---
# 1. Use standard YAML syntax (indentation, colons for key-value, `-` for arrays). # 1. Use standard YAML syntax (indentation, colons for key-value, `-` for arrays).
# 2. Quoting: Generally avoid quotes for simple strings. Use double quotes (`"...") ONLY if a string contains special characters (like :, {, }, [, ], ,, &, *, #, ?, |, -, <, >, =, !, %, @, `) or needs to preserve leading/trailing whitespace. # 2. Quoting: Generally avoid quotes for simple strings. Use double quotes (`"...") ONLY if a string contains special characters (like :, {, }, [, ], ,, &, *, #, ?, |, -, <, >, =, !, %, @, `) or needs to preserve leading/trailing whitespace.
# 3. Metric name should not contain `:` # 3. Metric name or description should not contain `:`
# ------------------------------------- # -------------------------------------
# --- FORMAL SCHEMA --- (Used for validation, reflects rules above) # --- FORMAL SCHEMA --- (Used for validation, reflects rules above)

View File

@ -9,13 +9,17 @@ anyhow = { workspace = true }
chrono = { workspace = true } chrono = { workspace = true }
diesel = { workspace = true } diesel = { workspace = true }
diesel-async = { workspace = true } diesel-async = { workspace = true }
diesel_migrations = { workspace = true }
futures-util = { workspace = true }
indexmap = { workspace = true }
regex = { workspace = true }
serde = { workspace = true } serde = { workspace = true }
serde_json = { workspace = true } serde_json = { workspace = true }
serde_yaml = { workspace = true }
tokio = { workspace = true } tokio = { workspace = true }
tracing = { workspace = true } tracing = { workspace = true }
uuid = { workspace = true } uuid = { workspace = true }
futures = { workspace = true } futures = { workspace = true }
indexmap = { workspace = true }
once_cell = { workspace = true } once_cell = { workspace = true }
rustls = { workspace = true } rustls = { workspace = true }
rustls-native-certs = { workspace = true } rustls-native-certs = { workspace = true }
@ -23,12 +27,11 @@ sqlx = { workspace = true }
tokio-postgres = { workspace = true } tokio-postgres = { workspace = true }
tokio-postgres-rustls = { workspace = true } tokio-postgres-rustls = { workspace = true }
bb8-redis = { workspace = true } bb8-redis = { workspace = true }
serde_yaml = { workspace = true }
dotenv = { workspace = true } dotenv = { workspace = true }
reqwest = { workspace = true } reqwest = { workspace = true }
lazy_static = { workspace = true }
[dev-dependencies] [dev-dependencies]
tokio-test = { workspace = true } tokio-test = { workspace = true }
lazy_static = { workspace = true }
ctor = "0.4.1" ctor = "0.4.1"

View File

@ -10,6 +10,12 @@ use serde::{Deserialize, Serialize};
use serde_json::Value; use serde_json::Value;
use std::io::Write; use std::io::Write;
use uuid::Uuid; use uuid::Uuid;
use regex::Regex;
use lazy_static::lazy_static;
lazy_static! {
static ref DASHBOARD_NAME_DESC_RE: Regex = Regex::new(r#"^(\s*(?:name|description):\s*)(.*)$"#).unwrap();
}
#[derive(Debug, Serialize, Deserialize, Clone, FromSqlRow, AsExpression)] #[derive(Debug, Serialize, Deserialize, Clone, FromSqlRow, AsExpression)]
#[diesel(sql_type = Jsonb)] #[diesel(sql_type = Jsonb)]
@ -51,41 +57,48 @@ pub struct RowItem {
impl DashboardYml { impl DashboardYml {
pub fn new(yml_content: String) -> Result<Self> { pub fn new(yml_content: String) -> Result<Self> {
// Oftentimes if the llm is creating a new dashboard, it will not include the id. let processed_yml_content = yml_content
let mut file: DashboardYml = match serde_yaml::from_str(&yml_content) { .lines()
.map(|line| {
if let Some(captures) = DASHBOARD_NAME_DESC_RE.captures(line) {
let key_part = captures.get(1).map_or("", |m| m.as_str());
let value_part = captures.get(2).map_or("", |m| m.as_str());
let sanitized_value = value_part.replace(':', "");
format!("{}{}", key_part, sanitized_value)
} else {
line.to_string()
}
})
.collect::<Vec<String>>()
.join("\n");
let mut file: DashboardYml = match serde_yaml::from_str(&processed_yml_content) {
Ok(file) => file, Ok(file) => file,
Err(e) => return Err(anyhow::anyhow!("Error parsing YAML: {}", e)), Err(e) => return Err(anyhow::anyhow!("Error parsing YAML: {}", e)),
}; };
// If the name is not provided, we will use a default name.
if file.name.is_empty() { if file.name.is_empty() {
file.name = String::from("New Dashboard"); file.name = String::from("New Dashboard");
} }
// Add row IDs if they don't exist
for (index, row) in file.rows.iter_mut().enumerate() { for (index, row) in file.rows.iter_mut().enumerate() {
if row.id == 0 { if row.id == 0 {
row.id = (index + 1) as u32; row.id = (index + 1) as u32;
} }
} }
// Validate the file
match file.validate() { match file.validate() {
Ok(_) => Ok(file), Ok(_) => Ok(file),
Err(e) => Err(anyhow::anyhow!("Error compiling file: {}", e)), Err(e) => Err(anyhow::anyhow!("Error compiling file: {}", e)),
} }
} }
// TODO: The validate of the dashboard should also be whether metrics exist?
pub fn validate(&self) -> Result<()> { pub fn validate(&self) -> Result<()> {
// Validate the id and name
if self.name.is_empty() { if self.name.is_empty() {
return Err(anyhow::anyhow!("Dashboard file name is required")); return Err(anyhow::anyhow!("Dashboard file name is required"));
} }
// Validate each row
for row in &self.rows { for row in &self.rows {
// Check row height constraints
if let Some(row_height) = row.row_height { if let Some(row_height) = row.row_height {
if !(320..=550).contains(&row_height) { if !(320..=550).contains(&row_height) {
return Err(anyhow::anyhow!( return Err(anyhow::anyhow!(
@ -95,7 +108,6 @@ impl DashboardYml {
} }
} }
// Check number of items constraint
if row.items.is_empty() || row.items.len() > 4 { if row.items.is_empty() || row.items.len() > 4 {
return Err(anyhow::anyhow!( return Err(anyhow::anyhow!(
"Number of items in row must be between 1 and 4, got {}", "Number of items in row must be between 1 and 4, got {}",
@ -103,7 +115,6 @@ impl DashboardYml {
)); ));
} }
// Check column sizes sum to valid amount
if row.column_sizes.is_empty() || row.column_sizes.len() > 4 { if row.column_sizes.is_empty() || row.column_sizes.len() > 4 {
return Err(anyhow::anyhow!( return Err(anyhow::anyhow!(
"Number of column sizes must be between 1 and 4, got {}", "Number of column sizes must be between 1 and 4, got {}",
@ -111,7 +122,6 @@ impl DashboardYml {
)); ));
} }
// Check column sizes match number of items
if row.column_sizes.len() != row.items.len() { if row.column_sizes.len() != row.items.len() {
return Err(anyhow::anyhow!( return Err(anyhow::anyhow!(
"Number of column sizes ({}) must match number of items ({})", "Number of column sizes ({}) must match number of items ({})",
@ -120,7 +130,6 @@ impl DashboardYml {
)); ));
} }
// Validate sum of column_sizes is exactly 12
let sum: u32 = row.column_sizes.iter().sum(); let sum: u32 = row.column_sizes.iter().sum();
if sum != 12 { if sum != 12 {
return Err(anyhow::anyhow!( return Err(anyhow::anyhow!(
@ -129,7 +138,6 @@ impl DashboardYml {
)); ));
} }
// Check individual column sizes are at least 3
for &size in &row.column_sizes { for &size in &row.column_sizes {
if size < 3 { if size < 3 {
return Err(anyhow::anyhow!( return Err(anyhow::anyhow!(
@ -148,7 +156,6 @@ impl DashboardYml {
.map_err(|e| anyhow::anyhow!("Failed to serialize dashboard yml: {}", e)) .map_err(|e| anyhow::anyhow!("Failed to serialize dashboard yml: {}", e))
} }
/// Returns the next available row ID based on existing rows
pub fn get_next_row_id(&self) -> u32 { pub fn get_next_row_id(&self) -> u32 {
self.rows self.rows
.iter() .iter()
@ -157,7 +164,6 @@ impl DashboardYml {
.map_or(1, |max_id| max_id + 1) .map_or(1, |max_id| max_id + 1)
} }
/// Adds a new row with provided items and auto-generates the row ID
pub fn add_row(&mut self, items: Vec<RowItem>, row_height: Option<u32>, column_sizes: Vec<u32>) { pub fn add_row(&mut self, items: Vec<RowItem>, row_height: Option<u32>, column_sizes: Vec<u32>) {
let next_id = self.get_next_row_id(); let next_id = self.get_next_row_id();
self.rows.push(Row { self.rows.push(Row {
@ -178,7 +184,7 @@ impl FromSql<Jsonb, Pg> for DashboardYml {
impl ToSql<Jsonb, Pg> for DashboardYml { impl ToSql<Jsonb, Pg> for DashboardYml {
fn to_sql<'b>(&'b self, out: &mut Output<'b, '_, Pg>) -> diesel::serialize::Result { fn to_sql<'b>(&'b self, out: &mut Output<'b, '_, Pg>) -> diesel::serialize::Result {
out.write_all(&[1])?; // JSONB version 1 header out.write_all(&[1])?;
out.write_all(&serde_json::to_vec(self)?)?; out.write_all(&serde_json::to_vec(self)?)?;
Ok(IsNull::No) Ok(IsNull::No)
} }
@ -191,10 +197,6 @@ mod tests {
#[test] #[test]
fn test_dashboard_yml_camel_case_serialization() { fn test_dashboard_yml_camel_case_serialization() {
// Test case: Verify that DashboardYml serializes to camelCase
// Expected: JSON fields should be in camelCase format
// Create a dashboard with one row
let dashboard = DashboardYml { let dashboard = DashboardYml {
name: "Test Dashboard".to_string(), name: "Test Dashboard".to_string(),
description: Some("This is a test dashboard".to_string()), description: Some("This is a test dashboard".to_string()),
@ -212,32 +214,24 @@ mod tests {
], ],
}; };
// Serialize to JSON
let json = serde_json::to_value(&dashboard).unwrap(); let json = serde_json::to_value(&dashboard).unwrap();
// Verify camelCase field names in the output
assert!(json.get("name").is_some()); assert!(json.get("name").is_some());
assert!(json.get("description").is_some()); assert!(json.get("description").is_some());
assert!(json.get("rows").is_some()); assert!(json.get("rows").is_some());
// Check row fields are in camelCase
let row = &json["rows"][0]; let row = &json["rows"][0];
assert!(row.get("items").is_some()); assert!(row.get("items").is_some());
assert!(row.get("rowHeight").is_some()); assert!(row.get("rowHeight").is_some());
assert!(row.get("columnSizes").is_some()); assert!(row.get("columnSizes").is_some());
assert!(row.get("id").is_some()); assert!(row.get("id").is_some());
// Verify snake_case field names are NOT present
assert!(row.get("row_height").is_none()); assert!(row.get("row_height").is_none());
assert!(row.get("column_sizes").is_none()); assert!(row.get("column_sizes").is_none());
} }
#[test] #[test]
fn test_dashboard_yml_snake_case_deserialization() { fn test_dashboard_yml_snake_case_deserialization() {
// Test case: Verify that DashboardYml deserializes from snake_case
// Expected: Both snake_case and camelCase fields should be accepted
// Create JSON with snake_case fields
let json = json!({ let json = json!({
"name": "Test Dashboard", "name": "Test Dashboard",
"description": "This is a test dashboard", "description": "This is a test dashboard",
@ -255,28 +249,21 @@ mod tests {
] ]
}); });
// Convert to YAML and use the new method to assign IDs
let yaml = serde_yaml::to_string(&json).unwrap(); let yaml = serde_yaml::to_string(&json).unwrap();
let dashboard = DashboardYml::new(yaml).unwrap(); let dashboard = DashboardYml::new(yaml).unwrap();
// Verify fields were properly deserialized
assert_eq!(dashboard.name, "Test Dashboard"); assert_eq!(dashboard.name, "Test Dashboard");
assert_eq!(dashboard.description, Some("This is a test dashboard".to_string())); assert_eq!(dashboard.description, Some("This is a test dashboard".to_string()));
assert_eq!(dashboard.rows.len(), 1); assert_eq!(dashboard.rows.len(), 1);
assert_eq!(dashboard.rows[0].row_height, Some(400)); assert_eq!(dashboard.rows[0].row_height, Some(400));
assert_eq!(dashboard.rows[0].column_sizes, vec![12]); assert_eq!(dashboard.rows[0].column_sizes, vec![12]);
// Check that a row ID was assigned
assert_eq!(dashboard.rows[0].id, 1); assert_eq!(dashboard.rows[0].id, 1);
} }
#[test] #[test]
fn test_dashboard_yml_camel_case_deserialization() { fn test_dashboard_yml_camel_case_deserialization() {
// Test case: Verify that DashboardYml deserializes from camelCase
// Expected: camelCase fields should be properly deserialized
// Create JSON with camelCase fields
let json = json!({ let json = json!({
"name": "Test Dashboard", "name": "Test Dashboard",
"description": "This is a test dashboard", "description": "This is a test dashboard",
@ -294,52 +281,43 @@ mod tests {
] ]
}); });
// Convert to YAML and use the new method to assign IDs
let yaml = serde_yaml::to_string(&json).unwrap(); let yaml = serde_yaml::to_string(&json).unwrap();
let dashboard = DashboardYml::new(yaml).unwrap(); let dashboard = DashboardYml::new(yaml).unwrap();
// Verify fields were properly deserialized
assert_eq!(dashboard.name, "Test Dashboard"); assert_eq!(dashboard.name, "Test Dashboard");
assert_eq!(dashboard.description, Some("This is a test dashboard".to_string())); assert_eq!(dashboard.description, Some("This is a test dashboard".to_string()));
assert_eq!(dashboard.rows.len(), 1); assert_eq!(dashboard.rows.len(), 1);
assert_eq!(dashboard.rows[0].row_height, Some(400)); assert_eq!(dashboard.rows[0].row_height, Some(400));
assert_eq!(dashboard.rows[0].column_sizes, vec![12]); assert_eq!(dashboard.rows[0].column_sizes, vec![12]);
// Check that a row ID was assigned
assert_eq!(dashboard.rows[0].id, 1); assert_eq!(dashboard.rows[0].id, 1);
} }
#[test] #[test]
fn test_row_id_generation() { fn test_row_id_generation() {
// Test case: Verify that row IDs are properly generated
// Expected: Row IDs should increment by 1 for each row
// Create a dashboard from YAML without row IDs
let yaml = r#" let yaml = r#"
name: Test Dashboard name: Test Dashboard
description: This is a test dashboard description: This is a test dashboard
rows: rows:
- id: 0 # using 0 as a signal to generate a new ID - id: 0
items: items:
- id: 00000000-0000-0000-0000-000000000001 - id: 00000000-0000-0000-0000-000000000001
rowHeight: 400 rowHeight: 400
columnSizes: [12] columnSizes: [12]
- id: 0 # using 0 as a signal to generate a new ID - id: 0
items: items:
- id: 00000000-0000-0000-0000-000000000002 - id: 00000000-0000-0000-0000-000000000002
rowHeight: 320 rowHeight: 320
columnSizes: [12] columnSizes: [12]
- id: 0 # using 0 as a signal to generate a new ID - id: 0
items: items:
- id: 00000000-0000-0000-0000-000000000003 - id: 00000000-0000-0000-0000-000000000003
rowHeight: 550 rowHeight: 550
columnSizes: [12] columnSizes: [12]
"#; "#;
// Create dashboard using the new method (which should assign row IDs)
let dashboard = DashboardYml::new(yaml.to_string()).unwrap(); let dashboard = DashboardYml::new(yaml.to_string()).unwrap();
// Verify that row IDs were assigned in sequence
assert_eq!(dashboard.rows[0].id, 1); assert_eq!(dashboard.rows[0].id, 1);
assert_eq!(dashboard.rows[1].id, 2); assert_eq!(dashboard.rows[1].id, 2);
assert_eq!(dashboard.rows[2].id, 3); assert_eq!(dashboard.rows[2].id, 3);
@ -347,10 +325,6 @@ rows:
#[test] #[test]
fn test_add_row_method() { fn test_add_row_method() {
// Test case: Verify that the add_row method assigns the next available ID
// Expected: New rows get the next sequential ID
// Create a dashboard with one row
let mut dashboard = DashboardYml { let mut dashboard = DashboardYml {
name: "Test Dashboard".to_string(), name: "Test Dashboard".to_string(),
description: None, description: None,
@ -358,41 +332,33 @@ rows:
Row { Row {
items: vec![RowItem { id: Uuid::new_v4() }], items: vec![RowItem { id: Uuid::new_v4() }],
row_height: None, row_height: None,
column_sizes: vec![12], // Must sum to 12 column_sizes: vec![12],
id: 1, id: 1,
} }
], ],
}; };
// Add a second row using the add_row method
dashboard.add_row( dashboard.add_row(
vec![RowItem { id: Uuid::new_v4() }], vec![RowItem { id: Uuid::new_v4() }],
Some(400), Some(400),
vec![12], // Must sum to 12 vec![12],
); );
// Add a third row
dashboard.add_row( dashboard.add_row(
vec![RowItem { id: Uuid::new_v4() }], vec![RowItem { id: Uuid::new_v4() }],
Some(320), Some(320),
vec![12], // Must sum to 12 vec![12],
); );
// Verify that row IDs were assigned in sequence
assert_eq!(dashboard.rows[0].id, 1); assert_eq!(dashboard.rows[0].id, 1);
assert_eq!(dashboard.rows[1].id, 2); assert_eq!(dashboard.rows[1].id, 2);
assert_eq!(dashboard.rows[2].id, 3); assert_eq!(dashboard.rows[2].id, 3);
// Verify that get_next_row_id returns the expected value
assert_eq!(dashboard.get_next_row_id(), 4); assert_eq!(dashboard.get_next_row_id(), 4);
} }
#[test] #[test]
fn test_non_sequential_row_ids() { fn test_non_sequential_row_ids() {
// Test case: Verify that get_next_row_id works with non-sequential IDs
// Expected: Next ID should be max(id) + 1
// Create a dashboard with rows that have non-sequential IDs
let dashboard = DashboardYml { let dashboard = DashboardYml {
name: "Test Dashboard".to_string(), name: "Test Dashboard".to_string(),
description: None, description: None,
@ -400,34 +366,29 @@ rows:
Row { Row {
items: vec![RowItem { id: Uuid::new_v4() }], items: vec![RowItem { id: Uuid::new_v4() }],
row_height: None, row_height: None,
column_sizes: vec![12], // Must sum to 12 column_sizes: vec![12],
id: 1, id: 1,
}, },
Row { Row {
items: vec![RowItem { id: Uuid::new_v4() }], items: vec![RowItem { id: Uuid::new_v4() }],
row_height: None, row_height: None,
column_sizes: vec![12], // Must sum to 12 column_sizes: vec![12],
id: 5, // Intentionally out of sequence id: 5,
}, },
Row { Row {
items: vec![RowItem { id: Uuid::new_v4() }], items: vec![RowItem { id: Uuid::new_v4() }],
row_height: None, row_height: None,
column_sizes: vec![12], // Must sum to 12 column_sizes: vec![12],
id: 3, id: 3,
} }
], ],
}; };
// Verify that get_next_row_id returns max(id) + 1
assert_eq!(dashboard.get_next_row_id(), 6); assert_eq!(dashboard.get_next_row_id(), 6);
} }
#[test] #[test]
fn test_explicitly_provided_id() { fn test_explicitly_provided_id() {
// Test case: Verify that explicitly provided IDs are preserved during deserialization
// Expected: Row ID should match the provided value
// Create JSON with an explicit ID field
let json = json!({ let json = json!({
"name": "Test Dashboard", "name": "Test Dashboard",
"description": "This is a test dashboard", "description": "This is a test dashboard",
@ -440,16 +401,14 @@ rows:
], ],
"rowHeight": 400, "rowHeight": 400,
"columnSizes": [12], "columnSizes": [12],
"id": 42 // Explicitly set ID "id": 42
} }
] ]
}); });
// Convert to YAML and use the new method
let yaml = serde_yaml::to_string(&json).unwrap(); let yaml = serde_yaml::to_string(&json).unwrap();
let dashboard = DashboardYml::new(yaml).unwrap(); let dashboard = DashboardYml::new(yaml).unwrap();
// Verify the explicit ID was preserved
assert_eq!(dashboard.rows[0].id, 42); assert_eq!(dashboard.rows[0].id, 42);
} }
} }

View File

@ -11,6 +11,35 @@ use serde::{Deserialize, Serialize};
use serde_json::Value; use serde_json::Value;
use std::io::Write; use std::io::Write;
use uuid::Uuid; use uuid::Uuid;
use regex::Regex;
use lazy_static::lazy_static;
// Helper function to sanitize string values for YAML
fn sanitize_yaml_string(value: &str) -> String {
value
.replace(':', "") // Remove colons
.replace('\'', "") // Remove single quotes
.replace('\"', "") // Remove double quotes
.replace('\n', " ") // Replace newlines with spaces
.replace('\t', " ") // Replace tabs with spaces
.trim() // Trim leading/trailing whitespace
.to_string()
}
lazy_static! {
// Combined regex for keys whose string values need sanitization
static ref SANITIZE_KEYS_RE: Regex = Regex::new(
r#"^(?P<indent>\s*)(?P<key>name|description|timeFrame|displayName|prefix|suffix|goalLineLabel|trendlineLabel|pieInnerLabelTitle|metricValueLabel):\s*(?P<value>.*)$"#
).unwrap();
// Regex to find the start of the colors block and capture indentation
static ref COLORS_START_RE: Regex = Regex::new(r#"^(\s*)colors:\s*$"#).unwrap();
// Regex to find unquoted hex color list items and capture indent/marker and color value
static ref COLOR_LINE_RE: Regex = Regex::new(r#"^(\s*-\s+)(#[0-9a-fA-F]+)\s*$"#).unwrap();
// Regex to capture the indentation of a line (used for colors block and timeFrame insertion)
static ref INDENT_RE: Regex = Regex::new(r#"^(\s*)\S"#).unwrap();
// Regex to find the sql key to determine insertion point for timeFrame
static ref SQL_KEY_RE: Regex = Regex::new(r#"^(\s*)sql:\s*.*$"#).unwrap();
}
#[derive(Debug, Serialize, Deserialize, Clone, FromSqlRow, AsExpression)] #[derive(Debug, Serialize, Deserialize, Clone, FromSqlRow, AsExpression)]
#[diesel(sql_type = Jsonb)] #[diesel(sql_type = Jsonb)]
@ -489,13 +518,97 @@ pub struct TableChartConfig {
impl MetricYml { impl MetricYml {
pub fn new(yml_content: String) -> Result<Self> { pub fn new(yml_content: String) -> Result<Self> {
let file: MetricYml = match serde_yaml::from_str(&yml_content) { let mut processed_lines = Vec::new();
let mut in_colors_block = false;
let mut colors_indent: Option<usize> = None;
let mut time_frame_found = false;
let mut sql_line_index: Option<usize> = None;
let mut sql_line_indent: Option<String> = None;
for (index, line) in yml_content.lines().enumerate() {
// Store SQL line info for potential timeFrame insertion
if sql_line_index.is_none() {
if let Some(caps) = SQL_KEY_RE.captures(line) {
sql_line_index = Some(index);
sql_line_indent = Some(caps.get(1).map_or("", |m| m.as_str()).to_string());
}
}
let current_indent = INDENT_RE.captures(line).map_or(0, |caps| {
caps.get(1).map_or(0, |m| m.as_str().len())
});
// --- Colors Block Logic ---
if in_colors_block && colors_indent.map_or(false, |indent| current_indent <= indent) {
in_colors_block = false;
colors_indent = None;
}
if !in_colors_block { // Only check for start if not already in block
if let Some(caps) = COLORS_START_RE.captures(line) {
in_colors_block = true;
colors_indent = Some(caps.get(1).map_or(0, |m| m.as_str().len()));
processed_lines.push(line.to_string());
continue;
}
}
if in_colors_block {
if let Some(caps) = COLOR_LINE_RE.captures(line) {
let marker_part = caps.get(1).map_or("", |m| m.as_str());
let color_part = caps.get(2).map_or("", |m| m.as_str());
processed_lines.push(format!("{}'{}'", marker_part, color_part));
continue;
} else {
processed_lines.push(line.to_string()); // Add line within color block as is if not a hex item
continue;
}
}
// --- End Colors Block Logic ---
// --- String Sanitization & timeFrame Check ---
if let Some(caps) = SANITIZE_KEYS_RE.captures(line) {
let indent = caps.name("indent").map_or("", |m| m.as_str());
let key = caps.name("key").map_or("", |m| m.as_str());
let value = caps.name("value").map_or("", |m| m.as_str());
if key == "timeFrame" {
time_frame_found = true;
}
let sanitized_value = sanitize_yaml_string(value);
// Reconstruct line, potentially quoting if value was empty after sanitizing?
// For now, just place the sanitized value. YAML might handle empty strings okay.
// If the value needs quotes (e.g., contains special chars AFTER sanitization, though unlikely now)
// we might need more complex logic. Let's assume simple value placement is fine.
processed_lines.push(format!("{}{}: {}", indent, key, sanitized_value));
} else {
// Add lines that don't match any processing rules
processed_lines.push(line.to_string());
}
}
// Insert default timeFrame if not found
if !time_frame_found {
if let Some(index) = sql_line_index {
// Use the indent captured from the sql line
let indent = sql_line_indent.unwrap_or_else(|| " ".to_string()); // Default indent if sql indent capture failed
processed_lines.insert(index, format!("{}timeFrame: 'all_time'", indent));
} else {
// Fallback: append if sql key wasn't found (shouldn't happen for valid metric)
// Or maybe error out?
eprintln!("Warning: sql key not found in metric YAML, cannot insert default timeFrame correctly.");
// Append at end with default indent - might break YAML structure
processed_lines.push(" timeFrame: 'all_time'".to_string());
}
}
let processed_yml_content = processed_lines.join("\n");
// Parse the processed YAML content
let file: MetricYml = match serde_yaml::from_str(&processed_yml_content) {
Ok(file) => file, Ok(file) => file,
Err(e) => { Err(e) => {
// Extract field information from error message if possible
let error_message = format!("Error parsing YAML: {}", e); let error_message = format!("Error parsing YAML: {}", e);
// Try to extract field path from YAML error
let yaml_error_str = e.to_string(); let yaml_error_str = e.to_string();
let detailed_error = let detailed_error =
if yaml_error_str.contains("at line") && yaml_error_str.contains("column") { if yaml_error_str.contains("at line") && yaml_error_str.contains("column") {
@ -514,7 +627,6 @@ impl MetricYml {
} }
} }
//TODO: Need to validate a metric deeply.
pub fn validate(&self) -> Result<()> { pub fn validate(&self) -> Result<()> {
Ok(()) Ok(())
} }
@ -529,7 +641,7 @@ impl FromSql<Jsonb, Pg> for MetricYml {
impl ToSql<Jsonb, Pg> for MetricYml { impl ToSql<Jsonb, Pg> for MetricYml {
fn to_sql<'b>(&'b self, out: &mut Output<'b, '_, Pg>) -> diesel::serialize::Result { fn to_sql<'b>(&'b self, out: &mut Output<'b, '_, Pg>) -> diesel::serialize::Result {
out.write_all(&[1])?; // JSONB version 1 header out.write_all(&[1])?;
out.write_all(&serde_json::to_vec(self)?)?; out.write_all(&serde_json::to_vec(self)?)?;
Ok(IsNull::No) Ok(IsNull::No)
} }
@ -553,15 +665,12 @@ mod tests {
let metric = MetricYml::new(yml_content.to_string())?; let metric = MetricYml::new(yml_content.to_string())?;
// Verify the basic fields
assert_eq!(metric.name, "Average Time to Close by Rep"); assert_eq!(metric.name, "Average Time to Close by Rep");
// Compare SQL with normalized whitespace
let expected_sql = normalize_whitespace("SELECT rep_id, AVG(time_to_close) AS average_time_to_close FROM deal_metrics GROUP BY rep_id ORDER BY average_time_to_close DESC"); let expected_sql = normalize_whitespace("SELECT rep_id, AVG(time_to_close) AS average_time_to_close FROM deal_metrics GROUP BY rep_id ORDER BY average_time_to_close DESC");
let actual_sql = normalize_whitespace(&metric.sql); let actual_sql = normalize_whitespace(&metric.sql);
assert_eq!(actual_sql, expected_sql); assert_eq!(actual_sql, expected_sql);
// Verify chart config
match metric.chart_config { match metric.chart_config {
ChartConfig::Bar(config) => { ChartConfig::Bar(config) => {
assert!(config.base.column_label_formats.is_empty()); assert!(config.base.column_label_formats.is_empty());
@ -591,7 +700,6 @@ mod tests {
#[test] #[test]
fn test_column_label_format_constructors() { fn test_column_label_format_constructors() {
// Test number format
let number_format = ColumnLabelFormat::new_number(); let number_format = ColumnLabelFormat::new_number();
assert_eq!(number_format.column_type, "number"); assert_eq!(number_format.column_type, "number");
assert_eq!(number_format.style, "number"); assert_eq!(number_format.style, "number");
@ -599,20 +707,16 @@ mod tests {
assert_eq!(number_format.minimum_fraction_digits, Some(0)); assert_eq!(number_format.minimum_fraction_digits, Some(0));
assert_eq!(number_format.maximum_fraction_digits, Some(2)); assert_eq!(number_format.maximum_fraction_digits, Some(2));
// Test string format
let string_format = ColumnLabelFormat::new_string(); let string_format = ColumnLabelFormat::new_string();
assert_eq!(string_format.column_type, "string"); assert_eq!(string_format.column_type, "string");
assert_eq!(string_format.style, "string"); assert_eq!(string_format.style, "string");
assert_eq!(string_format.number_separator_style, None);
// Test date format
let date_format = ColumnLabelFormat::new_date(); let date_format = ColumnLabelFormat::new_date();
assert_eq!(date_format.column_type, "date"); assert_eq!(date_format.column_type, "date");
assert_eq!(date_format.style, "date"); assert_eq!(date_format.style, "date");
assert_eq!(date_format.date_format, Some("auto".to_string())); assert_eq!(date_format.date_format, Some("auto".to_string()));
assert_eq!(date_format.is_utc, Some(false)); assert_eq!(date_format.is_utc, Some(false));
// Test boolean format - should be same as string
let boolean_format = ColumnLabelFormat::new_boolean(); let boolean_format = ColumnLabelFormat::new_boolean();
assert_eq!(boolean_format.column_type, "string"); assert_eq!(boolean_format.column_type, "string");
assert_eq!(boolean_format.style, "string"); assert_eq!(boolean_format.style, "string");
@ -620,7 +724,6 @@ mod tests {
#[test] #[test]
fn test_generate_formats_from_metadata() { fn test_generate_formats_from_metadata() {
// Create test metadata
let metadata = DataMetadata { let metadata = DataMetadata {
column_count: 3, column_count: 3,
row_count: 10, row_count: 10,
@ -652,13 +755,10 @@ mod tests {
], ],
}; };
// Generate formats
let formats = ColumnLabelFormat::generate_formats_from_metadata(&metadata); let formats = ColumnLabelFormat::generate_formats_from_metadata(&metadata);
// Check we have formats for all columns
assert_eq!(formats.len(), 3); assert_eq!(formats.len(), 3);
// Check individual formats
let id_format = formats.get("id").unwrap(); let id_format = formats.get("id").unwrap();
assert_eq!(id_format.column_type, "number"); assert_eq!(id_format.column_type, "number");