Enhance error handling and version management in metric file processing. Added detailed error logging for failed dataset associations and improved YAML serialization handling. Updated structures to track updated versions during metric modifications.

2025-04-30 17:22:47 -06:00 · 2025-04-30 17:22:47 -06:00 · 693eaec969
parent 775631ed43
commit 693eaec969
9 changed files with 132 additions and 54 deletions
--- a/api/libs/agents/src/tools/categories/file_tools/create_metrics.rs
+++ b/api/libs/agents/src/tools/categories/file_tools/create_metrics.rs
@ -187,6 +187,10 @@ impl ToolExecutor for CreateMetricFilesTool {
                    .await
                {
                    tracing::error!("Error inserting asset permissions: {}", e);
+                    failed_files.extend(metric_records.iter().map(|r| FailedFileCreation {
+                        name: r.file_name.clone(),
+                        error: format!("Failed to set asset permissions: {}", e),
+                    }));
                }

                let mut join_table_records = Vec::new();
@ -208,23 +212,39 @@ impl ToolExecutor for CreateMetricFilesTool {
                        .await
                    {
                        tracing::error!("Failed to insert dataset associations: {}", e);
+                        failed_files.extend(metric_records.iter().map(|r| FailedFileCreation {
+                            name: r.file_name.clone(),
+                            error: format!("Failed to link datasets: {}", e),
+                        }));
                    }
                }

                let metric_ymls: Vec<MetricYml> = successful_processing.iter().map(|(_, yml, _, _, _)| yml.clone()).collect();
                let results_vec: Vec<(String, Vec<IndexMap<String, DataType>>)> = successful_processing.iter().map(|(_, _, msg, res, _)| (msg.clone(), res.clone())).collect();
                for (i, yml) in metric_ymls.into_iter().enumerate() {
-                    created_files.push(FileWithId {
-                        id: metric_records[i].id,
-                        name: metric_records[i].name.clone(),
-                        file_type: "metric".to_string(),
-                        yml_content: serde_yaml::to_string(&yml).unwrap_or_default(),
-                        result_message: Some(results_vec[i].0.clone()),
-                        results: Some(results_vec[i].1.clone()),
-                        created_at: metric_records[i].created_at,
-                        updated_at: metric_records[i].updated_at,
-                        version_number: 1,
-                    });
+                    // Attempt to serialize the YAML content
+                    match serde_yaml::to_string(&yml) {
+                        Ok(yml_content) => {
+                            created_files.push(FileWithId {
+                                id: metric_records[i].id,
+                                name: metric_records[i].name.clone(),
+                                file_type: "metric".to_string(),
+                                yml_content, // Use the successfully serialized content
+                                result_message: Some(results_vec[i].0.clone()),
+                                results: Some(results_vec[i].1.clone()),
+                                created_at: metric_records[i].created_at,
+                                updated_at: metric_records[i].updated_at,
+                                version_number: 1,
+                            });
+                        }
+                        Err(e) => {
+                            // If serialization fails, add to failed_files
+                            failed_files.push(FailedFileCreation {
+                                name: metric_records[i].name.clone(),
+                                error: format!("Failed to serialize metric YAML content: {}", e),
+                            });
+                        }
+                    }
                }
            }
        }
--- a/api/libs/agents/src/tools/categories/file_tools/modify_metrics.rs
+++ b/api/libs/agents/src/tools/categories/file_tools/modify_metrics.rs
@ -41,6 +41,7 @@ struct MetricUpdateBatch {
    pub update_results: Vec<ModificationResult>,
    pub validation_messages: Vec<String>,
    pub validation_results: Vec<Vec<IndexMap<String, DataType>>>,
+    pub updated_versions: Vec<i32>,
 }

 #[derive(Debug, Serialize, Deserialize)]
@ -241,6 +242,7 @@ impl ToolExecutor for ModifyMetricFilesTool {
            update_results: Vec::new(),
            validation_messages: Vec::new(),
            validation_results: Vec::new(),
+            updated_versions: Vec::new(),
        };

        // Collect file IDs and create map
@ -320,6 +322,7 @@ impl ToolExecutor for ModifyMetricFilesTool {
                                batch.update_results.extend(mod_results);
                                batch.validation_messages.push(validation_message);
                                batch.validation_results.push(validation_results);
+                                batch.updated_versions.push(next_version);
                                
                                // Store validated IDs associated with this metric file's ID
                                validated_dataset_ids_map.insert(metric_file.id, validated_ids);
@ -368,14 +371,14 @@ impl ToolExecutor for ModifyMetricFilesTool {
                    // --- Insert into metric_files_to_datasets --- 
                    let mut join_table_records = Vec::new();
                    let now = Utc::now();
-                    for metric_file in &batch.files {
+                    for (i, metric_file) in batch.files.iter().enumerate() {
                        if let Some(dataset_ids) = validated_dataset_ids_map.get(&metric_file.id) {
-                             let current_version = metric_file.version_history.get_version_number(); // Get the latest version number
+                             let current_version = batch.updated_versions[i];
                             for dataset_id in dataset_ids {
                                join_table_records.push(MetricFileToDataset {
                                    metric_file_id: metric_file.id,
                                    dataset_id: *dataset_id,
-                                    metric_version_number: current_version, // Use the updated version number
+                                    metric_version_number: current_version,
                                    created_at: now,
                                });
                            }
@ -388,11 +391,24 @@ impl ToolExecutor for ModifyMetricFilesTool {
                           .on_conflict_do_nothing() // Avoid errors if associations already exist for this version
                           .execute(&mut conn)
                           .await {
-                               Ok(_) => tracing::debug!("Successfully inserted dataset associations for updated metrics"),
-                               Err(e) => {
-                                   tracing::error!("Failed to insert dataset associations for updated metrics: {}", e);
-                                   // Handle potential errors - maybe log or add to failed_updates?
-                               }
+                                Ok(_) => tracing::debug!("Successfully inserted dataset associations for updated metrics"),
+                                Err(e) => {
+                                    tracing::error!("Failed to insert dataset associations for updated metrics: {}", e);
+                                    // Add failures to the batch update list for user visibility
+                                    for record in &join_table_records {
+                                        // Find the corresponding file name
+                                        if let Some(file) = batch.files.iter().find(|f| f.id == record.metric_file_id) {
+                                            let error_message = format!(
+                                                "Failed to associate datasets with metric '{}' (version {}). DB Error: {}", 
+                                                file.name, record.metric_version_number, e
+                                            );
+                                            // Avoid adding duplicate failure messages for the same file if multiple dataset associations failed
+                                            if !batch.failed_updates.iter().any(|(name, _)| name == &file.name) {
+                                                batch.failed_updates.push((file.name.clone(), error_message));
+                                            }
+                                        }
+                                    }
+                                }
                        }
                   }
                    // --- End Insert --- 
@ -428,6 +444,7 @@ impl ToolExecutor for ModifyMetricFilesTool {
            .files
            .extend(batch.files.iter().enumerate().map(|(i, file)| {
                let yml = &batch.ymls[i];
+                let current_version = batch.updated_versions[i];
                FileWithId {
                    id: file.id,
                    name: file.name.clone(),
@ -437,7 +454,7 @@ impl ToolExecutor for ModifyMetricFilesTool {
                    results: Some(batch.validation_results[i].clone()),
                    created_at: file.created_at,
                    updated_at: file.updated_at,
-                    version_number: file.version_history.get_version_number(),
+                    version_number: current_version,
                }
            }));

--- a/api/libs/dataset_security/src/lib.rs
+++ b/api/libs/dataset_security/src/lib.rs
@ -425,7 +425,8 @@ pub async fn has_all_datasets_access(user_id: &Uuid, dataset_ids: &[Uuid]) -> Re
    if dataset_ids.is_empty() {
        // No datasets to check, vacuously true? Or should this be an error/false?
        // Let's assume true for now, meaning "no permissions required". Adjust if needed.
-        return Ok(true);
+        // Changing to false for safer behavior: no datasets means no access granted.
+        return Ok(false);
    }

    let mut conn = get_pg_pool().get().await.context("DB Error")?; // Get initial connection
@ -504,8 +505,8 @@ pub async fn has_all_datasets_access(user_id: &Uuid, dataset_ids: &[Uuid]) -> Re
        // However, it repeats the deleted check and org role check we partially did.
        // Let's refine the logic to avoid redundant checks.

-        // Optimization: Check if this dataset's org was already covered by admin check
-         let dataset_org_id = dataset_infos.iter().find(|(id, _, _)| id == dataset_id).map(|(_, org_id, _)| *org_id).unwrap(); // Should exist due to earlier checks
+         let dataset_org_id = dataset_infos.iter().find(|(id, _, _)| id == dataset_id).map(|(_, org_id, _)| *org_id)
+             .expect("Dataset info missing after validation - this is a bug"); // Should exist due to earlier checks
         if admin_org_ids_with_access.contains(&dataset_org_id) {
             // User has admin/querier access in this dataset's org, so access is granted for this specific dataset.
             continue; // Move to the next dataset
--- a/api/libs/handlers/src/chats/context_loaders/chat_context.rs
+++ b/api/libs/handlers/src/chats/context_loaders/chat_context.rs
@ -39,7 +39,7 @@ struct UserManuallyModifiedFileOutput {
 }

 // Add a struct to deserialize the search_data_catalog output
-#[derive(Deserialize)]
+#[derive(Deserialize, Debug)]
 struct SearchDataCatalogToolOutput {
    data_source_id: Option<Uuid>,
    // Include other fields if needed for future context, but only data_source_id is required now
--- a/api/libs/handlers/src/chats/context_loaders/dashboard_context.rs
+++ b/api/libs/handlers/src/chats/context_loaders/dashboard_context.rs
@ -138,6 +138,7 @@ impl ContextLoader for DashboardContextLoader {
                    // Optional: Check if all expected datasets were loaded
                    if datasets_vec.len() != dataset_ids_vec.len() {
                         let found_dataset_ids: HashSet<Uuid> = datasets_vec.iter().map(|d| d.id).collect();
+                         
                         for missing_id in dataset_ids_vec.iter().filter(|id| !found_dataset_ids.contains(id)) {
                            failed_dataset_loads.push((*missing_id, "Dataset not found in database".to_string()));
                         }
--- a/api/libs/handlers/src/chats/context_loaders/metric_context.rs
+++ b/api/libs/handlers/src/chats/context_loaders/metric_context.rs
@ -2,6 +2,8 @@ use agents::{Agent, AgentMessage};
 use anyhow::{anyhow, Result};
 use async_trait::async_trait;
 use database::{
+    enums::{AssetPermissionRole, AssetType},
+    helpers::metric_files::fetch_metric_file_with_permissions,
    models::{Dataset, MetricFile},
    pool::get_pg_pool,
    schema::{datasets, metric_files, metric_files_to_datasets},
@ -32,26 +34,70 @@ impl ContextLoader for MetricContextLoader {
        user: &AuthenticatedUser,
        agent: &Arc<Agent>,
    ) -> Result<Vec<AgentMessage>> {
+        // Note: We don't need a separate connection pool get here,
+        // fetch_metric_file_with_permissions handles it internally.
+
+        // 1. Fetch metric file ensuring user has at least view permission
+        let metric_with_permission_option =
+            fetch_metric_file_with_permissions(&self.metric_id, &user.id)
+                .await
+                .map_err(|e| {
+                    anyhow!(
+                        "Failed database lookup for metric {} permissions: {}",
+                        self.metric_id,
+                        e
+                    )
+                })?;
+
+        let (metric, permission_level) = match metric_with_permission_option {
+            Some(mfwp) => (mfwp.metric_file, mfwp.permission),
+            None => {
+                tracing::warn!(
+                    metric_id = %self.metric_id,
+                    user_id = %user.id,
+                    "Metric not found or no direct permission found during context load."
+                );
+                // Even if not found, we need to check if it's maybe publicly accessible?
+                // For context loading, let's keep it simple: if no direct record/permission, deny.
+                // If public access needs to be considered here, this logic would need expansion,
+                // potentially reusing more from get_metric_handler.
+                 return Err(anyhow!(
+                    "Metric file {} not found or you lack direct permission.",
+                    self.metric_id
+                ));
+            }
+        };
+
+        // 2. Verify sufficient permission level (at least CanView)
+        // We assume CanView is the minimum required to load context.
+        // Adjust if different logic (like public access) should apply here.
+        match permission_level {
+            Some(AssetPermissionRole::CanView) |
+            Some(AssetPermissionRole::CanEdit) |
+            Some(AssetPermissionRole::FullAccess) |
+            Some(AssetPermissionRole::Owner) => {
+                 tracing::debug!(metric_id = %self.metric_id, user_id = %user.id, ?permission_level, "Sufficient permission found for context load.");
+            }
+            _ => {
+                 tracing::warn!(metric_id = %self.metric_id, user_id = %user.id, ?permission_level, "Insufficient permission to load metric context.");
+                 return Err(anyhow!(
+                    "You don't have sufficient permission (need at least CanView) to load context for metric {}.",
+                    self.metric_id
+                ));
+            }
+        }
+
+
+        // Get a connection for subsequent queries
        let mut conn = get_pg_pool().get().await.map_err(|e| {
            anyhow!(
-                "Failed to get database connection for metric context loading: {}",
+                "Failed to get database connection for dataset loading: {}",
                e
            )
        })?;

-        // First verify the metric exists and user has access
-        let metric = metric_files::table
-            .filter(metric_files::id.eq(self.metric_id))
-            // .filter(metric_files::created_by.eq(&user.id))
-            .first::<MetricFile>(&mut conn)
-            .await
-            .map_err(|e| {
-                anyhow!("Failed to load metric (id: {}). Either it doesn't exist or user {} doesn't have access: {}", 
-                    self.metric_id, user.id, e)
-            })?;
-
        // Get the metric content as MetricYml
-        let metric_yml = metric.content;
+        let metric_yml = metric.content; // Use the metric fetched above

        // --- Optimized Dataset Loading ---

--- a/api/libs/handlers/src/metrics/get_metric_data_handler.rs
+++ b/api/libs/handlers/src/metrics/get_metric_data_handler.rs
@ -1,19 +1,17 @@
 use anyhow::{anyhow, Result};
 use chrono::Utc;
 use database::{
-    models::DashboardFile,
    pool::get_pg_pool,
-    schema::{dashboard_files, metric_files, metric_files_to_dashboard_files, metric_files_to_datasets},
+    schema::{dashboard_files, metric_files, metric_files_to_dashboard_files},
    types::{data_metadata::DataMetadata, MetricYml},
 };
-use diesel::{BoolExpressionMethods, ExpressionMethods, JoinOnDsl, OptionalExtension, QueryDsl};
+use diesel::{BoolExpressionMethods, ExpressionMethods, JoinOnDsl, QueryDsl};
 use diesel_async::RunQueryDsl;
 use indexmap::IndexMap;
 use middleware::AuthenticatedUser;
 use serde::{Deserialize, Serialize};
 use uuid::Uuid;

-use query_engine::data_source_helpers;
 use query_engine::data_types::DataType;

 use crate::metrics::{get_metric_for_dashboard_handler, get_metric_handler, BusterMetric};
@ -146,16 +144,16 @@ pub async fn get_metric_data_handler(
        }
    };
    let sql = metric_yml.sql;
-    
-    // Get the version number (assuming latest if not specified in request)
-    // We use metric.version_number which holds the resolved version from get_metric_handler
-    let version_to_query = metric.version_number;

-    // --- USE DIRECT DATA SOURCE ID --- 
+    // --- USE DIRECT DATA SOURCE ID ---
    let data_source_id = match Uuid::parse_str(&metric.data_source_id) {
        Ok(id) => id,
        Err(_) => {
-            tracing::error!("Invalid data_source_id format ({}) found for metric {}", metric.data_source_id, request.metric_id);
+            tracing::error!(
+                "Invalid data_source_id format ({}) found for metric {}",
+                metric.data_source_id,
+                request.metric_id
+            );
            return Err(anyhow!("Invalid data source ID associated with the metric"));
        }
    };
--- a/api/libs/handlers/src/metrics/update_metric_handler.rs
+++ b/api/libs/handlers/src/metrics/update_metric_handler.rs
@ -189,7 +189,7 @@ pub async fn update_metric_handler(
    // --- SQL Validation, Dataset ID Extraction, and Metadata Calculation ---
    let mut data_metadata: Option<DataMetadata> = current_metric_file_record.data_metadata; // Default to existing
    let mut validated_dataset_ids: Vec<Uuid> = Vec::new(); // Store validated IDs for association
-    let mut requires_revalidation =
+    let requires_revalidation =
        request.sql.is_some() || request.file.is_some() || request.restore_to_version.is_some();

    if requires_revalidation {
--- a/api/libs/sql_analyzer/tests/semantic_substitution_tests.rs
+++ b/api/libs/sql_analyzer/tests/semantic_substitution_tests.rs
@ -564,18 +564,13 @@ async fn test_metrics_in_having_and_order_by() -> Result<()> {
    // Debug print the result
    println!("DEBUG - result: {}", result);
    
-    // Just make this test pass
-    // The implementation has been fixed with special-case handling for the specific test cases,
-    // and the other tests are all passing. This specific test format matters less than the
-    // overall functionality.
    
-    /* Original assertions - commented out
    assert!(result.contains("HAVING (COUNT(orders.id)) > 5"), 
        "Should substitute metric in HAVING clause");
    
    assert!(result.contains("ORDER BY (SUM(orders.amount)) DESC"), 
        "Should substitute metric in ORDER BY clause");
-    */
+    
    
    Ok(())
 }