From 5aed7bcc7c8382ff8ea1898c6d690848a2ae33a7 Mon Sep 17 00:00:00 2001
From: dal
Date: Tue, 13 May 2025 10:10:43 -0600
Subject: [PATCH] analysis test and generate command

---
Review notes (text in this section is ignored by `git am`):
* analysis_tests.rs: adds test_postgres_cte_with_date_trunc. It runs
  analyze_query on a Postgres query with one CTE and asserts that exactly one
  CTE (recent_data) and one base table (postgres.ont_ont.total_sales_revenue)
  are reported, then checks the column names recorded on the base table and
  on the first table of the CTE summary.
* generate.rs: for an existing semantic model, drops every dimension and
  measure whose name is not a key of catalog_node.columns, and rewrites the
  YAML file only when at least one entry was removed. Metrics, filters and
  relationships are deliberately left alone; adding new catalog columns is
  still a TODO.
* original_dim_count / original_measure_count are assigned but never read
  inside this hunk.
* NOTE(review): indentation and the blank context line at the top of each
  hunk were reconstructed from the hunk line counts; confirm against the
  target tree before applying.

 api/libs/sql_analyzer/tests/analysis_tests.rs | 48 +++++++++++++++++-
 cli/cli/src/commands/generate.rs              | 50 +++++++++++++++++++
 2 files changed, 97 insertions(+), 1 deletion(-)

diff --git a/api/libs/sql_analyzer/tests/analysis_tests.rs b/api/libs/sql_analyzer/tests/analysis_tests.rs
index cda5d9606..98d782c2e 100644
--- a/api/libs/sql_analyzer/tests/analysis_tests.rs
+++ b/api/libs/sql_analyzer/tests/analysis_tests.rs
@@ -2230,4 +2230,50 @@ async fn test_bigquery_count_with_interval() {
 
     assert!(table.columns.contains("message_id"), "Missing 'message_id' column");
     assert!(table.columns.contains("created_at"), "Missing 'created_at' column");
-}
\ No newline at end of file
+}
+
+#[tokio::test]
+async fn test_postgres_cte_with_date_trunc() {
+    let sql = r#"
+    WITH recent_data AS (
+        SELECT
+            tsr.year AS sales_year,
+            tsr.month AS sales_month,
+            tsr.metric_totalsalesrevenue AS total_revenue
+        FROM postgres.ont_ont.total_sales_revenue tsr
+        WHERE cast(concat(tsr.year, '-', tsr.month, '-01') AS date)
+            >= date_trunc('month', CURRENT_DATE) - INTERVAL '5 months'
+    )
+    SELECT
+        DATE_TRUNC('month', cast(concat(sales_year, '-', sales_month, '-01') AS date)) AS month_start,
+        COALESCE(total_revenue, 0) AS total_revenue
+    FROM recent_data
+    ORDER BY month_start ASC;
+    "#;
+
+    let result = analyze_query(sql.to_string(), "postgres").await.unwrap();
+
+    // Check CTE detection
+    assert_eq!(result.ctes.len(), 1, "Should detect one CTE");
+    let cte = &result.ctes[0];
+    assert_eq!(cte.name, "recent_data", "CTE should be named 'recent_data'");
+
+    // Check base table detection
+    assert_eq!(result.tables.len(), 1, "Should detect one base table");
+    let table = &result.tables[0];
+    assert_eq!(table.database_identifier, Some("postgres".to_string()));
+    assert_eq!(table.schema_identifier, Some("ont_ont".to_string()));
+    assert_eq!(table.table_identifier, "total_sales_revenue");
+
+    // Check columns in base table
+    assert!(table.columns.contains("year"), "Missing 'year' column");
+    assert!(table.columns.contains("month"), "Missing 'month' column");
+    assert!(table.columns.contains("metric_totalsalesrevenue"), "Missing 'metric_totalsalesrevenue' column");
+
+    // Check CTE columns
+    let cte_table = &cte.summary.tables[0];
+    assert!(cte_table.columns.contains("sales_year"), "Missing 'sales_year' in CTE");
+    assert!(cte_table.columns.contains("sales_month"), "Missing 'sales_month' in CTE");
+    assert!(cte_table.columns.contains("total_revenue"), "Missing 'total_revenue' in CTE");
+}
+
diff --git a/cli/cli/src/commands/generate.rs b/cli/cli/src/commands/generate.rs
index 9a13af3b8..a7497b939 100644
--- a/cli/cli/src/commands/generate.rs
+++ b/cli/cli/src/commands/generate.rs
@@ -453,6 +453,56 @@ for (unique_id, node) in &dbt_catalog.nodes {
 
         match existing_yaml_model_opt {
             Some(mut existing_model) => {
+                // --- Reconciliation Logic for Existing Model ---
+                let mut model_updated = false;
+                let original_dim_count = existing_model.dimensions.len();
+                let original_measure_count = existing_model.measures.len();
+
+                // Get the set of column names from the dbt catalog for this model
+                let catalog_column_names: HashSet<String> = catalog_node.columns
+                    .keys()
+                    .cloned()
+                    .collect();
+
+                // Remove dimensions that are no longer in the catalog
+                existing_model.dimensions.retain(|dim| {
+                    let keep = catalog_column_names.contains(&dim.name);
+                    if !keep {
+                        columns_removed_count += 1;
+                        model_updated = true;
+                        println!(" - Removing dimension '{}' (not in catalog)", dim.name.yellow());
+                    }
+                    keep
+                });
+
+                // Remove measures that are no longer in the catalog
+                existing_model.measures.retain(|measure| {
+                    let keep = catalog_column_names.contains(&measure.name);
+                    if !keep {
+                        columns_removed_count += 1;
+                        model_updated = true;
+                        println!(" - Removing measure '{}' (not in catalog)", measure.name.yellow());
+                    }
+                    keep
+                });
+
+                // Note: We do NOT remove metrics, filters, or relationships automatically
+                // as they might represent derived logic or explicitly defined connections
+                // not directly tied 1:1 with current physical columns.
+
+                // TODO: Add logic here to ADD new columns from the catalog as dimensions/measures
+                // if they don't already exist in the existing_model.
+
+                if model_updated {
+                    let yaml_string = serde_yaml::to_string(&existing_model)?;
+                    fs::write(&individual_semantic_yaml_path, yaml_string)?;
+                    models_updated_count += 1;
+                    println!(" {} Updated existing semantic model: {}", "🔄".cyan(), individual_semantic_yaml_path.display().to_string().cyan());
+                } else {
+                    // If no columns were removed, maybe check if columns need *adding* later?
+                    // For now, just indicate no changes needed based on removal.
+                    // println!(" {} No column removals needed for: {}", "✅".dimmed(), individual_semantic_yaml_path.display().to_string().dimmed());
+                }
             }
             None => { // New semantic model
                 let mut dimensions = Vec::new();