From e25b7d663952840b95b07625584faac4088e8c43 Mon Sep 17 00:00:00 2001 From: dal Date: Mon, 12 May 2025 08:19:46 -0600 Subject: [PATCH] generate command working well now --- cli/cli/src/commands/generate.rs | 201 +++++++++++++++++++++++++++++-- 1 file changed, 190 insertions(+), 11 deletions(-) diff --git a/cli/cli/src/commands/generate.rs b/cli/cli/src/commands/generate.rs index 2affbc18e..2735b1c05 100644 --- a/cli/cli/src/commands/generate.rs +++ b/cli/cli/src/commands/generate.rs @@ -58,19 +58,124 @@ pub async fn generate_semantic_models_command( } }; - let catalog_nodes_by_name: HashMap = dbt_catalog.nodes.values() - .filter_map(|node| { - if node.derived_resource_type.as_deref() == Some("model") { - node.derived_model_name_from_file.as_ref().map(|name| (name.clone(), node)) - } else { None } - }) - .collect(); + // Enhance catalog node lookup to use path information from unique_id +let mut catalog_nodes_lookup: HashMap = HashMap::new(); - if catalog_nodes_by_name.is_empty() { +// Function to extract path components from unique_id +fn extract_path_components(unique_id: &str) -> Option<(String, String, String)> { + let parts: Vec<&str> = unique_id.split('.').collect(); + // Format: model.project_name.directory_path.model_name + // Or: source.project_name.schema.table + if parts.len() >= 4 { + let resource_type = parts[0].to_string(); + let project_name = parts[1].to_string(); + + // For models, combine all middle parts as directory path + // For sources, use the schema + let schema_or_path = if resource_type == "model" && parts.len() > 4 { + parts[2..parts.len()-1].join(".") + } else { + parts[2].to_string() + }; + + // Last part is always the model/table name + let name = parts[parts.len()-1].to_string(); + + return Some((project_name, schema_or_path, name)); + } + None +} + +// Populate the lookup table with multiple keys for each node +for (unique_id, node) in &dbt_catalog.nodes { + // Always include the full unique_id as a lookup key + catalog_nodes_lookup.insert(unique_id.clone(), node); + + if let Some((project, path_or_schema, name)) = extract_path_components(unique_id) { + // Add project.path.name as a lookup key + let project_path_key = format!("{}.{}.{}", project, path_or_schema, name); + catalog_nodes_lookup.insert(project_path_key, node); + + // Add project.name as a lookup key + let project_name_key = format!("{}.{}", project, name); + catalog_nodes_lookup.insert(project_name_key, node); + + // Add path.name as a lookup key + let path_name_key = format!("{}.{}", path_or_schema, name); + catalog_nodes_lookup.insert(path_name_key, node); + } + + // For backward compatibility, also index by simple name + if let Some(derived_name) = &node.derived_model_name_from_file { + // Check if this name already exists in the lookup to avoid collisions + if !catalog_nodes_lookup.contains_key(derived_name) { + catalog_nodes_lookup.insert(derived_name.clone(), node); + } else { + println!("{}", format!("⚠️ Warning: Multiple models with name '{}' found in catalog. Using path information for disambiguation.", derived_name).yellow()); + } + } +} + + if catalog_nodes_lookup.is_empty() { println!("{}", "ℹ️ No models found in dbt catalog. Nothing to generate/update.".yellow()); return Ok(()); } + // Helper function to extract path components from SQL file paths + fn extract_sql_file_path_components( + sql_file_path: &Path, + project_root: &Path, + model_roots: &[PathBuf] + ) -> Vec { + // First try to strip the project root to get a relative path + if let Ok(rel_path) = sql_file_path.strip_prefix(project_root) { + // Now try to strip any of the model roots to find the most specific path + for model_root in model_roots { + // Handle both absolute and relative model roots + let abs_model_root = if model_root.is_absolute() { + model_root.clone() + } else { + project_root.join(model_root) + }; + + // Try to strip the model root from the SQL file path + if let Ok(path_from_model_root) = rel_path.strip_prefix(&model_root) { + // Split the remaining path into components + let components: Vec = path_from_model_root + .parent() + .map(|p| p.components().map(|c| c.as_os_str().to_string_lossy().into_owned()).collect()) + .unwrap_or_default(); + + // If we found components, add the model name (filename without extension) + let mut result = components; + if let Some(file_name) = sql_file_path.file_stem() { + result.push(file_name.to_string_lossy().into_owned()); + } + return result; + } + + // Try with absolute model root as well + if let Ok(path_from_abs_model_root) = sql_file_path.strip_prefix(&abs_model_root) { + let components: Vec = path_from_abs_model_root + .parent() + .map(|p| p.components().map(|c| c.as_os_str().to_string_lossy().into_owned()).collect()) + .unwrap_or_default(); + + let mut result = components; + if let Some(file_name) = sql_file_path.file_stem() { + result.push(file_name.to_string_lossy().into_owned()); + } + return result; + } + } + } + + // Fallback: Just return the file name without extension + sql_file_path.file_stem() + .map(|s| vec![s.to_string_lossy().into_owned()]) + .unwrap_or_default() + } + // --- 2. Determine SQL Files to Process (based on path_arg or buster.yml model_paths) --- println!("\n{}", "⚙️ Determining SQL files to process...".dimmed()); let mut sql_files_to_process: HashSet = HashSet::new(); @@ -202,6 +307,54 @@ pub async fn generate_semantic_models_command( let proj_default_schema = buster_config.projects.as_ref() .and_then(|p| p.first()).and_then(|pc| pc.schema.as_deref()); + // Helper function to find the best matching catalog node + fn find_matching_catalog_node<'a>( + lookup: &'a HashMap, + project_name: &str, + path_components: &[String], + model_name: &str + ) -> Option<(&'a CatalogNode, String, String)> { + // If path components are available, try increasingly specific lookups + if !path_components.is_empty() { + // Try most specific lookup first: full path with project + // Format: project.path1.path2.name + if path_components.len() > 1 { + let path_without_name = &path_components[0..path_components.len()-1]; + let path_str = path_without_name.join("."); + let full_key = format!("{}.{}.{}", project_name, path_str, model_name); + if let Some(node) = lookup.get(&full_key) { + return Some((node, "full path".to_string(), full_key)); + } + + // Try with just the path and name (no project prefix) + let path_name_key = format!("{}.{}", path_str, model_name); + if let Some(node) = lookup.get(&path_name_key) { + return Some((node, "path and name".to_string(), path_name_key)); + } + } + + // Try with direct parent directory and name + if path_components.len() > 1 { + let direct_parent = &path_components[path_components.len()-2]; + let parent_name_key = format!("{}.{}", direct_parent, model_name); + if let Some(node) = lookup.get(&parent_name_key) { + return Some((node, "parent directory and name".to_string(), parent_name_key)); + } + } + + // Try project and name without middle path + let project_name_key = format!("{}.{}", project_name, model_name); + if let Some(node) = lookup.get(&project_name_key) { + return Some((node, "project and name".to_string(), project_name_key)); + } + } + + // Fallback to simple name lookup + lookup.get(model_name).map(|&node| + (node, "simple name".to_string(), model_name.to_string()) + ) + } + for sql_file_abs_path in sql_files_to_process { let model_name_from_filename = sql_file_abs_path.file_stem().map_or_else(String::new, |s| s.to_string_lossy().into_owned()); if model_name_from_filename.is_empty() { @@ -209,9 +362,35 @@ pub async fn generate_semantic_models_command( continue; } - let Some(catalog_node) = catalog_nodes_by_name.get(&model_name_from_filename) else { - eprintln!("{}", format!("ℹ️ Info: SQL model file '{}' found, but no corresponding entry ('{}') in dbt catalog. Skipping.", sql_file_abs_path.display(), model_name_from_filename).dimmed()); - continue; + // Extract path components from SQL file path + let path_components = extract_sql_file_path_components( + &sql_file_abs_path, + &buster_config_dir, + &dbt_project_model_roots_for_stripping + ); + + // Get the first project name from configs, defaulting to "default" if not found + let project_name = buster_config.projects.as_ref() + .and_then(|p| p.first()) + .and_then(|p| p.name.as_ref()) + .map_or(String::from("default"), |v| v.clone()); + + // Try to find the matching catalog node using a prioritized approach + let catalog_node = match find_matching_catalog_node( + &catalog_nodes_lookup, + &project_name, + &path_components, + &model_name_from_filename + ) { + Some((node, match_type, key)) => { + println!(" 🔍 Found model via {}: {}", match_type, key.purple()); + node + }, + None => { + eprintln!("{}", format!("ℹ️ Info: SQL model file '{}' found, but no corresponding entry in dbt catalog. Skipping.\nTried looking up with components: {:?}", + sql_file_abs_path.display(), path_components).dimmed()); + continue; + } }; let Some(ref table_meta) = catalog_node.metadata else {