generate command working well now

This commit is contained in:
dal 2025-05-12 08:19:46 -06:00
parent 59671bda5b
commit e25b7d6639
No known key found for this signature in database
GPG Key ID: 16F4B0E1E9F61122
1 changed files with 190 additions and 11 deletions

View File

@ -58,19 +58,124 @@ pub async fn generate_semantic_models_command(
}
};
let catalog_nodes_by_name: HashMap<String, &CatalogNode> = dbt_catalog.nodes.values()
.filter_map(|node| {
if node.derived_resource_type.as_deref() == Some("model") {
node.derived_model_name_from_file.as_ref().map(|name| (name.clone(), node))
} else { None }
})
.collect();
// Enhance catalog node lookup to use path information from unique_id
let mut catalog_nodes_lookup: HashMap<String, &CatalogNode> = HashMap::new();
if catalog_nodes_by_name.is_empty() {
// Function to extract path components from unique_id
fn extract_path_components(unique_id: &str) -> Option<(String, String, String)> {
let parts: Vec<&str> = unique_id.split('.').collect();
// Format: model.project_name.directory_path.model_name
// Or: source.project_name.schema.table
if parts.len() >= 4 {
let resource_type = parts[0].to_string();
let project_name = parts[1].to_string();
// For models, combine all middle parts as directory path
// For sources, use the schema
let schema_or_path = if resource_type == "model" && parts.len() > 4 {
parts[2..parts.len()-1].join(".")
} else {
parts[2].to_string()
};
// Last part is always the model/table name
let name = parts[parts.len()-1].to_string();
return Some((project_name, schema_or_path, name));
}
None
}
// Populate the lookup table with multiple keys for each node
for (unique_id, node) in &dbt_catalog.nodes {
// Always include the full unique_id as a lookup key
catalog_nodes_lookup.insert(unique_id.clone(), node);
if let Some((project, path_or_schema, name)) = extract_path_components(unique_id) {
// Add project.path.name as a lookup key
let project_path_key = format!("{}.{}.{}", project, path_or_schema, name);
catalog_nodes_lookup.insert(project_path_key, node);
// Add project.name as a lookup key
let project_name_key = format!("{}.{}", project, name);
catalog_nodes_lookup.insert(project_name_key, node);
// Add path.name as a lookup key
let path_name_key = format!("{}.{}", path_or_schema, name);
catalog_nodes_lookup.insert(path_name_key, node);
}
// For backward compatibility, also index by simple name
if let Some(derived_name) = &node.derived_model_name_from_file {
// Check if this name already exists in the lookup to avoid collisions
if !catalog_nodes_lookup.contains_key(derived_name) {
catalog_nodes_lookup.insert(derived_name.clone(), node);
} else {
println!("{}", format!("⚠️ Warning: Multiple models with name '{}' found in catalog. Using path information for disambiguation.", derived_name).yellow());
}
}
}
if catalog_nodes_lookup.is_empty() {
println!("{}", " No models found in dbt catalog. Nothing to generate/update.".yellow());
return Ok(());
}
// Helper function to extract path components from SQL file paths
fn extract_sql_file_path_components(
sql_file_path: &Path,
project_root: &Path,
model_roots: &[PathBuf]
) -> Vec<String> {
// First try to strip the project root to get a relative path
if let Ok(rel_path) = sql_file_path.strip_prefix(project_root) {
// Now try to strip any of the model roots to find the most specific path
for model_root in model_roots {
// Handle both absolute and relative model roots
let abs_model_root = if model_root.is_absolute() {
model_root.clone()
} else {
project_root.join(model_root)
};
// Try to strip the model root from the SQL file path
if let Ok(path_from_model_root) = rel_path.strip_prefix(&model_root) {
// Split the remaining path into components
let components: Vec<String> = path_from_model_root
.parent()
.map(|p| p.components().map(|c| c.as_os_str().to_string_lossy().into_owned()).collect())
.unwrap_or_default();
// If we found components, add the model name (filename without extension)
let mut result = components;
if let Some(file_name) = sql_file_path.file_stem() {
result.push(file_name.to_string_lossy().into_owned());
}
return result;
}
// Try with absolute model root as well
if let Ok(path_from_abs_model_root) = sql_file_path.strip_prefix(&abs_model_root) {
let components: Vec<String> = path_from_abs_model_root
.parent()
.map(|p| p.components().map(|c| c.as_os_str().to_string_lossy().into_owned()).collect())
.unwrap_or_default();
let mut result = components;
if let Some(file_name) = sql_file_path.file_stem() {
result.push(file_name.to_string_lossy().into_owned());
}
return result;
}
}
}
// Fallback: Just return the file name without extension
sql_file_path.file_stem()
.map(|s| vec![s.to_string_lossy().into_owned()])
.unwrap_or_default()
}
// --- 2. Determine SQL Files to Process (based on path_arg or buster.yml model_paths) ---
println!("\n{}", "⚙️ Determining SQL files to process...".dimmed());
let mut sql_files_to_process: HashSet<PathBuf> = HashSet::new();
@ -202,6 +307,54 @@ pub async fn generate_semantic_models_command(
let proj_default_schema = buster_config.projects.as_ref()
.and_then(|p| p.first()).and_then(|pc| pc.schema.as_deref());
// Helper function to find the best matching catalog node
fn find_matching_catalog_node<'a>(
lookup: &'a HashMap<String, &'a CatalogNode>,
project_name: &str,
path_components: &[String],
model_name: &str
) -> Option<(&'a CatalogNode, String, String)> {
// If path components are available, try increasingly specific lookups
if !path_components.is_empty() {
// Try most specific lookup first: full path with project
// Format: project.path1.path2.name
if path_components.len() > 1 {
let path_without_name = &path_components[0..path_components.len()-1];
let path_str = path_without_name.join(".");
let full_key = format!("{}.{}.{}", project_name, path_str, model_name);
if let Some(node) = lookup.get(&full_key) {
return Some((node, "full path".to_string(), full_key));
}
// Try with just the path and name (no project prefix)
let path_name_key = format!("{}.{}", path_str, model_name);
if let Some(node) = lookup.get(&path_name_key) {
return Some((node, "path and name".to_string(), path_name_key));
}
}
// Try with direct parent directory and name
if path_components.len() > 1 {
let direct_parent = &path_components[path_components.len()-2];
let parent_name_key = format!("{}.{}", direct_parent, model_name);
if let Some(node) = lookup.get(&parent_name_key) {
return Some((node, "parent directory and name".to_string(), parent_name_key));
}
}
// Try project and name without middle path
let project_name_key = format!("{}.{}", project_name, model_name);
if let Some(node) = lookup.get(&project_name_key) {
return Some((node, "project and name".to_string(), project_name_key));
}
}
// Fallback to simple name lookup
lookup.get(model_name).map(|&node|
(node, "simple name".to_string(), model_name.to_string())
)
}
for sql_file_abs_path in sql_files_to_process {
let model_name_from_filename = sql_file_abs_path.file_stem().map_or_else(String::new, |s| s.to_string_lossy().into_owned());
if model_name_from_filename.is_empty() {
@ -209,9 +362,35 @@ pub async fn generate_semantic_models_command(
continue;
}
let Some(catalog_node) = catalog_nodes_by_name.get(&model_name_from_filename) else {
eprintln!("{}", format!(" Info: SQL model file '{}' found, but no corresponding entry ('{}') in dbt catalog. Skipping.", sql_file_abs_path.display(), model_name_from_filename).dimmed());
continue;
// Extract path components from SQL file path
let path_components = extract_sql_file_path_components(
&sql_file_abs_path,
&buster_config_dir,
&dbt_project_model_roots_for_stripping
);
// Get the first project name from configs, defaulting to "default" if not found
let project_name = buster_config.projects.as_ref()
.and_then(|p| p.first())
.and_then(|p| p.name.as_ref())
.map_or(String::from("default"), |v| v.clone());
// Try to find the matching catalog node using a prioritized approach
let catalog_node = match find_matching_catalog_node(
&catalog_nodes_lookup,
&project_name,
&path_components,
&model_name_from_filename
) {
Some((node, match_type, key)) => {
println!(" 🔍 Found model via {}: {}", match_type, key.purple());
node
},
None => {
eprintln!("{}", format!(" Info: SQL model file '{}' found, but no corresponding entry in dbt catalog. Skipping.\nTried looking up with components: {:?}",
sql_file_abs_path.display(), path_components).dimmed());
continue;
}
};
let Some(ref table_meta) = catalog_node.metadata else {