mirror of https://github.com/buster-so/buster.git
generate command working well now
This commit is contained in:
parent
59671bda5b
commit
e25b7d6639
|
@ -58,19 +58,124 @@ pub async fn generate_semantic_models_command(
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
let catalog_nodes_by_name: HashMap<String, &CatalogNode> = dbt_catalog.nodes.values()
|
// Enhance catalog node lookup to use path information from unique_id
|
||||||
.filter_map(|node| {
|
let mut catalog_nodes_lookup: HashMap<String, &CatalogNode> = HashMap::new();
|
||||||
if node.derived_resource_type.as_deref() == Some("model") {
|
|
||||||
node.derived_model_name_from_file.as_ref().map(|name| (name.clone(), node))
|
|
||||||
} else { None }
|
|
||||||
})
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
if catalog_nodes_by_name.is_empty() {
|
// Function to extract path components from unique_id
|
||||||
|
fn extract_path_components(unique_id: &str) -> Option<(String, String, String)> {
|
||||||
|
let parts: Vec<&str> = unique_id.split('.').collect();
|
||||||
|
// Format: model.project_name.directory_path.model_name
|
||||||
|
// Or: source.project_name.schema.table
|
||||||
|
if parts.len() >= 4 {
|
||||||
|
let resource_type = parts[0].to_string();
|
||||||
|
let project_name = parts[1].to_string();
|
||||||
|
|
||||||
|
// For models, combine all middle parts as directory path
|
||||||
|
// For sources, use the schema
|
||||||
|
let schema_or_path = if resource_type == "model" && parts.len() > 4 {
|
||||||
|
parts[2..parts.len()-1].join(".")
|
||||||
|
} else {
|
||||||
|
parts[2].to_string()
|
||||||
|
};
|
||||||
|
|
||||||
|
// Last part is always the model/table name
|
||||||
|
let name = parts[parts.len()-1].to_string();
|
||||||
|
|
||||||
|
return Some((project_name, schema_or_path, name));
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
// Populate the lookup table with multiple keys for each node
|
||||||
|
for (unique_id, node) in &dbt_catalog.nodes {
|
||||||
|
// Always include the full unique_id as a lookup key
|
||||||
|
catalog_nodes_lookup.insert(unique_id.clone(), node);
|
||||||
|
|
||||||
|
if let Some((project, path_or_schema, name)) = extract_path_components(unique_id) {
|
||||||
|
// Add project.path.name as a lookup key
|
||||||
|
let project_path_key = format!("{}.{}.{}", project, path_or_schema, name);
|
||||||
|
catalog_nodes_lookup.insert(project_path_key, node);
|
||||||
|
|
||||||
|
// Add project.name as a lookup key
|
||||||
|
let project_name_key = format!("{}.{}", project, name);
|
||||||
|
catalog_nodes_lookup.insert(project_name_key, node);
|
||||||
|
|
||||||
|
// Add path.name as a lookup key
|
||||||
|
let path_name_key = format!("{}.{}", path_or_schema, name);
|
||||||
|
catalog_nodes_lookup.insert(path_name_key, node);
|
||||||
|
}
|
||||||
|
|
||||||
|
// For backward compatibility, also index by simple name
|
||||||
|
if let Some(derived_name) = &node.derived_model_name_from_file {
|
||||||
|
// Check if this name already exists in the lookup to avoid collisions
|
||||||
|
if !catalog_nodes_lookup.contains_key(derived_name) {
|
||||||
|
catalog_nodes_lookup.insert(derived_name.clone(), node);
|
||||||
|
} else {
|
||||||
|
println!("{}", format!("⚠️ Warning: Multiple models with name '{}' found in catalog. Using path information for disambiguation.", derived_name).yellow());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if catalog_nodes_lookup.is_empty() {
|
||||||
println!("{}", "ℹ️ No models found in dbt catalog. Nothing to generate/update.".yellow());
|
println!("{}", "ℹ️ No models found in dbt catalog. Nothing to generate/update.".yellow());
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Helper function to extract path components from SQL file paths
|
||||||
|
fn extract_sql_file_path_components(
|
||||||
|
sql_file_path: &Path,
|
||||||
|
project_root: &Path,
|
||||||
|
model_roots: &[PathBuf]
|
||||||
|
) -> Vec<String> {
|
||||||
|
// First try to strip the project root to get a relative path
|
||||||
|
if let Ok(rel_path) = sql_file_path.strip_prefix(project_root) {
|
||||||
|
// Now try to strip any of the model roots to find the most specific path
|
||||||
|
for model_root in model_roots {
|
||||||
|
// Handle both absolute and relative model roots
|
||||||
|
let abs_model_root = if model_root.is_absolute() {
|
||||||
|
model_root.clone()
|
||||||
|
} else {
|
||||||
|
project_root.join(model_root)
|
||||||
|
};
|
||||||
|
|
||||||
|
// Try to strip the model root from the SQL file path
|
||||||
|
if let Ok(path_from_model_root) = rel_path.strip_prefix(&model_root) {
|
||||||
|
// Split the remaining path into components
|
||||||
|
let components: Vec<String> = path_from_model_root
|
||||||
|
.parent()
|
||||||
|
.map(|p| p.components().map(|c| c.as_os_str().to_string_lossy().into_owned()).collect())
|
||||||
|
.unwrap_or_default();
|
||||||
|
|
||||||
|
// If we found components, add the model name (filename without extension)
|
||||||
|
let mut result = components;
|
||||||
|
if let Some(file_name) = sql_file_path.file_stem() {
|
||||||
|
result.push(file_name.to_string_lossy().into_owned());
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try with absolute model root as well
|
||||||
|
if let Ok(path_from_abs_model_root) = sql_file_path.strip_prefix(&abs_model_root) {
|
||||||
|
let components: Vec<String> = path_from_abs_model_root
|
||||||
|
.parent()
|
||||||
|
.map(|p| p.components().map(|c| c.as_os_str().to_string_lossy().into_owned()).collect())
|
||||||
|
.unwrap_or_default();
|
||||||
|
|
||||||
|
let mut result = components;
|
||||||
|
if let Some(file_name) = sql_file_path.file_stem() {
|
||||||
|
result.push(file_name.to_string_lossy().into_owned());
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fallback: Just return the file name without extension
|
||||||
|
sql_file_path.file_stem()
|
||||||
|
.map(|s| vec![s.to_string_lossy().into_owned()])
|
||||||
|
.unwrap_or_default()
|
||||||
|
}
|
||||||
|
|
||||||
// --- 2. Determine SQL Files to Process (based on path_arg or buster.yml model_paths) ---
|
// --- 2. Determine SQL Files to Process (based on path_arg or buster.yml model_paths) ---
|
||||||
println!("\n{}", "⚙️ Determining SQL files to process...".dimmed());
|
println!("\n{}", "⚙️ Determining SQL files to process...".dimmed());
|
||||||
let mut sql_files_to_process: HashSet<PathBuf> = HashSet::new();
|
let mut sql_files_to_process: HashSet<PathBuf> = HashSet::new();
|
||||||
|
@ -202,6 +307,54 @@ pub async fn generate_semantic_models_command(
|
||||||
let proj_default_schema = buster_config.projects.as_ref()
|
let proj_default_schema = buster_config.projects.as_ref()
|
||||||
.and_then(|p| p.first()).and_then(|pc| pc.schema.as_deref());
|
.and_then(|p| p.first()).and_then(|pc| pc.schema.as_deref());
|
||||||
|
|
||||||
|
// Helper function to find the best matching catalog node
|
||||||
|
fn find_matching_catalog_node<'a>(
|
||||||
|
lookup: &'a HashMap<String, &'a CatalogNode>,
|
||||||
|
project_name: &str,
|
||||||
|
path_components: &[String],
|
||||||
|
model_name: &str
|
||||||
|
) -> Option<(&'a CatalogNode, String, String)> {
|
||||||
|
// If path components are available, try increasingly specific lookups
|
||||||
|
if !path_components.is_empty() {
|
||||||
|
// Try most specific lookup first: full path with project
|
||||||
|
// Format: project.path1.path2.name
|
||||||
|
if path_components.len() > 1 {
|
||||||
|
let path_without_name = &path_components[0..path_components.len()-1];
|
||||||
|
let path_str = path_without_name.join(".");
|
||||||
|
let full_key = format!("{}.{}.{}", project_name, path_str, model_name);
|
||||||
|
if let Some(node) = lookup.get(&full_key) {
|
||||||
|
return Some((node, "full path".to_string(), full_key));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try with just the path and name (no project prefix)
|
||||||
|
let path_name_key = format!("{}.{}", path_str, model_name);
|
||||||
|
if let Some(node) = lookup.get(&path_name_key) {
|
||||||
|
return Some((node, "path and name".to_string(), path_name_key));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try with direct parent directory and name
|
||||||
|
if path_components.len() > 1 {
|
||||||
|
let direct_parent = &path_components[path_components.len()-2];
|
||||||
|
let parent_name_key = format!("{}.{}", direct_parent, model_name);
|
||||||
|
if let Some(node) = lookup.get(&parent_name_key) {
|
||||||
|
return Some((node, "parent directory and name".to_string(), parent_name_key));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try project and name without middle path
|
||||||
|
let project_name_key = format!("{}.{}", project_name, model_name);
|
||||||
|
if let Some(node) = lookup.get(&project_name_key) {
|
||||||
|
return Some((node, "project and name".to_string(), project_name_key));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fallback to simple name lookup
|
||||||
|
lookup.get(model_name).map(|&node|
|
||||||
|
(node, "simple name".to_string(), model_name.to_string())
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
for sql_file_abs_path in sql_files_to_process {
|
for sql_file_abs_path in sql_files_to_process {
|
||||||
let model_name_from_filename = sql_file_abs_path.file_stem().map_or_else(String::new, |s| s.to_string_lossy().into_owned());
|
let model_name_from_filename = sql_file_abs_path.file_stem().map_or_else(String::new, |s| s.to_string_lossy().into_owned());
|
||||||
if model_name_from_filename.is_empty() {
|
if model_name_from_filename.is_empty() {
|
||||||
|
@ -209,9 +362,35 @@ pub async fn generate_semantic_models_command(
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
let Some(catalog_node) = catalog_nodes_by_name.get(&model_name_from_filename) else {
|
// Extract path components from SQL file path
|
||||||
eprintln!("{}", format!("ℹ️ Info: SQL model file '{}' found, but no corresponding entry ('{}') in dbt catalog. Skipping.", sql_file_abs_path.display(), model_name_from_filename).dimmed());
|
let path_components = extract_sql_file_path_components(
|
||||||
|
&sql_file_abs_path,
|
||||||
|
&buster_config_dir,
|
||||||
|
&dbt_project_model_roots_for_stripping
|
||||||
|
);
|
||||||
|
|
||||||
|
// Get the first project name from configs, defaulting to "default" if not found
|
||||||
|
let project_name = buster_config.projects.as_ref()
|
||||||
|
.and_then(|p| p.first())
|
||||||
|
.and_then(|p| p.name.as_ref())
|
||||||
|
.map_or(String::from("default"), |v| v.clone());
|
||||||
|
|
||||||
|
// Try to find the matching catalog node using a prioritized approach
|
||||||
|
let catalog_node = match find_matching_catalog_node(
|
||||||
|
&catalog_nodes_lookup,
|
||||||
|
&project_name,
|
||||||
|
&path_components,
|
||||||
|
&model_name_from_filename
|
||||||
|
) {
|
||||||
|
Some((node, match_type, key)) => {
|
||||||
|
println!(" 🔍 Found model via {}: {}", match_type, key.purple());
|
||||||
|
node
|
||||||
|
},
|
||||||
|
None => {
|
||||||
|
eprintln!("{}", format!("ℹ️ Info: SQL model file '{}' found, but no corresponding entry in dbt catalog. Skipping.\nTried looking up with components: {:?}",
|
||||||
|
sql_file_abs_path.display(), path_components).dimmed());
|
||||||
continue;
|
continue;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
let Some(ref table_meta) = catalog_node.metadata else {
|
let Some(ref table_meta) = catalog_node.metadata else {
|
||||||
|
|
Loading…
Reference in New Issue