mirror of https://github.com/buster-so/buster.git
generate command working well now
This commit is contained in:
parent
59671bda5b
commit
e25b7d6639
|
@ -58,19 +58,124 @@ pub async fn generate_semantic_models_command(
|
|||
}
|
||||
};
|
||||
|
||||
let catalog_nodes_by_name: HashMap<String, &CatalogNode> = dbt_catalog.nodes.values()
|
||||
.filter_map(|node| {
|
||||
if node.derived_resource_type.as_deref() == Some("model") {
|
||||
node.derived_model_name_from_file.as_ref().map(|name| (name.clone(), node))
|
||||
} else { None }
|
||||
})
|
||||
.collect();
|
||||
// Enhance catalog node lookup to use path information from unique_id
|
||||
let mut catalog_nodes_lookup: HashMap<String, &CatalogNode> = HashMap::new();
|
||||
|
||||
if catalog_nodes_by_name.is_empty() {
|
||||
// Function to extract path components from unique_id
|
||||
fn extract_path_components(unique_id: &str) -> Option<(String, String, String)> {
|
||||
let parts: Vec<&str> = unique_id.split('.').collect();
|
||||
// Format: model.project_name.directory_path.model_name
|
||||
// Or: source.project_name.schema.table
|
||||
if parts.len() >= 4 {
|
||||
let resource_type = parts[0].to_string();
|
||||
let project_name = parts[1].to_string();
|
||||
|
||||
// For models, combine all middle parts as directory path
|
||||
// For sources, use the schema
|
||||
let schema_or_path = if resource_type == "model" && parts.len() > 4 {
|
||||
parts[2..parts.len()-1].join(".")
|
||||
} else {
|
||||
parts[2].to_string()
|
||||
};
|
||||
|
||||
// Last part is always the model/table name
|
||||
let name = parts[parts.len()-1].to_string();
|
||||
|
||||
return Some((project_name, schema_or_path, name));
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
// Populate the lookup table with multiple keys for each node
|
||||
for (unique_id, node) in &dbt_catalog.nodes {
|
||||
// Always include the full unique_id as a lookup key
|
||||
catalog_nodes_lookup.insert(unique_id.clone(), node);
|
||||
|
||||
if let Some((project, path_or_schema, name)) = extract_path_components(unique_id) {
|
||||
// Add project.path.name as a lookup key
|
||||
let project_path_key = format!("{}.{}.{}", project, path_or_schema, name);
|
||||
catalog_nodes_lookup.insert(project_path_key, node);
|
||||
|
||||
// Add project.name as a lookup key
|
||||
let project_name_key = format!("{}.{}", project, name);
|
||||
catalog_nodes_lookup.insert(project_name_key, node);
|
||||
|
||||
// Add path.name as a lookup key
|
||||
let path_name_key = format!("{}.{}", path_or_schema, name);
|
||||
catalog_nodes_lookup.insert(path_name_key, node);
|
||||
}
|
||||
|
||||
// For backward compatibility, also index by simple name
|
||||
if let Some(derived_name) = &node.derived_model_name_from_file {
|
||||
// Check if this name already exists in the lookup to avoid collisions
|
||||
if !catalog_nodes_lookup.contains_key(derived_name) {
|
||||
catalog_nodes_lookup.insert(derived_name.clone(), node);
|
||||
} else {
|
||||
println!("{}", format!("⚠️ Warning: Multiple models with name '{}' found in catalog. Using path information for disambiguation.", derived_name).yellow());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if catalog_nodes_lookup.is_empty() {
|
||||
println!("{}", "ℹ️ No models found in dbt catalog. Nothing to generate/update.".yellow());
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Helper function to extract path components from SQL file paths
|
||||
fn extract_sql_file_path_components(
|
||||
sql_file_path: &Path,
|
||||
project_root: &Path,
|
||||
model_roots: &[PathBuf]
|
||||
) -> Vec<String> {
|
||||
// First try to strip the project root to get a relative path
|
||||
if let Ok(rel_path) = sql_file_path.strip_prefix(project_root) {
|
||||
// Now try to strip any of the model roots to find the most specific path
|
||||
for model_root in model_roots {
|
||||
// Handle both absolute and relative model roots
|
||||
let abs_model_root = if model_root.is_absolute() {
|
||||
model_root.clone()
|
||||
} else {
|
||||
project_root.join(model_root)
|
||||
};
|
||||
|
||||
// Try to strip the model root from the SQL file path
|
||||
if let Ok(path_from_model_root) = rel_path.strip_prefix(&model_root) {
|
||||
// Split the remaining path into components
|
||||
let components: Vec<String> = path_from_model_root
|
||||
.parent()
|
||||
.map(|p| p.components().map(|c| c.as_os_str().to_string_lossy().into_owned()).collect())
|
||||
.unwrap_or_default();
|
||||
|
||||
// If we found components, add the model name (filename without extension)
|
||||
let mut result = components;
|
||||
if let Some(file_name) = sql_file_path.file_stem() {
|
||||
result.push(file_name.to_string_lossy().into_owned());
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Try with absolute model root as well
|
||||
if let Ok(path_from_abs_model_root) = sql_file_path.strip_prefix(&abs_model_root) {
|
||||
let components: Vec<String> = path_from_abs_model_root
|
||||
.parent()
|
||||
.map(|p| p.components().map(|c| c.as_os_str().to_string_lossy().into_owned()).collect())
|
||||
.unwrap_or_default();
|
||||
|
||||
let mut result = components;
|
||||
if let Some(file_name) = sql_file_path.file_stem() {
|
||||
result.push(file_name.to_string_lossy().into_owned());
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback: Just return the file name without extension
|
||||
sql_file_path.file_stem()
|
||||
.map(|s| vec![s.to_string_lossy().into_owned()])
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
// --- 2. Determine SQL Files to Process (based on path_arg or buster.yml model_paths) ---
|
||||
println!("\n{}", "⚙️ Determining SQL files to process...".dimmed());
|
||||
let mut sql_files_to_process: HashSet<PathBuf> = HashSet::new();
|
||||
|
@ -202,6 +307,54 @@ pub async fn generate_semantic_models_command(
|
|||
let proj_default_schema = buster_config.projects.as_ref()
|
||||
.and_then(|p| p.first()).and_then(|pc| pc.schema.as_deref());
|
||||
|
||||
// Helper function to find the best matching catalog node
|
||||
fn find_matching_catalog_node<'a>(
|
||||
lookup: &'a HashMap<String, &'a CatalogNode>,
|
||||
project_name: &str,
|
||||
path_components: &[String],
|
||||
model_name: &str
|
||||
) -> Option<(&'a CatalogNode, String, String)> {
|
||||
// If path components are available, try increasingly specific lookups
|
||||
if !path_components.is_empty() {
|
||||
// Try most specific lookup first: full path with project
|
||||
// Format: project.path1.path2.name
|
||||
if path_components.len() > 1 {
|
||||
let path_without_name = &path_components[0..path_components.len()-1];
|
||||
let path_str = path_without_name.join(".");
|
||||
let full_key = format!("{}.{}.{}", project_name, path_str, model_name);
|
||||
if let Some(node) = lookup.get(&full_key) {
|
||||
return Some((node, "full path".to_string(), full_key));
|
||||
}
|
||||
|
||||
// Try with just the path and name (no project prefix)
|
||||
let path_name_key = format!("{}.{}", path_str, model_name);
|
||||
if let Some(node) = lookup.get(&path_name_key) {
|
||||
return Some((node, "path and name".to_string(), path_name_key));
|
||||
}
|
||||
}
|
||||
|
||||
// Try with direct parent directory and name
|
||||
if path_components.len() > 1 {
|
||||
let direct_parent = &path_components[path_components.len()-2];
|
||||
let parent_name_key = format!("{}.{}", direct_parent, model_name);
|
||||
if let Some(node) = lookup.get(&parent_name_key) {
|
||||
return Some((node, "parent directory and name".to_string(), parent_name_key));
|
||||
}
|
||||
}
|
||||
|
||||
// Try project and name without middle path
|
||||
let project_name_key = format!("{}.{}", project_name, model_name);
|
||||
if let Some(node) = lookup.get(&project_name_key) {
|
||||
return Some((node, "project and name".to_string(), project_name_key));
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback to simple name lookup
|
||||
lookup.get(model_name).map(|&node|
|
||||
(node, "simple name".to_string(), model_name.to_string())
|
||||
)
|
||||
}
|
||||
|
||||
for sql_file_abs_path in sql_files_to_process {
|
||||
let model_name_from_filename = sql_file_abs_path.file_stem().map_or_else(String::new, |s| s.to_string_lossy().into_owned());
|
||||
if model_name_from_filename.is_empty() {
|
||||
|
@ -209,9 +362,35 @@ pub async fn generate_semantic_models_command(
|
|||
continue;
|
||||
}
|
||||
|
||||
let Some(catalog_node) = catalog_nodes_by_name.get(&model_name_from_filename) else {
|
||||
eprintln!("{}", format!("ℹ️ Info: SQL model file '{}' found, but no corresponding entry ('{}') in dbt catalog. Skipping.", sql_file_abs_path.display(), model_name_from_filename).dimmed());
|
||||
continue;
|
||||
// Extract path components from SQL file path
|
||||
let path_components = extract_sql_file_path_components(
|
||||
&sql_file_abs_path,
|
||||
&buster_config_dir,
|
||||
&dbt_project_model_roots_for_stripping
|
||||
);
|
||||
|
||||
// Get the first project name from configs, defaulting to "default" if not found
|
||||
let project_name = buster_config.projects.as_ref()
|
||||
.and_then(|p| p.first())
|
||||
.and_then(|p| p.name.as_ref())
|
||||
.map_or(String::from("default"), |v| v.clone());
|
||||
|
||||
// Try to find the matching catalog node using a prioritized approach
|
||||
let catalog_node = match find_matching_catalog_node(
|
||||
&catalog_nodes_lookup,
|
||||
&project_name,
|
||||
&path_components,
|
||||
&model_name_from_filename
|
||||
) {
|
||||
Some((node, match_type, key)) => {
|
||||
println!(" 🔍 Found model via {}: {}", match_type, key.purple());
|
||||
node
|
||||
},
|
||||
None => {
|
||||
eprintln!("{}", format!("ℹ️ Info: SQL model file '{}' found, but no corresponding entry in dbt catalog. Skipping.\nTried looking up with components: {:?}",
|
||||
sql_file_abs_path.display(), path_components).dimmed());
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
let Some(ref table_meta) = catalog_node.metadata else {
|
||||
|
|
Loading…
Reference in New Issue