parsing works, but still skipping too much

This commit is contained in:
dal 2025-05-06 12:58:50 -06:00
parent 70b92895eb
commit b8128dc75c
No known key found for this signature in database
GPG Key ID: 16F4B0E1E9F61122
3 changed files with 88 additions and 45 deletions

View File

@@ -240,18 +240,28 @@ pub async fn generate_semantic_models_command(
continue;
}
// Ensure metadata.name exists, as it's crucial for the semantic model name
let Some(ref dbt_model_name_for_yaml_from_metadata) = dbt_node.metadata.name else {
// Ensure metadata and metadata.name exist
let Some(ref dbt_node_metadata) = dbt_node.metadata else {
eprintln!(
"{}",
format!(
"Warning: Skipping dbt model with unique_id: {} because its 'metadata.name' is missing in catalog.json.",
"Warning: Skipping dbt node with unique_id: {} in generate because its 'metadata' block is missing.",
dbt_node.unique_id
).yellow()
);
continue;
};
let dbt_model_name_for_yaml = dbt_model_name_for_yaml_from_metadata.clone(); // Now safe to clone
let Some(ref dbt_model_name_from_metadata) = dbt_node_metadata.name else {
eprintln!(
"{}",
format!(
"Warning: Skipping dbt model with unique_id: {} in generate because its 'metadata.name' is missing.",
dbt_node.unique_id
).yellow()
);
continue;
};
let dbt_model_name_for_yaml = dbt_model_name_from_metadata.clone();
dbt_models_processed_count += 1;
// --- End Scoping Logic ---
@@ -290,7 +300,7 @@ pub async fn generate_semantic_models_command(
model_was_updated = true;
}
if let Some(dbt_comment) = &dbt_node.metadata.comment {
if let Some(dbt_comment) = &dbt_node_metadata.comment {
if existing_model.description.as_deref() != Some(dbt_comment.as_str()) {
existing_model.description = Some(dbt_comment.clone());
model_was_updated = true;
@@ -361,7 +371,7 @@ pub async fn generate_semantic_models_command(
for (col_name, dbt_col) in dbt_columns_map {
println!(" Adding new column '{}' to semantic model '{}'", col_name.green(), dbt_model_name_for_yaml);
if is_measure_type(&dbt_col.column_type) {
if crate::commands::init::is_measure_type(Some(dbt_col.column_type.as_str())) { // Assuming dbt_col.column_type is String
current_measures.push(YamlMeasure { name: dbt_col.name.clone(), description: dbt_col.comment.clone(), type_: Some(dbt_col.column_type.clone()) });
} else {
current_dims.push(YamlDimension { name: dbt_col.name.clone(), description: dbt_col.comment.clone(), type_: Some(dbt_col.column_type.clone()), searchable: false, options: None });
@@ -385,17 +395,27 @@ pub async fn generate_semantic_models_command(
println!("Generating new semantic model: {} at {}", dbt_model_name_for_yaml.green(), individual_semantic_yaml_path.display());
let mut dimensions = Vec::new();
let mut measures = Vec::new();
for (_col_name, col) in &dbt_node.columns {
if is_measure_type(&col.column_type) {
measures.push(YamlMeasure { name: col.name.clone(), description: col.comment.clone(), type_: Some(col.column_type.clone()) });
for (_col_name, col) in &dbt_node.columns { // dbt_node.columns defaults to empty if missing
if crate::commands::init::is_measure_type(Some(col.column_type.as_str())) { // Assuming col.column_type is String
measures.push(YamlMeasure {
name: col.name.clone(),
description: col.comment.clone(),
type_: Some(col.column_type.clone())
});
} else {
dimensions.push(YamlDimension { name: col.name.clone(), description: col.comment.clone(), type_: Some(col.column_type.clone()), searchable: false, options: None });
dimensions.push(YamlDimension {
name: col.name.clone(),
description: col.comment.clone(),
type_: Some(col.column_type.clone()),
searchable: false,
options: None
});
}
}
let new_model = YamlModel {
name: dbt_model_name_for_yaml.clone(),
description: dbt_node.metadata.comment.clone(),
data_source_name: buster_config.projects.as_ref().and_then(|p|p.first()).and_then(|pc|pc.data_source_name.clone()), // Default from first project context
description: dbt_node_metadata.comment.clone(), // Use dbt_node_metadata
data_source_name: buster_config.projects.as_ref().and_then(|p|p.first()).and_then(|pc|pc.data_source_name.clone()),
database: dbt_node.database.clone(),
schema: dbt_node.schema.clone(),
dimensions,

View File

@@ -84,16 +84,21 @@ pub fn is_false(val: &bool) -> bool {
}
// Helper function to determine if a SQL type should be a measure
pub fn is_measure_type(sql_type: &str) -> bool {
let lower_sql_type = sql_type.to_lowercase();
lower_sql_type.contains("int") ||
lower_sql_type.contains("numeric") ||
lower_sql_type.contains("decimal") ||
lower_sql_type.contains("real") ||
lower_sql_type.contains("double") ||
lower_sql_type.contains("float") ||
lower_sql_type.contains("money") ||
lower_sql_type.contains("number")
pub fn is_measure_type(sql_type_opt: Option<&str>) -> bool {
match sql_type_opt {
Some(sql_type) => {
let lower_sql_type = sql_type.to_lowercase();
lower_sql_type.contains("int") ||
lower_sql_type.contains("numeric") ||
lower_sql_type.contains("decimal") ||
lower_sql_type.contains("real") ||
lower_sql_type.contains("double") ||
lower_sql_type.contains("float") ||
lower_sql_type.contains("money") ||
lower_sql_type.contains("number")
}
None => false, // If type is missing, default to not a measure (dimension)
}
}
// Enum for Database Type selection (ensure only one definition, placed before use)
@@ -779,16 +784,25 @@ async fn generate_semantic_models_from_dbt_catalog(
eprintln!(
"{}",
format!(
"Warning: Skipping dbt model {} (unique_id: {}) because it is missing 'original_file_path' in catalog.json.",
node.name.as_deref().unwrap_or("[unknown name]"), // Use derived node.name if available
"Warning: Skipping dbt model unique_id: {} because it is missing 'original_file_path' in catalog.json.",
node.unique_id
).yellow()
);
continue;
};
// Ensure metadata.name exists, as it's crucial for the semantic model name
let Some(ref actual_model_name_from_metadata) = node.metadata.name else {
// Ensure metadata and metadata.name exist, as it's crucial for the semantic model name
let Some(ref node_metadata) = node.metadata else {
eprintln!(
"{}",
format!(
"Warning: Skipping dbt model with unique_id: {} because its 'metadata' block is missing in catalog.json.",
node.unique_id
).yellow()
);
continue;
};
let Some(ref actual_model_name_from_metadata) = node_metadata.name else {
eprintln!(
"{}",
format!(
@@ -798,7 +812,7 @@ async fn generate_semantic_models_from_dbt_catalog(
);
continue;
};
let actual_model_name = actual_model_name_from_metadata.clone(); // Now safe to clone
let actual_model_name = actual_model_name_from_metadata.clone();
let original_file_path_abs = buster_config_dir.join(original_file_path_str);
@@ -821,33 +835,33 @@ async fn generate_semantic_models_from_dbt_catalog(
let mut dimensions: Vec<YamlDimension> = Vec::new();
let mut measures: Vec<YamlMeasure> = Vec::new();
for (_col_name, col) in &node.columns {
if is_measure_type(&col.column_type) {
for (_col_name, col) in &node.columns { // node.columns is HashMap, defaults to empty if missing
if is_measure_type(Some(col.column_type.as_str())) { // Assuming col.column_type is String here based on linter
measures.push(YamlMeasure {
name: col.name.clone(),
description: col.comment.clone(),
type_: Some(col.column_type.clone()),
name: col.name.clone(),
description: col.comment.clone(),
type_: Some(col.column_type.clone()), // Wrap in Some()
});
} else {
dimensions.push(YamlDimension {
name: col.name.clone(),
description: col.comment.clone(),
type_: Some(col.column_type.clone()),
searchable: false, // Default to false, user can change
type_: Some(col.column_type.clone()), // Wrap in Some()
searchable: false,
options: None,
});
}
}
let yaml_model = YamlModel {
name: actual_model_name, // This should be the model's identifier name
description: node.metadata.comment.clone(), // Use metadata.comment as the source for description
name: actual_model_name.clone(),
description: node_metadata.comment.clone(), // Access comment via node_metadata ref
data_source_name: default_data_source_name.cloned(),
database: node.database.clone().or_else(|| default_database.cloned()),
schema: node.schema.clone().or_else(|| default_schema.cloned()),
database: node.database.clone().or_else(|| default_database.cloned()), // node.database is Option<String>
schema: node.schema.clone().or_else(|| default_schema.cloned()), // node.schema is Option<String>
dimensions,
measures,
original_file_path: Some(original_file_path_str.clone()), // Keep original dbt model path for reference
original_file_path: Some(original_file_path_str.clone()),
};
// Determine the output path for this individual YAML model

View File

@@ -5,9 +5,11 @@ use std::collections::HashMap;
#[derive(Debug, Deserialize, Clone)]
pub struct DbtCatalog {
pub metadata: DbtCatalogMetadata,
#[serde(default)]
pub metadata: Option<DbtCatalogMetadata>,
#[serde(default)]
pub nodes: HashMap<String, DbtNode>,
#[serde(default, skip_serializing_if = "Option::is_none")]
#[serde(default)]
pub sources: Option<HashMap<String, DbtSource>>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub macros: Option<HashMap<String, serde_json::Value>>,
@@ -29,8 +31,8 @@ pub struct DbtCatalog
#[derive(Debug, Deserialize, Clone)]
pub struct DbtCatalogMetadata {
#[serde(rename = "dbt_schema_version")]
pub dbt_schema_version: String,
#[serde(rename = "dbt_schema_version", default)]
pub dbt_schema_version: Option<String>,
#[allow(dead_code)] // If not used directly by Buster, but good for complete parsing
pub dbt_version: Option<String>,
#[allow(dead_code)]
@@ -47,7 +49,9 @@ pub struct DbtNode {
// However, standard dbt catalog.json *does* have a metadata block within each node.
// The example provided might be a slight simplification or custom representation.
// Assuming standard catalog structure for now, where DbtNodeMetadata is a separate struct.
pub metadata: DbtNodeMetadata,
#[serde(default)]
pub metadata: Option<DbtNodeMetadata>,
#[serde(default)]
pub columns: HashMap<String, DbtColumn>,
#[serde(rename = "resource_type")] // if resource_type is not directly in JSON, this helps map if some other key exists
// if type is the key in JSON for resource_type, then it should be:
@@ -84,13 +88,18 @@ pub struct DbtNodeMetadata {
#[derive(Debug, Deserialize, Clone)]
pub struct DbtSource {
pub name: String, // This is the source's table name
#[serde(default)]
pub name: Option<String>, // This is the source's table name
pub unique_id: String,
#[serde(default)]
pub database: Option<String>,
#[serde(default)]
pub schema: Option<String>,
#[serde(default, alias = "resource_type")] // Sources have "source" as resource_type, or a specific table type.
pub table_type: Option<String>, // e.g. "table", often not explicitly a 'type' field in catalog for sources, but implied.
#[serde(default)]
pub columns: HashMap<String, DbtColumn>,
#[serde(default)]
pub comment: Option<String>,
pub stats: Option<serde_json::Value>,
// Sources can also have a 'meta' field, 'tags', 'description', 'loader', 'freshness' etc.