parsing works, but still skipping too much

This commit is contained in:
dal 2025-05-06 12:58:50 -06:00
parent 70b92895eb
commit b8128dc75c
No known key found for this signature in database
GPG Key ID: 16F4B0E1E9F61122
3 changed files with 88 additions and 45 deletions

View File

@@ -240,18 +240,28 @@ pub async fn generate_semantic_models_command(
continue;
}
// Ensure metadata.name exists, as it's crucial for the semantic model name
let Some(ref dbt_model_name_for_yaml_from_metadata) = dbt_node.metadata.name else {
// Ensure metadata and metadata.name exist
let Some(ref dbt_node_metadata) = dbt_node.metadata else {
eprintln!(
"{}",
format!(
"Warning: Skipping dbt model with unique_id: {} because its 'metadata.name' is missing in catalog.json.",
"Warning: Skipping dbt node with unique_id: {} in generate because its 'metadata' block is missing.",
dbt_node.unique_id
).yellow()
);
continue;
};
let dbt_model_name_for_yaml = dbt_model_name_for_yaml_from_metadata.clone(); // Now safe to clone
let Some(ref dbt_model_name_from_metadata) = dbt_node_metadata.name else {
eprintln!(
"{}",
format!(
"Warning: Skipping dbt model with unique_id: {} in generate because its 'metadata.name' is missing.",
dbt_node.unique_id
).yellow()
);
continue;
};
let dbt_model_name_for_yaml = dbt_model_name_from_metadata.clone();
dbt_models_processed_count += 1;
// --- End Scoping Logic ---
@@ -290,7 +300,7 @@ pub async fn generate_semantic_models_command(
model_was_updated = true;
}
if let Some(dbt_comment) = &dbt_node.metadata.comment {
if let Some(dbt_comment) = &dbt_node_metadata.comment {
if existing_model.description.as_deref() != Some(dbt_comment.as_str()) {
existing_model.description = Some(dbt_comment.clone());
model_was_updated = true;
@@ -361,7 +371,7 @@ pub async fn generate_semantic_models_command(
for (col_name, dbt_col) in dbt_columns_map {
println!(" Adding new column '{}' to semantic model '{}'", col_name.green(), dbt_model_name_for_yaml);
if is_measure_type(&dbt_col.column_type) {
if crate::commands::init::is_measure_type(Some(dbt_col.column_type.as_str())) { // Assuming dbt_col.column_type is String
current_measures.push(YamlMeasure { name: dbt_col.name.clone(), description: dbt_col.comment.clone(), type_: Some(dbt_col.column_type.clone()) });
} else {
current_dims.push(YamlDimension { name: dbt_col.name.clone(), description: dbt_col.comment.clone(), type_: Some(dbt_col.column_type.clone()), searchable: false, options: None });
@@ -385,17 +395,27 @@ pub async fn generate_semantic_models_command(
println!("Generating new semantic model: {} at {}", dbt_model_name_for_yaml.green(), individual_semantic_yaml_path.display());
let mut dimensions = Vec::new();
let mut measures = Vec::new();
for (_col_name, col) in &dbt_node.columns {
if is_measure_type(&col.column_type) {
measures.push(YamlMeasure { name: col.name.clone(), description: col.comment.clone(), type_: Some(col.column_type.clone()) });
for (_col_name, col) in &dbt_node.columns { // dbt_node.columns defaults to empty if missing
if crate::commands::init::is_measure_type(Some(col.column_type.as_str())) { // Assuming col.column_type is String
measures.push(YamlMeasure {
name: col.name.clone(),
description: col.comment.clone(),
type_: Some(col.column_type.clone())
});
} else {
dimensions.push(YamlDimension { name: col.name.clone(), description: col.comment.clone(), type_: Some(col.column_type.clone()), searchable: false, options: None });
dimensions.push(YamlDimension {
name: col.name.clone(),
description: col.comment.clone(),
type_: Some(col.column_type.clone()),
searchable: false,
options: None
});
}
}
let new_model = YamlModel {
name: dbt_model_name_for_yaml.clone(),
description: dbt_node.metadata.comment.clone(),
data_source_name: buster_config.projects.as_ref().and_then(|p|p.first()).and_then(|pc|pc.data_source_name.clone()), // Default from first project context
description: dbt_node_metadata.comment.clone(), // Use dbt_node_metadata
data_source_name: buster_config.projects.as_ref().and_then(|p|p.first()).and_then(|pc|pc.data_source_name.clone()),
database: dbt_node.database.clone(),
schema: dbt_node.schema.clone(),
dimensions,

View File

@@ -84,16 +84,21 @@ pub fn is_false(val: &bool) -> bool {
}
// Helper function to determine if a SQL type should be a measure
pub fn is_measure_type(sql_type: &str) -> bool {
let lower_sql_type = sql_type.to_lowercase();
lower_sql_type.contains("int") ||
lower_sql_type.contains("numeric") ||
lower_sql_type.contains("decimal") ||
lower_sql_type.contains("real") ||
lower_sql_type.contains("double") ||
lower_sql_type.contains("float") ||
lower_sql_type.contains("money") ||
lower_sql_type.contains("number")
pub fn is_measure_type(sql_type_opt: Option<&str>) -> bool {
match sql_type_opt {
Some(sql_type) => {
let lower_sql_type = sql_type.to_lowercase();
lower_sql_type.contains("int") ||
lower_sql_type.contains("numeric") ||
lower_sql_type.contains("decimal") ||
lower_sql_type.contains("real") ||
lower_sql_type.contains("double") ||
lower_sql_type.contains("float") ||
lower_sql_type.contains("money") ||
lower_sql_type.contains("number")
}
None => false, // If type is missing, default to not a measure (dimension)
}
}
// Enum for Database Type selection (ensure only one definition, placed before use)
@@ -779,16 +784,25 @@ async fn generate_semantic_models_from_dbt_catalog(
eprintln!(
"{}",
format!(
"Warning: Skipping dbt model {} (unique_id: {}) because it is missing 'original_file_path' in catalog.json.",
node.name.as_deref().unwrap_or("[unknown name]"), // Use derived node.name if available
"Warning: Skipping dbt model unique_id: {} because it is missing 'original_file_path' in catalog.json.",
node.unique_id
).yellow()
);
continue;
};
// Ensure metadata.name exists, as it's crucial for the semantic model name
let Some(ref actual_model_name_from_metadata) = node.metadata.name else {
// Ensure metadata and metadata.name exist, as it's crucial for the semantic model name
let Some(ref node_metadata) = node.metadata else {
eprintln!(
"{}",
format!(
"Warning: Skipping dbt model with unique_id: {} because its 'metadata' block is missing in catalog.json.",
node.unique_id
).yellow()
);
continue;
};
let Some(ref actual_model_name_from_metadata) = node_metadata.name else {
eprintln!(
"{}",
format!(
@@ -798,7 +812,7 @@ async fn generate_semantic_models_from_dbt_catalog(
);
continue;
};
let actual_model_name = actual_model_name_from_metadata.clone(); // Now safe to clone
let actual_model_name = actual_model_name_from_metadata.clone();
let original_file_path_abs = buster_config_dir.join(original_file_path_str);
@@ -821,33 +835,33 @@ async fn generate_semantic_models_from_dbt_catalog(
let mut dimensions: Vec<YamlDimension> = Vec::new();
let mut measures: Vec<YamlMeasure> = Vec::new();
for (_col_name, col) in &node.columns {
if is_measure_type(&col.column_type) {
for (_col_name, col) in &node.columns { // node.columns is HashMap, defaults to empty if missing
if is_measure_type(Some(col.column_type.as_str())) { // Assuming col.column_type is String here based on linter
measures.push(YamlMeasure {
name: col.name.clone(),
description: col.comment.clone(),
type_: Some(col.column_type.clone()),
name: col.name.clone(),
description: col.comment.clone(),
type_: Some(col.column_type.clone()), // Wrap in Some()
});
} else {
dimensions.push(YamlDimension {
name: col.name.clone(),
description: col.comment.clone(),
type_: Some(col.column_type.clone()),
searchable: false, // Default to false, user can change
type_: Some(col.column_type.clone()), // Wrap in Some()
searchable: false,
options: None,
});
}
}
let yaml_model = YamlModel {
name: actual_model_name, // This should be the model's identifier name
description: node.metadata.comment.clone(), // Use metadata.comment as the source for description
name: actual_model_name.clone(),
description: node_metadata.comment.clone(), // Access comment via node_metadata ref
data_source_name: default_data_source_name.cloned(),
database: node.database.clone().or_else(|| default_database.cloned()),
schema: node.schema.clone().or_else(|| default_schema.cloned()),
database: node.database.clone().or_else(|| default_database.cloned()), // node.database is Option<String>
schema: node.schema.clone().or_else(|| default_schema.cloned()), // node.schema is Option<String>
dimensions,
measures,
original_file_path: Some(original_file_path_str.clone()), // Keep original dbt model path for reference
original_file_path: Some(original_file_path_str.clone()),
};
// Determine the output path for this individual YAML model

View File

@@ -5,9 +5,11 @@ use std::collections::HashMap;
#[derive(Debug, Deserialize, Clone)]
pub struct DbtCatalog {
pub metadata: DbtCatalogMetadata,
#[serde(default)]
pub metadata: Option<DbtCatalogMetadata>,
#[serde(default)]
pub nodes: HashMap<String, DbtNode>,
#[serde(default, skip_serializing_if = "Option::is_none")]
#[serde(default)]
pub sources: Option<HashMap<String, DbtSource>>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub macros: Option<HashMap<String, serde_json::Value>>,
@@ -29,8 +31,8 @@ pub struct DbtCatalog
#[derive(Debug, Deserialize, Clone)]
pub struct DbtCatalogMetadata {
#[serde(rename = "dbt_schema_version")]
pub dbt_schema_version: String,
#[serde(rename = "dbt_schema_version", default)]
pub dbt_schema_version: Option<String>,
#[allow(dead_code)] // If not used directly by Buster, but good for complete parsing
pub dbt_version: Option<String>,
#[allow(dead_code)]
@@ -47,7 +49,9 @@ pub struct DbtNode {
// However, standard dbt catalog.json *does* have a metadata block within each node.
// The example provided might be a slight simplification or custom representation.
// Assuming standard catalog structure for now, where DbtNodeMetadata is a separate struct.
pub metadata: DbtNodeMetadata,
#[serde(default)]
pub metadata: Option<DbtNodeMetadata>,
#[serde(default)]
pub columns: HashMap<String, DbtColumn>,
#[serde(rename = "resource_type")] // if resource_type is not directly in JSON, this helps map if some other key exists
// if type is the key in JSON for resource_type, then it should be:
@@ -84,13 +88,18 @@ pub struct DbtNodeMetadata {
#[derive(Debug, Deserialize, Clone)]
pub struct DbtSource {
pub name: String, // This is the source's table name
#[serde(default)]
pub name: Option<String>, // This is the source's table name
pub unique_id: String,
#[serde(default)]
pub database: Option<String>,
#[serde(default)]
pub schema: Option<String>,
#[serde(default, alias = "resource_type")] // Sources have "source" as resource_type, or a specific table type.
pub table_type: Option<String>, // e.g. "table", often not explicitly a 'type' field in catalog for sources, but implied.
#[serde(default)]
pub columns: HashMap<String, DbtColumn>,
#[serde(default)]
pub comment: Option<String>,
pub stats: Option<serde_json::Value>,
// Sources can also have a 'meta' field, 'tags', 'description', 'loader', 'freshness' etc.