mirror of https://github.com/buster-so/buster.git
parsing works, but still skipping too much
This commit is contained in:
parent
70b92895eb
commit
b8128dc75c
|
@ -240,18 +240,28 @@ pub async fn generate_semantic_models_command(
|
|||
continue;
|
||||
}
|
||||
|
||||
// Ensure metadata.name exists, as it's crucial for the semantic model name
|
||||
let Some(ref dbt_model_name_for_yaml_from_metadata) = dbt_node.metadata.name else {
|
||||
// Ensure metadata and metadata.name exist
|
||||
let Some(ref dbt_node_metadata) = dbt_node.metadata else {
|
||||
eprintln!(
|
||||
"{}",
|
||||
format!(
|
||||
"Warning: Skipping dbt model with unique_id: {} because its 'metadata.name' is missing in catalog.json.",
|
||||
"Warning: Skipping dbt node with unique_id: {} in generate because its 'metadata' block is missing.",
|
||||
dbt_node.unique_id
|
||||
).yellow()
|
||||
);
|
||||
continue;
|
||||
};
|
||||
let dbt_model_name_for_yaml = dbt_model_name_for_yaml_from_metadata.clone(); // Now safe to clone
|
||||
let Some(ref dbt_model_name_from_metadata) = dbt_node_metadata.name else {
|
||||
eprintln!(
|
||||
"{}",
|
||||
format!(
|
||||
"Warning: Skipping dbt model with unique_id: {} in generate because its 'metadata.name' is missing.",
|
||||
dbt_node.unique_id
|
||||
).yellow()
|
||||
);
|
||||
continue;
|
||||
};
|
||||
let dbt_model_name_for_yaml = dbt_model_name_from_metadata.clone();
|
||||
|
||||
dbt_models_processed_count += 1;
|
||||
// --- End Scoping Logic ---
|
||||
|
@ -290,7 +300,7 @@ pub async fn generate_semantic_models_command(
|
|||
model_was_updated = true;
|
||||
}
|
||||
|
||||
if let Some(dbt_comment) = &dbt_node.metadata.comment {
|
||||
if let Some(dbt_comment) = &dbt_node_metadata.comment {
|
||||
if existing_model.description.as_deref() != Some(dbt_comment.as_str()) {
|
||||
existing_model.description = Some(dbt_comment.clone());
|
||||
model_was_updated = true;
|
||||
|
@ -361,7 +371,7 @@ pub async fn generate_semantic_models_command(
|
|||
|
||||
for (col_name, dbt_col) in dbt_columns_map {
|
||||
println!(" Adding new column '{}' to semantic model '{}'", col_name.green(), dbt_model_name_for_yaml);
|
||||
if is_measure_type(&dbt_col.column_type) {
|
||||
if crate::commands::init::is_measure_type(Some(dbt_col.column_type.as_str())) { // Assuming dbt_col.column_type is String
|
||||
current_measures.push(YamlMeasure { name: dbt_col.name.clone(), description: dbt_col.comment.clone(), type_: Some(dbt_col.column_type.clone()) });
|
||||
} else {
|
||||
current_dims.push(YamlDimension { name: dbt_col.name.clone(), description: dbt_col.comment.clone(), type_: Some(dbt_col.column_type.clone()), searchable: false, options: None });
|
||||
|
@ -385,17 +395,27 @@ pub async fn generate_semantic_models_command(
|
|||
println!("Generating new semantic model: {} at {}", dbt_model_name_for_yaml.green(), individual_semantic_yaml_path.display());
|
||||
let mut dimensions = Vec::new();
|
||||
let mut measures = Vec::new();
|
||||
for (_col_name, col) in &dbt_node.columns {
|
||||
if is_measure_type(&col.column_type) {
|
||||
measures.push(YamlMeasure { name: col.name.clone(), description: col.comment.clone(), type_: Some(col.column_type.clone()) });
|
||||
for (_col_name, col) in &dbt_node.columns { // dbt_node.columns defaults to empty if missing
|
||||
if crate::commands::init::is_measure_type(Some(col.column_type.as_str())) { // Assuming col.column_type is String
|
||||
measures.push(YamlMeasure {
|
||||
name: col.name.clone(),
|
||||
description: col.comment.clone(),
|
||||
type_: Some(col.column_type.clone())
|
||||
});
|
||||
} else {
|
||||
dimensions.push(YamlDimension { name: col.name.clone(), description: col.comment.clone(), type_: Some(col.column_type.clone()), searchable: false, options: None });
|
||||
dimensions.push(YamlDimension {
|
||||
name: col.name.clone(),
|
||||
description: col.comment.clone(),
|
||||
type_: Some(col.column_type.clone()),
|
||||
searchable: false,
|
||||
options: None
|
||||
});
|
||||
}
|
||||
}
|
||||
let new_model = YamlModel {
|
||||
name: dbt_model_name_for_yaml.clone(),
|
||||
description: dbt_node.metadata.comment.clone(),
|
||||
data_source_name: buster_config.projects.as_ref().and_then(|p|p.first()).and_then(|pc|pc.data_source_name.clone()), // Default from first project context
|
||||
description: dbt_node_metadata.comment.clone(), // Use dbt_node_metadata
|
||||
data_source_name: buster_config.projects.as_ref().and_then(|p|p.first()).and_then(|pc|pc.data_source_name.clone()),
|
||||
database: dbt_node.database.clone(),
|
||||
schema: dbt_node.schema.clone(),
|
||||
dimensions,
|
||||
|
|
|
@ -84,16 +84,21 @@ pub fn is_false(val: &bool) -> bool {
|
|||
}
|
||||
|
||||
// Helper function to determine if a SQL type should be a measure
|
||||
pub fn is_measure_type(sql_type: &str) -> bool {
|
||||
let lower_sql_type = sql_type.to_lowercase();
|
||||
lower_sql_type.contains("int") ||
|
||||
lower_sql_type.contains("numeric") ||
|
||||
lower_sql_type.contains("decimal") ||
|
||||
lower_sql_type.contains("real") ||
|
||||
lower_sql_type.contains("double") ||
|
||||
lower_sql_type.contains("float") ||
|
||||
lower_sql_type.contains("money") ||
|
||||
lower_sql_type.contains("number")
|
||||
/// Decides whether a column with the given SQL type should be modeled as a measure.
///
/// The type name is lower-cased and searched for numeric markers such as `int`,
/// `numeric` or `float`, so parameterized and vendor-specific spellings
/// (`NUMERIC(10,2)`, `FLOAT64`, `bigint`) are recognized.
///
/// Returns `false` when the type is missing (`None`), so the column is treated as a
/// dimension instead.
///
/// Type names that merely embed a marker without being scalar numbers are rejected:
/// `interval` and `point` both contain the substring `int`, as do range types such
/// as `int4range`.
pub fn is_measure_type(sql_type_opt: Option<&str>) -> bool {
    // Substrings that identify a numeric SQL type.
    const NUMERIC_MARKERS: [&str; 8] = [
        "int", "numeric", "decimal", "real", "double", "float", "money", "number",
    ];
    // Substrings of non-numeric types that would otherwise match a numeric marker.
    const NON_NUMERIC_LOOKALIKES: [&str; 3] = ["interval", "point", "range"];

    sql_type_opt.map_or(false, |sql_type| {
        let lower_sql_type = sql_type.to_lowercase();
        let is_lookalike = NON_NUMERIC_LOOKALIKES
            .iter()
            .any(|marker| lower_sql_type.contains(*marker));
        !is_lookalike
            && NUMERIC_MARKERS
                .iter()
                .any(|marker| lower_sql_type.contains(*marker))
    })
}
|
||||
|
||||
// Enum for Database Type selection (ensure only one definition, placed before use)
|
||||
|
@ -779,16 +784,25 @@ async fn generate_semantic_models_from_dbt_catalog(
|
|||
eprintln!(
|
||||
"{}",
|
||||
format!(
|
||||
"Warning: Skipping dbt model {} (unique_id: {}) because it is missing 'original_file_path' in catalog.json.",
|
||||
node.name.as_deref().unwrap_or("[unknown name]"), // Use derived node.name if available
|
||||
"Warning: Skipping dbt model unique_id: {} because it is missing 'original_file_path' in catalog.json.",
|
||||
node.unique_id
|
||||
).yellow()
|
||||
);
|
||||
continue;
|
||||
};
|
||||
|
||||
// Ensure metadata.name exists, as it's crucial for the semantic model name
|
||||
let Some(ref actual_model_name_from_metadata) = node.metadata.name else {
|
||||
// Ensure metadata and metadata.name exist, as it's crucial for the semantic model name
|
||||
let Some(ref node_metadata) = node.metadata else {
|
||||
eprintln!(
|
||||
"{}",
|
||||
format!(
|
||||
"Warning: Skipping dbt model with unique_id: {} because its 'metadata' block is missing in catalog.json.",
|
||||
node.unique_id
|
||||
).yellow()
|
||||
);
|
||||
continue;
|
||||
};
|
||||
let Some(ref actual_model_name_from_metadata) = node_metadata.name else {
|
||||
eprintln!(
|
||||
"{}",
|
||||
format!(
|
||||
|
@ -798,7 +812,7 @@ async fn generate_semantic_models_from_dbt_catalog(
|
|||
);
|
||||
continue;
|
||||
};
|
||||
let actual_model_name = actual_model_name_from_metadata.clone(); // Now safe to clone
|
||||
let actual_model_name = actual_model_name_from_metadata.clone();
|
||||
|
||||
let original_file_path_abs = buster_config_dir.join(original_file_path_str);
|
||||
|
||||
|
@ -821,33 +835,33 @@ async fn generate_semantic_models_from_dbt_catalog(
|
|||
let mut dimensions: Vec<YamlDimension> = Vec::new();
|
||||
let mut measures: Vec<YamlMeasure> = Vec::new();
|
||||
|
||||
for (_col_name, col) in &node.columns {
|
||||
if is_measure_type(&col.column_type) {
|
||||
for (_col_name, col) in &node.columns { // node.columns is HashMap, defaults to empty if missing
|
||||
if is_measure_type(Some(col.column_type.as_str())) { // Assuming col.column_type is String here based on linter
|
||||
measures.push(YamlMeasure {
|
||||
name: col.name.clone(),
|
||||
description: col.comment.clone(),
|
||||
type_: Some(col.column_type.clone()),
|
||||
name: col.name.clone(),
|
||||
description: col.comment.clone(),
|
||||
type_: Some(col.column_type.clone()), // Wrap in Some()
|
||||
});
|
||||
} else {
|
||||
dimensions.push(YamlDimension {
|
||||
name: col.name.clone(),
|
||||
description: col.comment.clone(),
|
||||
type_: Some(col.column_type.clone()),
|
||||
searchable: false, // Default to false, user can change
|
||||
type_: Some(col.column_type.clone()), // Wrap in Some()
|
||||
searchable: false,
|
||||
options: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
let yaml_model = YamlModel {
|
||||
name: actual_model_name, // This should be the model's identifier name
|
||||
description: node.metadata.comment.clone(), // Use metadata.comment as the source for description
|
||||
name: actual_model_name.clone(),
|
||||
description: node_metadata.comment.clone(), // Access comment via node_metadata ref
|
||||
data_source_name: default_data_source_name.cloned(),
|
||||
database: node.database.clone().or_else(|| default_database.cloned()),
|
||||
schema: node.schema.clone().or_else(|| default_schema.cloned()),
|
||||
database: node.database.clone().or_else(|| default_database.cloned()), // node.database is Option<String>
|
||||
schema: node.schema.clone().or_else(|| default_schema.cloned()), // node.schema is Option<String>
|
||||
dimensions,
|
||||
measures,
|
||||
original_file_path: Some(original_file_path_str.clone()), // Keep original dbt model path for reference
|
||||
original_file_path: Some(original_file_path_str.clone()),
|
||||
};
|
||||
|
||||
// Determine the output path for this individual YAML model
|
||||
|
|
|
@ -5,9 +5,11 @@ use std::collections::HashMap;
|
|||
|
||||
#[derive(Debug, Deserialize, Clone)]
|
||||
pub struct DbtCatalog {
|
||||
pub metadata: DbtCatalogMetadata,
|
||||
#[serde(default)]
|
||||
pub metadata: Option<DbtCatalogMetadata>,
|
||||
#[serde(default)]
|
||||
pub nodes: HashMap<String, DbtNode>,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
#[serde(default)]
|
||||
pub sources: Option<HashMap<String, DbtSource>>,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub macros: Option<HashMap<String, serde_json::Value>>,
|
||||
|
@ -29,8 +31,8 @@ pub struct DbtCatalog {
|
|||
|
||||
#[derive(Debug, Deserialize, Clone)]
|
||||
pub struct DbtCatalogMetadata {
|
||||
#[serde(rename = "dbt_schema_version")]
|
||||
pub dbt_schema_version: String,
|
||||
#[serde(rename = "dbt_schema_version", default)]
|
||||
pub dbt_schema_version: Option<String>,
|
||||
#[allow(dead_code)] // If not used directly by Buster, but good for complete parsing
|
||||
pub dbt_version: Option<String>,
|
||||
#[allow(dead_code)]
|
||||
|
@ -47,7 +49,9 @@ pub struct DbtNode {
|
|||
// However, standard dbt catalog.json *does* have a metadata block within each node.
|
||||
// The example provided might be a slight simplification or custom representation.
|
||||
// Assuming standard catalog structure for now, where DbtNodeMetadata is a separate struct.
|
||||
pub metadata: DbtNodeMetadata,
|
||||
#[serde(default)]
|
||||
pub metadata: Option<DbtNodeMetadata>,
|
||||
#[serde(default)]
|
||||
pub columns: HashMap<String, DbtColumn>,
|
||||
#[serde(rename = "resource_type")] // if resource_type is not directly in JSON, this helps map if some other key exists
|
||||
// if type is the key in JSON for resource_type, then it should be:
|
||||
|
@ -84,13 +88,18 @@ pub struct DbtNodeMetadata {
|
|||
|
||||
#[derive(Debug, Deserialize, Clone)]
|
||||
pub struct DbtSource {
|
||||
pub name: String, // This is the source's table name
|
||||
#[serde(default)]
|
||||
pub name: Option<String>, // This is the source's table name
|
||||
pub unique_id: String,
|
||||
#[serde(default)]
|
||||
pub database: Option<String>,
|
||||
#[serde(default)]
|
||||
pub schema: Option<String>,
|
||||
#[serde(default, alias = "resource_type")] // Sources have "source" as resource_type, or a specific table type.
|
||||
pub table_type: Option<String>, // e.g. "table", often not explicitly a 'type' field in catalog for sources, but implied.
|
||||
#[serde(default)]
|
||||
pub columns: HashMap<String, DbtColumn>,
|
||||
#[serde(default)]
|
||||
pub comment: Option<String>,
|
||||
pub stats: Option<serde_json::Value>,
|
||||
// Sources can also have a 'meta' field, 'tags', 'description', 'loader', 'freshness' etc.
|
||||
|
|
Loading…
Reference in New Issue