2025-05-07 01:39:42 +08:00
|
|
|
use serde::Deserialize;
|
|
|
|
use std::collections::HashMap;
|
|
|
|
|
2025-05-07 02:51:47 +08:00
|
|
|
// Struct definitions for parsing dbt's catalog.json.
|
2025-05-07 01:39:42 +08:00
|
|
|
|
|
|
|
#[derive(Debug, Deserialize, Clone)]
|
|
|
|
pub struct DbtCatalog {
|
|
|
|
pub metadata: DbtCatalogMetadata,
|
|
|
|
pub nodes: HashMap<String, DbtNode>,
|
2025-05-07 02:51:47 +08:00
|
|
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
|
|
pub sources: Option<HashMap<String, DbtSource>>,
|
|
|
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
|
|
pub macros: Option<HashMap<String, serde_json::Value>>,
|
|
|
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
|
|
pub exposures: Option<HashMap<String, serde_json::Value>>,
|
|
|
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
|
|
pub metrics: Option<HashMap<String, serde_json::Value>>,
|
|
|
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
|
|
pub selectors: Option<HashMap<String, serde_json::Value>>,
|
|
|
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
|
|
pub disabled: Option<HashMap<String, Vec<serde_json::Value>>>, // dbt-core uses Vec here
|
|
|
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
|
|
pub parent_map: Option<HashMap<String, Vec<String>>>,
|
|
|
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
|
|
pub child_map: Option<HashMap<String, Vec<String>>>,
|
|
|
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
|
|
pub errors: Option<serde_json::Value>, // Can be null or an object with error details
|
2025-05-07 01:39:42 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
#[derive(Debug, Deserialize, Clone)]
|
|
|
|
pub struct DbtCatalogMetadata {
|
|
|
|
#[serde(rename = "dbt_schema_version")]
|
|
|
|
pub dbt_schema_version: String,
|
2025-05-07 02:51:47 +08:00
|
|
|
#[allow(dead_code)] // If not used directly by Buster, but good for complete parsing
|
|
|
|
pub dbt_version: Option<String>,
|
|
|
|
#[allow(dead_code)]
|
|
|
|
pub generated_at: Option<String>,
|
|
|
|
#[allow(dead_code)]
|
|
|
|
pub invocation_id: Option<String>,
|
2025-05-07 01:39:42 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
#[derive(Debug, Deserialize, Clone)]
|
|
|
|
pub struct DbtNode {
|
2025-05-07 02:51:47 +08:00
|
|
|
// Ensure metadata is present, matches example which has it implicitly via direct fields
|
|
|
|
// For the example catalog's node structure, we might need to flatten some metadata fields
|
|
|
|
// or expect them directly if `metadata` as a block is not always there.
|
|
|
|
// However, standard dbt catalog.json *does* have a metadata block within each node.
|
|
|
|
// The example provided might be a slight simplification or custom representation.
|
|
|
|
// Assuming standard catalog structure for now, where DbtNodeMetadata is a separate struct.
|
|
|
|
pub metadata: DbtNodeMetadata,
|
2025-05-07 01:39:42 +08:00
|
|
|
pub columns: HashMap<String, DbtColumn>,
|
2025-05-07 02:51:47 +08:00
|
|
|
#[serde(rename = "resource_type")] // if resource_type is not directly in JSON, this helps map if some other key exists
|
|
|
|
// if type is the key in JSON for resource_type, then it should be:
|
|
|
|
// #[serde(alias = "type")] // or handle it in DbtNodeMetadata if type is part of metadata
|
|
|
|
#[serde(default)] // Make it optional and handle missing field
|
|
|
|
pub resource_type: Option<String>, // This refers to model, seed, snapshot, test etc.
|
2025-05-07 01:39:42 +08:00
|
|
|
pub unique_id: String,
|
2025-05-07 02:51:47 +08:00
|
|
|
#[serde(default)] // original_file_path might not be present for all node types
|
|
|
|
pub original_file_path: Option<String>,
|
2025-05-07 01:39:42 +08:00
|
|
|
pub database: Option<String>,
|
|
|
|
pub schema: Option<String>,
|
2025-05-07 02:51:47 +08:00
|
|
|
#[serde(default)] // Make name optional
|
|
|
|
pub name: Option<String>, // This is often the filename or alias. metadata.name is relation name.
|
|
|
|
pub comment: Option<String>, // Comment can be directly on the node for some versions/types
|
|
|
|
pub stats: Option<serde_json::Value>, // To capture general stats blocks
|
2025-05-07 01:39:42 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
#[derive(Debug, Deserialize, Clone)]
|
|
|
|
pub struct DbtNodeMetadata {
|
2025-05-07 02:51:47 +08:00
|
|
|
// Standard dbt catalog.json has `name` here as the relation name.
|
|
|
|
#[serde(default)] // Make name optional
|
|
|
|
pub name: Option<String>,
|
|
|
|
#[serde(rename = "type")] // This 'type' inside metadata usually refers to the materialization (table, view, etc.) for models
|
2025-05-07 01:39:42 +08:00
|
|
|
pub relation_type: Option<String>,
|
2025-05-07 02:51:47 +08:00
|
|
|
pub schema: Option<String>, // schema can also be here
|
|
|
|
pub database: Option<String>, // database can also be here
|
|
|
|
pub comment: Option<String>, // comment for the model/node itself
|
2025-05-07 01:39:42 +08:00
|
|
|
#[allow(dead_code)]
|
|
|
|
pub owner: Option<String>,
|
2025-05-07 02:51:47 +08:00
|
|
|
// Add other potential metadata fields if necessary, e.g., tags, config, etc.
|
|
|
|
#[serde(default)]
|
|
|
|
pub tags: Vec<String>,
|
|
|
|
}
|
|
|
|
|
|
|
|
#[derive(Debug, Deserialize, Clone)]
|
|
|
|
pub struct DbtSource {
|
|
|
|
pub name: String, // This is the source's table name
|
|
|
|
pub unique_id: String,
|
|
|
|
pub database: Option<String>,
|
|
|
|
pub schema: Option<String>,
|
|
|
|
#[serde(default, alias = "resource_type")] // Sources have "source" as resource_type, or a specific table type.
|
|
|
|
pub table_type: Option<String>, // e.g. "table", often not explicitly a 'type' field in catalog for sources, but implied.
|
|
|
|
pub columns: HashMap<String, DbtColumn>,
|
|
|
|
pub comment: Option<String>,
|
|
|
|
pub stats: Option<serde_json::Value>,
|
|
|
|
// Sources can also have a 'meta' field, 'tags', 'description', 'loader', 'freshness' etc.
|
|
|
|
#[serde(default)]
|
|
|
|
pub description: Option<String>, // description is preferred over comment for sources usually
|
|
|
|
#[serde(default)]
|
|
|
|
pub meta: Option<HashMap<String, serde_json::Value>>,
|
|
|
|
#[serde(default)]
|
|
|
|
pub tags: Vec<String>,
|
2025-05-07 01:39:42 +08:00
|
|
|
}
|
|
|
|
|
2025-05-07 02:51:47 +08:00
|
|
|
|
2025-05-07 01:39:42 +08:00
|
|
|
#[derive(Debug, Deserialize, Clone)]
|
|
|
|
pub struct DbtColumn {
|
|
|
|
#[serde(rename = "type")]
|
|
|
|
pub column_type: String,
|
2025-05-07 02:51:47 +08:00
|
|
|
pub index: Option<u32>, // Index might not always be present
|
2025-05-07 01:39:42 +08:00
|
|
|
pub name: String,
|
|
|
|
pub comment: Option<String>,
|
2025-05-07 02:51:47 +08:00
|
|
|
#[serde(default)]
|
|
|
|
pub description: Option<String>, // Columns can also have descriptions
|
|
|
|
#[serde(default)]
|
|
|
|
pub meta: Option<HashMap<String, serde_json::Value>>,
|
|
|
|
#[serde(default)]
|
|
|
|
pub tags: Vec<String>,
|
2025-05-07 01:39:42 +08:00
|
|
|
}
|