modelfile helper and starting point for fields.

This commit is contained in:
Dallin Bentley 2024-11-25 11:46:48 -07:00
parent a6c5dff778
commit 1c5d13b25d
10 changed files with 270 additions and 79 deletions

View File

@ -1 +1,30 @@
# buster-cli
# buster-cli
A CLI tool for creating and managing your semantic model in Buster.
This tool is two-way compatible with your dbt projects as well. We like dbt and think it's a great tool.
## Installation
TODO
## How does it work?
You can imagine Buster as a layer on top of your dbt project that allows you to create and manage semantic models. We collect extra metadata about your models; however, dbt semantic models don't allow any fields beyond the ones dbt itself defines. When you run `buster deploy`, we will create a dbt-compatible copy that is used to run the dbt commands.
## Quick Start
1. Obtain your Buster API key. You can create one [here](https://platform.buster.so/app/settings/api-keys).
Initialize your project by running:
```bash
buster init
```
This command will go through the following steps:
1. Authenticate with your Buster API key.
2. Check whether you have an existing dbt project. If you do, you will be prompted to use the existing project or create a new one.
- If you choose to use the existing project, Buster will use the existing project to create semantic model files.

View File

@ -0,0 +1,96 @@
use serde::{Deserialize, Serialize};
/// Root of a semantic-model document: a `version` number plus the list of
/// semantic models found under the `semantic_models` key.
#[derive(Serialize, Deserialize)]
pub struct DbtModel {
/// Value of the document's `version` key.
pub version: u32,
/// Entries of the document's `semantic_models` list.
pub semantic_models: Vec<SemanticModel>,
}
/// One entry of the `semantic_models` list.
///
/// `name`, `model`, `defaults` and `entities` must be present when deserializing;
/// every `Option` field may be omitted.
#[derive(Serialize, Deserialize)]
pub struct SemanticModel {
/// Name of the semantic model.
pub name: String,
/// Optional free-text description.
pub description: Option<String>,
/// The project's YAML template describes this as the database identifier of the
/// table/view/mv the semantic model relates to, e.g. `ref('some_model')`.
pub model: String,
/// Model-wide defaults (currently only the aggregation time dimension).
pub defaults: Defaults,
/// Optional alternative names for the model. Treated as a Buster-specific key:
/// `convert_buster_to_dbt_model` removes `aliases` from the dbt copy.
pub aliases: Option<Vec<String>>,
/// Entities declared by the model.
pub entities: Vec<Entity>,
/// Optional measures declared by the model.
pub measures: Option<Vec<Measure>>,
/// Optional dimensions declared by the model.
pub dimensions: Option<Vec<Dimension>>,
}
/// Model-wide defaults of a [`SemanticModel`].
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Defaults {
    /// Name of the dimension used as the default aggregation time dimension.
    /// The project's YAML template marks it as required when the model contains
    /// measures; this type requires it unconditionally.
    pub agg_time_dimension: String,
}
/// One entry of a semantic model's `entities` list.
#[derive(Serialize, Deserialize)]
pub struct Entity {
/// Name of the entity.
pub name: String,
/// Kind of entity. Stored under the key `type`; the field is renamed because
/// `type` is a Rust keyword.
#[serde(rename = "type")]
pub entity_type: EntityType,
/// Optional description of the field or the role the entity takes in the table.
pub description: Option<String>,
/// Optional field or expression that denotes the entity (the YAML template's
/// example is `transaction_id`).
pub expr: Option<String>,
/// How the entity is joined to the table. The YAML template calls this required
/// on foreign entities; the type itself does not enforce that.
/// `convert_buster_to_dbt_model` removes this key from the dbt copy.
pub join_type: Option<JoinType>,
/// Relationship between the entity and the table. Same notes as `join_type`:
/// documented as required on foreign entities, not enforced here, and removed
/// by `convert_buster_to_dbt_model`.
pub relationship_type: Option<RelationshipType>,
}
/// Kind of an [`Entity`], stored under its `type` key.
///
/// Always serialized in snake_case (`primary`, `natural`, `foreign`, `unique`).
/// When deserializing, the capitalized spelling is accepted as well, because the
/// project's YAML template and test fixture write `type: Primary`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum EntityType {
    #[serde(alias = "Primary")]
    Primary,
    #[serde(alias = "Natural")]
    Natural,
    #[serde(alias = "Foreign")]
    Foreign,
    #[serde(alias = "Unique")]
    Unique,
}
/// How an [`Entity`] is joined to its table.
///
/// Serialized in snake_case: `always_left`, `inner`, `full_outer`, `cross`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum JoinType {
    AlwaysLeft,
    Inner,
    FullOuter,
    Cross,
}
/// Cardinality of the relationship between an [`Entity`] and its table.
///
/// Serialized in snake_case: `one_to_one`, `one_to_many`, `many_to_one`,
/// `many_to_many`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum RelationshipType {
    OneToOne,
    OneToMany,
    ManyToOne,
    ManyToMany,
}
/// One entry of a semantic model's `measures` list.
///
/// `name`, `agg` and `expr` must be present when deserializing; the remaining
/// fields may be omitted.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Measure {
    /// Name of the measure.
    pub name: String,
    /// Optional free-text description.
    pub description: Option<String>,
    /// Aggregation applied to the measure (the test fixture uses `sum`).
    pub agg: String,
    /// Expression the aggregation is applied to.
    // NOTE(review): the test fixture declares a measure without `expr`, which this
    // required field would reject — confirm whether it should be optional.
    pub expr: String,
    /// Aggregation-specific properties; the YAML template gives a percentile as
    /// an example.
    pub agg_params: Option<String>,
    /// Time field for this measure. The YAML template says it falls back to the
    /// semantic model's default aggregation time dimension when absent; no
    /// fallback is applied by this type.
    pub agg_time_dimension: Option<String>,
    /// Configuration for non-additive dimensions.
    pub non_additive_dimension: Option<String>,
    /// Optional alternative names for the measure. `convert_buster_to_dbt_model`
    /// removes this key from the dbt copy.
    pub alias: Option<Vec<String>>,
}
/// One entry of a semantic model's `dimensions` list.
///
/// `name`, `type` and `type_params` must be present when deserializing; the
/// remaining fields may be omitted.
#[derive(Serialize, Deserialize)]
pub struct Dimension {
/// Name of the dimension.
pub name: String,
/// Kind of dimension. Stored under the key `type`; the field is renamed because
/// `type` is a Rust keyword.
#[serde(rename = "type")]
pub dimension_type: DimensionType,
/// Optional label for the dimension.
pub label: Option<String>,
/// Type-specific parameters; the YAML template mentions whether a time
/// dimension is primary or used as a partition.
// NOTE(review): declared as a plain string and required — confirm this matches
// the shape used in real model files.
pub type_params: String,
/// Optional free-text description.
pub description: Option<String>,
/// Column name or expression. The YAML template says the dimension name is
/// used when this is absent; no fallback is applied by this type.
pub expr: Option<String>,
/// SQL that defines the dimension (the template's example is a case statement).
/// `convert_buster_to_dbt_model` removes this key from the dbt copy.
pub sql: Option<String>,
/// Per the YAML template, enables searching the dimension's values based on the
/// user search. Removed by `convert_buster_to_dbt_model`.
pub searchable: Option<bool>,
/// Optional alternative names for the dimension. Removed by
/// `convert_buster_to_dbt_model`.
pub alias: Option<Vec<String>>,
/// Time zone name such as `America/Los_Angeles`. The YAML template documents
/// UTC as the default; no default is applied by this type. Removed by
/// `convert_buster_to_dbt_model`.
pub timezone: Option<String>,
}
/// Kind of a [`Dimension`], stored under its `type` key.
///
/// Always serialized in snake_case (`categorical`, `time`). When deserializing,
/// the capitalized spelling is accepted as well, because the project's YAML
/// template and test fixture write `type: Categorical`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum DimensionType {
    #[serde(alias = "Categorical")]
    Categorical,
    #[serde(alias = "Time")]
    Time,
}

View File

@ -6,12 +6,14 @@ semantic_models:
model: ref('some_model') ## Required: the database identifier of the table/view/mv that this semantic model relates to.
defaults: ## Required TODO: figure out exactly what this is.
agg_time_dimension: dimension_name ## Required if the model contains measures
aliases: [] ## Optional field that allows you to alias the semantic model, we will use this to replace the model name in the queries.
entities:
- name: <name of the entity> ## Required
type: Primary or natural or foreign or unique ## Required
description: A description of the field or role the entity takes in this table ## Optional
expr: The field that denotes that entity (transaction_id). ## Optional
join_type: many-to-one or one-to-one or one-to-many ## Required on foreign entities helps buster understand how to join the entity to the table.
join_type: inner ## Required on foreign entities. This helps buster understand how to join the entity to the table. [always_left, inner, full_outer, cross]
relationship_type: one_to_one ## Required on foreign entities. This helps buster understand the relationship between the entity and the table. [one_to_one, one_to_many, many_to_one, many_to_many]
measures:
- name: <name of the measure>
description: "same as always" ## Optional
@ -20,6 +22,7 @@ semantic_models:
agg_params: "specific aggregation properties such as a percentile" ## Optional
agg_time_dimension: The time field. Defaults to the default agg time dimension for the semantic model. ## Optional
non_additive_dimension: "Use these configs when you need non-additive dimensions." ## Optional
alias: ["alias for the measure"] ## Optional
dimensions:
- name: <name of the dimension> # Required
type: Categorical or Time # Required
@ -27,3 +30,8 @@ semantic_models:
type_params: Specific type params such as if the time is primary or used as a partition # Required
description: Same as always # Optional
expr: The column name or expression. If not provided the default is the dimension name # Optional
sql: Can be sql that defines the dimension... like a case statement or something. # Optional
searchable: boolean # Optional if toggled to true, we search the dimension values based on the user search.
alias: ["alias for the dimension"] # Optional
timezone: "America/Los_Angeles" # Optional defaults to UTC.

View File

@ -1,67 +0,0 @@
/// Plain data holder pairing a `version` number with a list of [`Model`] entries.
// NOTE(review): presumably mirrors a dbt `models:` properties file — confirm.
pub struct DbtModel {
/// Document version number.
pub version: u32,
/// The model entries.
pub models: Vec<Model>,
}
/// One model entry: a required `name` plus optional metadata.
pub struct Model {
/// Name of the model.
pub name: String,
/// Optional free-text description.
pub description: Option<String>,
/// Optional documentation display settings.
pub docs: Option<Docs>,
/// Optional identifier of the latest version, kept as a string.
pub latest_version: Option<String>,
/// Optional deprecation date, kept as an unparsed string.
pub deprecation_date: Option<String>,
/// Optional access level.
pub access: Option<Access>,
/// Optional string-to-string configuration map.
pub config: Option<std::collections::HashMap<String, String>>,
/// Optional list of constraints, each kept as a string.
pub constraints: Option<Vec<String>>,
/// Optional list of tests, each kept as a string.
pub tests: Option<Vec<String>>,
/// Optional column definitions.
pub columns: Option<Vec<Column>>,
/// Optional time-spine settings.
pub time_spine: Option<TimeSpine>,
/// Optional list of model versions.
pub versions: Option<Vec<Version>>,
}
/// Documentation display settings attached to a [`Model`] or a [`Version`].
pub struct Docs {
/// Optional visibility flag.
pub show: Option<bool>,
/// Optional node color, kept as a string.
pub node_color: Option<String>,
}
/// Access level used by the `access` field of [`Model`] and [`Version`].
pub enum Access {
Private,
Protected,
Public,
}
/// One column of a [`Model`]: a required `name` plus optional metadata.
pub struct Column {
/// Name of the column.
pub name: String,
/// Optional free-text description.
pub description: Option<String>,
/// Optional string-to-string metadata map.
pub meta: Option<std::collections::HashMap<String, String>>,
/// Optional quoting flag.
pub quote: Option<bool>,
/// Optional list of constraints, each kept as a string.
pub constraints: Option<Vec<String>>,
/// Optional list of tests, each kept as a string.
pub tests: Option<Vec<String>>,
/// Optional list of tags.
pub tags: Option<Vec<String>>,
/// Optional granularity, kept as a string.
pub granularity: Option<String>,
}
/// Time-spine settings of a [`Model`].
pub struct TimeSpine {
/// Name of the standard-granularity column (required).
pub standard_granularity_column: String,
}
/// One entry of a [`Model`]'s `versions` list. It repeats several of the model's
/// own optional fields.
// NOTE(review): presumably these act as per-version overrides — confirm.
pub struct Version {
/// Version identifier, kept as a string (required).
pub v: String,
/// Optional `defined_in` value, kept as a string.
pub defined_in: Option<String>,
/// Optional free-text description.
pub description: Option<String>,
/// Optional documentation display settings.
pub docs: Option<Docs>,
/// Optional access level.
pub access: Option<Access>,
/// Optional list of constraints, each kept as a string.
pub constraints: Option<Vec<String>>,
/// Optional string-to-string configuration map.
pub config: Option<std::collections::HashMap<String, String>>,
/// Optional list of tests, each kept as a string.
pub tests: Option<Vec<String>>,
/// Optional column entries for this version.
pub columns: Option<Vec<VersionColumn>>,
}
/// One column entry of a [`Version`]. Every field is optional.
pub struct VersionColumn {
/// Optional `include` value, kept as a single string.
pub include: Option<String>,
/// Optional list of `exclude` values.
pub exclude: Option<Vec<String>>,
/// Optional column name.
pub name: Option<String>,
/// Optional quoting flag.
pub quote: Option<bool>,
/// Optional list of constraints, each kept as a string.
pub constraints: Option<Vec<String>>,
/// Optional list of tests, each kept as a string.
pub tests: Option<Vec<String>>,
/// Optional list of tags.
pub tags: Option<Vec<String>>,
}

View File

@ -1,13 +1,37 @@
use anyhow::Result;
use crate::utils::credentials::get_and_validate_buster_credentials;
use super::auth;
/// Start of the `init` flow: makes sure usable Buster credentials exist, then
/// probes the current working directory for a dbt project.
///
/// The project-creation steps are not implemented yet; see the TODOs below.
///
/// # Errors
///
/// Returns an error when the authentication flow fails, or when checking for
/// `dbt_project.yml` fails with an I/O error.
pub async fn init() -> Result<()> {
    // Any failure to load or validate the stored credentials (missing file,
    // unparsable file, empty API key) sends the user through the auth flow.
    if get_and_validate_buster_credentials().await.is_err() {
        println!("No Buster credentials found. Beginning authentication flow...");
        if let Err(e) = auth().await {
            anyhow::bail!("Failed to authenticate: {}", e);
        }
    }

    // TODO: Check for dbt .profiles? create one if not exists.

    // The path is relative, so it is resolved against the current working directory.
    // Underscore-prefixed until the branches below are implemented.
    let _dbt_project_exists = tokio::fs::try_exists("dbt_project.yml").await?;

    // TODO: If a dbt project exists, ask whether to piggyback off the existing
    // project; if the answer is no, create a new example project.
    // TODO: If no dbt project exists, create a new example project.

    Ok(())
}

View File

@ -3,6 +3,8 @@ use thiserror::Error;
#[derive(Debug, Error)]
pub enum BusterError {
#[error("Invalid credentials")]
InvalidCredentials,
#[error("File not found: {path}")]
FileNotFound { path: PathBuf },
#[error("Failed to parse file: {error}")]

View File

@ -30,7 +30,7 @@ pub async fn get_buster_credentials() -> Result<BusterCredentials, BusterError>
Err(_) => return Err(BusterError::FileNotFound { path }),
};
let creds_yaml = match serde_yaml::from_str(&contents) {
let creds_yaml: BusterCredentials = match serde_yaml::from_str(&contents) {
Ok(creds_yaml) => creds_yaml,
Err(e) => {
return Err(BusterError::ParseError {
@ -42,16 +42,31 @@ pub async fn get_buster_credentials() -> Result<BusterCredentials, BusterError>
Ok(creds_yaml)
}
/// Loads the stored Buster credentials and rejects them when the API key is empty.
///
/// The check is purely local: it only tests that `api_key` is a non-empty string.
///
/// # Errors
///
/// Propagates any error from `get_buster_credentials` unchanged, and returns
/// `BusterError::InvalidCredentials` when `api_key` is empty.
pub async fn get_and_validate_buster_credentials() -> Result<BusterCredentials, BusterError> {
    let creds = get_buster_credentials().await?;

    if creds.api_key.is_empty() {
        return Err(BusterError::InvalidCredentials);
    }

    Ok(creds)
}
pub async fn set_buster_credentials(creds: BusterCredentials) -> Result<(), BusterError> {
let mut path = home_dir().unwrap_or_default();
path.push(".buster");
// Create .buster directory if it doesn't exist
if !path.exists() {
fs::create_dir_all(&path).await.map_err(|e| BusterError::FileWriteError {
path: path.clone(),
error: e.to_string(),
})?;
fs::create_dir_all(&path)
.await
.map_err(|e| BusterError::FileWriteError {
path: path.clone(),
error: e.to_string(),
})?;
}
path.push("credentials.yml");

View File

@ -0,0 +1,49 @@
use serde_yaml::{Mapping, Value};
pub fn convert_buster_to_dbt_model(buster_yaml: &str) -> Result<String, anyhow::Error> {
let mut yaml_value: Value = serde_yaml::from_str(buster_yaml)?;
if let Value::Mapping(ref mut map) = yaml_value {
if let Some(Value::Sequence(semantic_models)) = map.get_mut("semantic_models") {
for model in semantic_models.iter_mut() {
if let Value::Mapping(model_map) = model {
// Remove Buster-specific fields
model_map.remove("aliases");
// Clean up entities
if let Some(Value::Sequence(entities)) = model_map.get_mut("entities") {
for entity in entities.iter_mut() {
if let Value::Mapping(entity_map) = entity {
entity_map.remove("join_type");
entity_map.remove("relationship_type");
}
}
}
// Clean up dimensions
if let Some(Value::Sequence(dimensions)) = model_map.get_mut("dimensions") {
for dim in dimensions.iter_mut() {
if let Value::Mapping(dim_map) = dim {
dim_map.remove("searchable");
dim_map.remove("alias");
dim_map.remove("timezone");
dim_map.remove("sql");
}
}
}
// Clean up measures
if let Some(Value::Sequence(measures)) = model_map.get_mut("measures") {
for measure in measures.iter_mut() {
if let Value::Mapping(measure_map) = measure {
measure_map.remove("alias");
}
}
}
}
}
}
}
Ok(serde_yaml::to_string(&yaml_value)?)
}

View File

@ -27,4 +27,5 @@ pub async fn get_dbt_profiles_yml() -> Result<Value> {
let contents = fs::read_to_string(path).await?;
Ok(serde_yaml::from_str(&contents)?)
}
}

View File

@ -0,0 +1,34 @@
/// Checks that `convert_buster_to_dbt_model` removes every Buster-specific key
/// while keeping the remaining content of the document.
#[test]
fn test_convert_buster_to_dbt_model() {
    // The nesting indentation is significant: without it the keys would all land
    // at the top level and the fixture would not describe a semantic model.
    let buster_yaml = r#"
version: 2
semantic_models:
  - name: test_model
    aliases: ["alias1"]
    entities:
      - name: entity1
        type: Primary
        join_type: inner
        relationship_type: one_to_one
    dimensions:
      - name: dim1
        type: Categorical
        searchable: true
        alias: ["dim_alias"]
        timezone: "UTC"
        sql: "upper(dim1)"
    measures:
      - name: measure1
        agg: sum
        alias: ["measure_alias"]
"#;
    let dbt_yaml = convert_buster_to_dbt_model(buster_yaml).unwrap();

    // The converted YAML shouldn't contain Buster-specific fields
    assert!(!dbt_yaml.contains("aliases"), "model `aliases` was not removed");
    assert!(!dbt_yaml.contains("join_type"), "entity `join_type` was not removed");
    assert!(
        !dbt_yaml.contains("relationship_type"),
        "entity `relationship_type` was not removed"
    );
    assert!(!dbt_yaml.contains("searchable"), "dimension `searchable` was not removed");
    assert!(!dbt_yaml.contains("timezone"), "dimension `timezone` was not removed");
    assert!(!dbt_yaml.contains("sql"), "dimension `sql` was not removed");
    assert!(!dbt_yaml.contains("alias"), "an `alias` key or value is still present");

    // Everything else must survive the conversion
    assert!(dbt_yaml.contains("test_model"), "model name was lost");
    assert!(dbt_yaml.contains("entity1"), "entity was lost");
    assert!(dbt_yaml.contains("dim1"), "dimension was lost");
    assert!(dbt_yaml.contains("measure1"), "measure was lost");
    assert!(dbt_yaml.contains("agg: sum"), "measure aggregation was lost");
}