feat: enhance dataset validation and deployment error handling

- Add detailed validation error logging in CLI
- Improve type compatibility checks in dataset validation
- Modify deployment process to handle and report validation errors more comprehensively
- Add Hash derive for Verification enum
- Update API and CLI to support more informative validation results

Author: dal · 2025-02-05 17:04:13 -07:00
parent 3c82ac0774
commit f081f3e16e
GPG Key ID: 16F4B0E1E9F61122 (no known key found for this signature in the database)
8 changed files with 220 additions and 41 deletions

View File

@@ -441,6 +441,7 @@ impl FromSql<sql_types::VerificationEnum, Pg> for Verification {
     Copy,
     PartialEq,
     Eq,
+    Hash,
     diesel::AsExpression,
     diesel::FromSqlRow,
 )]
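
Note: deriving Hash (alongside the existing Eq) is what lets Verification values serve as keys in hash-based collections. A minimal standalone sketch of why that matters, using a simplified stand-in enum with hypothetical variants rather than the real diesel-annotated type:

    use std::collections::HashSet;

    // Simplified stand-in for the real Verification enum (variants are illustrative).
    #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
    enum Verification {
        Verified,
        Unverified,
    }

    fn main() {
        // With Hash + Eq derived, the enum works directly as a set/map key.
        let mut seen: HashSet<Verification> = HashSet::new();
        seen.insert(Verification::Verified);
        assert!(seen.contains(&Verification::Verified));
    }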

View File

@@ -219,23 +219,31 @@ pub async fn deploy_datasets(
             }
         };

-        if !validation.success {
-            results.push(validation);
-            continue;
-        }
-
-        // Deploy model
-        match deploy_single_model(&req, &organization_id, &user.id).await {
-            Ok(_) => results.push(validation),
-            Err(e) => {
-                let mut validation = validation;
-                validation.success = false;
-                validation.add_error(ValidationError::data_source_error(e.to_string()));
-                results.push(validation);
-            }
-        }
+        results.push(validation.clone()); // Clone validation result before checking success
+
+        // Only deploy if validation passed
+        if validation.success {
+            // Deploy model
+            match deploy_single_model(&req, &organization_id, &user.id).await {
+                Ok(_) => (),
+                Err(e) => {
+                    let mut failed_validation = validation;
+                    failed_validation.success = false;
+                    failed_validation.add_error(ValidationError::data_source_error(e.to_string()));
+                    results.pop(); // Remove the successful validation
+                    results.push(failed_validation); // Add the failed one
+                }
+            }
+        }
     }

+    // Check if any validations failed
+    let has_failures = results.iter().any(|r| !r.success);
+    if has_failures {
+        tracing::warn!("Some models failed validation");
+        return Ok(ApiResponse::JsonData(DeployDatasetsResponse { results }));
+    }
+
     Ok(ApiResponse::JsonData(DeployDatasetsResponse { results }))
 }
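
Note: the handler now pushes the validation result optimistically and swaps it for a failed copy only if deployment errors out. A standalone sketch of that push-then-replace pattern, with a trimmed-down result type whose names are illustrative only:

    #[derive(Clone)]
    struct ValidationOutcome {
        success: bool,
        errors: Vec<String>,
    }

    fn main() {
        let validation = ValidationOutcome { success: true, errors: vec![] };
        let mut results = vec![validation.clone()]; // optimistic push
        let deploy_failed = true; // stand-in for deploy_single_model returning Err

        if deploy_failed {
            let mut failed = validation; // take ownership of the original
            failed.success = false;
            failed.errors.push("data source error".to_string());
            results.pop();        // drop the optimistic entry
            results.push(failed); // record the failure instead
        }

        assert_eq!(results.len(), 1);
        assert!(!results[0].success);
    }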

View File

@@ -8,7 +8,10 @@ use crate::{
         credentials::get_data_source_credentials,
         import_dataset_columns::retrieve_dataset_columns,
     },
-    validation::types::{ValidationError, ValidationResult},
+    validation::{
+        types::{ValidationError, ValidationResult},
+        type_mapping::{normalize_type, types_compatible},
+    },
 },
};
@@ -72,11 +75,13 @@ pub async fn validate_model(
     // Validate each column
     for (col_name, col_type) in columns {
         if let Some(ds_col) = ds_columns.iter().find(|c| c.name == *col_name) {
-            if !types_compatible(&ds_col.type_, col_type) {
+            if !types_compatible(data_source.type_, &ds_col.type_, col_type) {
+                let ds_type = normalize_type(data_source.type_, &ds_col.type_);
+                let model_type = normalize_type(data_source.type_, col_type);
                 result.add_error(ValidationError::type_mismatch(
                     col_name,
-                    col_type,
-                    &ds_col.type_,
+                    &model_type.to_string(),
+                    &ds_type.to_string(),
                 ));
             }
         } else {
@@ -87,12 +92,6 @@ pub async fn validate_model(
     Ok(result)
 }

-// Basic type compatibility check - will be enhanced in Phase 2
-fn types_compatible(ds_type: &str, model_type: &str) -> bool {
-    // For now, just check exact match
-    ds_type.to_lowercase() == model_type.to_lowercase()
-}
-
 #[cfg(test)]
 mod tests {
     use super::*;
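
Note: the new validation::type_mapping module that replaces the removed exact-match check is not shown in this diff. A plausible sketch of its shape, inferred purely from the call sites in validate_model above; the enum variants, aliases, and mappings here are assumptions, not the committed code:

    // Hypothetical sketch of validation/type_mapping.rs, inferred from its callers.

    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
    pub enum DataSourceType {
        Postgres,
        Snowflake,
    }

    // Collapse dialect-specific aliases into a canonical type name.
    pub fn normalize_type(source: DataSourceType, raw: &str) -> String {
        let t = raw.to_lowercase();
        match (source, t.as_str()) {
            (DataSourceType::Postgres, "int4" | "integer") => "int".to_string(),
            (DataSourceType::Postgres, "varchar" | "character varying") => "text".to_string(),
            (DataSourceType::Snowflake, "number") => "numeric".to_string(),
            _ => t,
        }
    }

    // Two types are compatible when they normalize to the same canonical name.
    pub fn types_compatible(source: DataSourceType, ds_type: &str, model_type: &str) -> bool {
        normalize_type(source, ds_type) == normalize_type(source, model_type)
    }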

View File

@@ -1,5 +1,7 @@
 pub mod dataset_validation;
 pub mod types;
+pub mod type_mapping;

 pub use dataset_validation::*;
-pub use types::*;
+pub use types::*;
+pub use type_mapping::*;

View File

@@ -4,7 +4,7 @@ use std::path::{Path, PathBuf};
 use crate::utils::{
     BusterClient, DeployDatasetsRequest, DeployDatasetsColumnsRequest, DeployDatasetsEntityRelationshipsRequest,
-    buster_credentials::get_and_validate_buster_credentials,
+    buster_credentials::get_and_validate_buster_credentials, ValidationResult, ValidationError, ValidationErrorType,
 };
#[derive(Debug, Deserialize, Serialize, Clone)]
@@ -123,6 +123,81 @@ impl DeployProgress {
             None => println!("⚠️ No SQL file found for '{}', using default SELECT", model_name),
         }
     }
+
+    pub fn log_validation_error(&self, validation: &ValidationResult) {
+        if !validation.success {
+            println!("\n❌ Validation failed for {}", validation.model_name);
+            println!("   Data Source: {}", validation.data_source_name);
+            println!("   Schema: {}", validation.schema);
+
+            // Group errors by type
+            let mut table_errors = Vec::new();
+            let mut column_errors = Vec::new();
+            let mut type_errors = Vec::new();
+            let mut other_errors = Vec::new();
+
+            for error in &validation.errors {
+                match error.error_type {
+                    ValidationErrorType::TableNotFound => table_errors.push(error),
+                    ValidationErrorType::ColumnNotFound => column_errors.push(error),
+                    ValidationErrorType::TypeMismatch => type_errors.push(error),
+                    ValidationErrorType::DataSourceError => other_errors.push(error),
+                }
+            }
+
+            // Print grouped errors
+            if !table_errors.is_empty() {
+                println!("\n   Table/View Errors:");
+                for error in table_errors {
+                    println!("   - {}", error.message);
+                }
+            }
+
+            if !column_errors.is_empty() {
+                println!("\n   Column Errors:");
+                for error in column_errors {
+                    if let Some(col) = &error.column_name {
+                        println!("   - Column '{}': {}", col, error.message);
+                    }
+                }
+            }
+
+            if !type_errors.is_empty() {
+                println!("\n   Type Mismatch Errors:");
+                for error in type_errors {
+                    if let Some(col) = &error.column_name {
+                        println!("   - Column '{}': {}", col, error.message);
+                    }
+                }
+            }
+
+            if !other_errors.is_empty() {
+                println!("\n   Other Errors:");
+                for error in other_errors {
+                    println!("   - {}", error.message);
+                }
+            }
+
+            // Print suggestions if any
+            let suggestions: Vec<_> = validation.errors
+                .iter()
+                .filter_map(|e| e.suggestion.as_ref())
+                .collect();
+
+            if !suggestions.is_empty() {
+                println!("\n💡 Suggestions:");
+                for suggestion in suggestions {
+                    println!("   - {}", suggestion);
+                }
+            }
+        }
+    }
+
+    pub fn log_validation_success(&self, validation: &ValidationResult) {
+        println!("\n✅ Validation passed for {}", validation.model_name);
+        println!("   Data Source: {}", validation.data_source_name);
+        println!("   Schema: {}", validation.schema);
+    }
 }

 impl ModelFile {
@@ -513,21 +588,54 @@ pub async fn deploy_v2(path: Option<&str>) -> Result<()> {
     }

     let data_source_name = deploy_requests[0].data_source_name.clone();
-    if let Err(e) = client.deploy_datasets(deploy_requests).await {
-        println!("\n❌ Deployment failed!");
-        println!("Error: {}", e);
-        println!("\n💡 Troubleshooting:");
-        println!("1. Check data source:");
-        println!("   - Verify '{}' exists in Buster", data_source_name);
-        println!("   - Confirm it has env='dev'");
-        println!("   - Check your access permissions");
-        println!("2. Check model definitions:");
-        println!("   - Validate SQL syntax");
-        println!("   - Verify column names match");
-        println!("3. Check relationships:");
-        println!("   - Ensure referenced models exist");
-        println!("   - Verify relationship types");
-        return Err(anyhow::anyhow!("Failed to deploy models to Buster: {}", e));
-    }
+    match client.deploy_datasets(deploy_requests).await {
+        Ok(response) => {
+            let mut has_validation_errors = false;
+
+            // Process validation results
+            for validation in &response.results {
+                if validation.success {
+                    progress.log_validation_success(validation);
+                } else {
+                    has_validation_errors = true;
+                    progress.log_validation_error(validation);
+                }
+            }
+
+            if has_validation_errors {
+                println!("\n❌ Deployment failed due to validation errors!");
+                println!("\n💡 Troubleshooting:");
+                println!("1. Check data source:");
+                println!("   - Verify '{}' exists in Buster", data_source_name);
+                println!("   - Confirm it has env='dev'");
+                println!("   - Check your access permissions");
+                println!("2. Check model definitions:");
+                println!("   - Validate SQL syntax");
+                println!("   - Verify column names match");
+                println!("3. Check relationships:");
+                println!("   - Ensure referenced models exist");
+                println!("   - Verify relationship types");
+                return Err(anyhow::anyhow!("Deployment failed due to validation errors"));
+            }
+
+            println!("\n✅ All models deployed successfully!");
+        }
+        Err(e) => {
+            println!("\n❌ Deployment failed!");
+            println!("Error: {}", e);
+            println!("\n💡 Troubleshooting:");
+            println!("1. Check data source:");
+            println!("   - Verify '{}' exists in Buster", data_source_name);
+            println!("   - Confirm it has env='dev'");
+            println!("   - Check your access permissions");
+            println!("2. Check model definitions:");
+            println!("   - Validate SQL syntax");
+            println!("   - Verify column names match");
+            println!("3. Check relationships:");
+            println!("   - Ensure referenced models exist");
+            println!("   - Verify relationship types");
+            return Err(anyhow::anyhow!("Failed to deploy models to Buster: {}", e));
+        }
     }
 }

View File

@@ -6,6 +6,7 @@ use reqwest::{
 use super::{
     PostDataSourcesRequest, DeployDatasetsRequest, ValidateApiKeyRequest, ValidateApiKeyResponse,
+    DeployDatasetsResponse,
 };
pub struct BusterClient {
@@ -85,7 +86,7 @@ impl BusterClient {
         }
     }

-    pub async fn deploy_datasets(&self, req_body: Vec<DeployDatasetsRequest>) -> Result<()> {
+    pub async fn deploy_datasets(&self, req_body: Vec<DeployDatasetsRequest>) -> Result<DeployDatasetsResponse> {
         let headers = self.build_headers()?;
match self
@@ -103,7 +104,7 @@ impl BusterClient {
                         res.text().await?
                     ));
                 }
-                Ok(())
+                Ok(res.json().await?)
             }
             Err(e) => Err(anyhow::anyhow!("POST /api/v1/datasets/deploy failed: {}", e)),
         }

View File

@@ -56,3 +56,33 @@ pub struct DeployDatasetsEntityRelationshipsRequest {
     #[serde(rename = "type")]
     pub type_: String,
 }
+
+#[derive(Debug, Deserialize)]
+pub struct ValidationResult {
+    pub success: bool,
+    pub model_name: String,
+    pub data_source_name: String,
+    pub schema: String,
+    pub errors: Vec<ValidationError>,
+}
+
+#[derive(Debug, Deserialize)]
+pub struct ValidationError {
+    pub error_type: ValidationErrorType,
+    pub column_name: Option<String>,
+    pub message: String,
+    pub suggestion: Option<String>,
+}
+
+#[derive(Debug, Deserialize, PartialEq)]
+pub enum ValidationErrorType {
+    TableNotFound,
+    ColumnNotFound,
+    TypeMismatch,
+    DataSourceError,
+}
+
+#[derive(Debug, Deserialize)]
+pub struct DeployDatasetsResponse {
+    pub results: Vec<ValidationResult>,
+}
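
Note: these types are deserialized straight from the deploy response body. A minimal sketch of a payload that matches them, assuming the definitions above are in scope, serde_json is available in the dependency tree, and using illustrative model and column names:

    // Unit-variant enums like ValidationErrorType deserialize from the plain
    // variant name, e.g. "TypeMismatch" (serde's default representation).
    fn main() -> anyhow::Result<()> {
        let body = r#"{
            "results": [{
                "success": false,
                "model_name": "orders",
                "data_source_name": "warehouse",
                "schema": "analytics",
                "errors": [{
                    "error_type": "TypeMismatch",
                    "column_name": "amount",
                    "message": "model declares 'text', data source has 'numeric'",
                    "suggestion": "change the model column type to 'numeric'"
                }]
            }]
        }"#;

        let response: DeployDatasetsResponse = serde_json::from_str(body)?;
        let first_error = &response.results[0].errors[0];
        assert_eq!(first_error.error_type, ValidationErrorType::TypeMismatch);
        Ok(())
    }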

logs/dbt.log (new file, 30 lines)
View File

@@ -0,0 +1,30 @@
16:43:12.295485 [debug] [MainThread]: Sending event: {'category': 'dbt', 'action': 'invocation', 'label': 'start', 'context': [<snowplow_tracker.self_describing_json.SelfDescribingJson object at 0x118927770>, <snowplow_tracker.self_describing_json.SelfDescribingJson object at 0x118946f00>, <snowplow_tracker.self_describing_json.SelfDescribingJson object at 0x118947b90>]}
16:43:12.299354 [debug] [MainThread]: An error was encountered while trying to send an event
============================== 16:43:12.299620 | 70b3c2c6-8fd9-4fa3-81da-c626eae8c910 ==============================
16:43:12.299620 [info ] [MainThread]: Running with dbt=1.9.1
16:43:12.299904 [debug] [MainThread]: running dbt with arguments {'printer_width': '80', 'indirect_selection': 'eager', 'write_json': 'True', 'log_cache_events': 'False', 'partial_parse': 'True', 'cache_selected_only': 'False', 'profiles_dir': '/Users/dallin/.dbt', 'debug': 'False', 'warn_error': 'None', 'log_path': 'logs', 'version_check': 'True', 'fail_fast': 'False', 'use_colors': 'True', 'use_experimental_parser': 'False', 'empty': 'None', 'quiet': 'False', 'no_print': 'None', 'log_format': 'default', 'invocation_command': 'dbt ', 'introspect': 'True', 'static_parser': 'True', 'target_path': 'None', 'warn_error_options': 'WarnErrorOptions(include=[], exclude=[])', 'send_anonymous_usage_stats': 'True'}
16:43:12.300518 [error] [MainThread]: Encountered an error:
Runtime Error
dbt_project.yml does not parse to a dictionary
16:43:12.307797 [debug] [MainThread]: Resource report: {"command_name": "deps", "command_success": false, "command_wall_clock_time": 0.059449833, "process_in_blocks": "0", "process_kernel_time": 0.115982, "process_mem_max_rss": "109068288", "process_out_blocks": "0", "process_user_time": 0.735308}
16:43:12.308245 [debug] [MainThread]: Command `cli deps` failed at 16:43:12.308182 after 0.06 seconds
16:43:12.308493 [debug] [MainThread]: Sending event: {'category': 'dbt', 'action': 'invocation', 'label': 'end', 'context': [<snowplow_tracker.self_describing_json.SelfDescribingJson object at 0x100796ff0>, <snowplow_tracker.self_describing_json.SelfDescribingJson object at 0x1187fb320>, <snowplow_tracker.self_describing_json.SelfDescribingJson object at 0x1189471d0>]}
16:43:12.308659 [debug] [MainThread]: An error was encountered while trying to send an event
16:43:12.308794 [debug] [MainThread]: Flushing usage events
16:51:16.058543 [debug] [MainThread]: Sending event: {'category': 'dbt', 'action': 'invocation', 'label': 'start', 'context': [<snowplow_tracker.self_describing_json.SelfDescribingJson object at 0x112e66a80>, <snowplow_tracker.self_describing_json.SelfDescribingJson object at 0x1131278c0>, <snowplow_tracker.self_describing_json.SelfDescribingJson object at 0x1131471d0>]}
16:51:16.062068 [debug] [MainThread]: An error was encountered while trying to send an event
============================== 16:51:16.062308 | 8665ca62-25ab-439c-b703-12898d07be6d ==============================
16:51:16.062308 [info ] [MainThread]: Running with dbt=1.9.1
16:51:16.062585 [debug] [MainThread]: running dbt with arguments {'printer_width': '80', 'indirect_selection': 'eager', 'log_cache_events': 'False', 'write_json': 'True', 'partial_parse': 'True', 'cache_selected_only': 'False', 'profiles_dir': '/Users/dallin/.dbt', 'fail_fast': 'False', 'version_check': 'True', 'log_path': 'logs', 'debug': 'False', 'warn_error': 'None', 'use_colors': 'True', 'use_experimental_parser': 'False', 'no_print': 'None', 'quiet': 'False', 'empty': 'None', 'log_format': 'default', 'introspect': 'True', 'warn_error_options': 'WarnErrorOptions(include=[], exclude=[])', 'static_parser': 'True', 'target_path': 'None', 'invocation_command': 'dbt ', 'send_anonymous_usage_stats': 'True'}
16:51:16.063228 [error] [MainThread]: Encountered an error:
Runtime Error
dbt_project.yml does not parse to a dictionary
16:51:16.064688 [debug] [MainThread]: Resource report: {"command_name": "deps", "command_success": false, "command_wall_clock_time": 0.050356373, "process_in_blocks": "0", "process_kernel_time": 0.096193, "process_mem_max_rss": "108462080", "process_out_blocks": "0", "process_user_time": 0.712887}
16:51:16.065111 [debug] [MainThread]: Command `cli deps` failed at 16:51:16.065048 after 0.05 seconds
16:51:16.065346 [debug] [MainThread]: Sending event: {'category': 'dbt', 'action': 'invocation', 'label': 'end', 'context': [<snowplow_tracker.self_describing_json.SelfDescribingJson object at 0x112241640>, <snowplow_tracker.self_describing_json.SelfDescribingJson object at 0x112e07f20>, <snowplow_tracker.self_describing_json.SelfDescribingJson object at 0x11315bcb0>]}
16:51:16.065563 [debug] [MainThread]: An error was encountered while trying to send an event
16:51:16.065718 [debug] [MainThread]: Flushing usage events