mirror of https://github.com/buster-so/buster.git
hotfix: sync values job
parent 17837e6486
commit 6a60055160
@@ -32,6 +32,7 @@ search = { path = "../search" }
 email = { path = "../email" }
 sql_analyzer = { path = "../sql_analyzer" }
 dataset_security = { path = "../dataset_security" }
+semantic_layer = { path = "../semantic_layer" }

 # Add any handler-specific dependencies here
 dashmap = "5.5.3"
@@ -5,20 +5,26 @@ use chrono::{DateTime, Utc};
 use diesel::{upsert::excluded, ExpressionMethods, QueryDsl};
 use diesel_async::RunQueryDsl;
 use serde::Deserialize; // Added for DeployDatasetsRequest if it's moved or redefined here
+use serde_yaml; // Added for model deserialization
 use std::collections::{HashMap, HashSet};
-use uuid::Uuid;
+use tokio::spawn; // Added for concurrent job execution
 use tracing::{error, info, warn};
+use uuid::Uuid;

 // Types from this crate's parent (handlers) -> Corrected to super
-use super::types::{ValidationError, ValidationResult, DeployDatasetsRequest}; // Added DeployDatasetsRequest
+use super::types::{DeployDatasetsRequest, ValidationError, ValidationResult}; // Added DeployDatasetsRequest

 // Corrected to use the `database` crate directly as per Cargo.toml
-use database::{{
+use database::{
     enums::DatasetType,
-    models::{{DataSource, Dataset}},
+    models::{DataSource, Dataset},
     pool::get_pg_pool,
-    schema::{{data_sources, datasets}},
-}};
+    schema::{data_sources, datasets},
+};

+// Add imports for semantic_layer and stored_values
+use semantic_layer::models::Model as SemanticModel; // Using alias
+use stored_values::jobs as stored_values_jobs; // Using module alias for clarity
+
 // TODO: Define or import necessary structs like DeployDatasetsRequest, DataSource, Dataset, etc.
 // For now, let's assume DeployDatasetsRequest will be passed in.
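Note on the new serde_yaml / SemanticModel imports: later in this commit each upserted dataset's yml_file is deserialized into a semantic model whose dimensions are checked for a searchable flag. A minimal sketch of that deserialization step follows; the structs are stand-ins for semantic_layer::models::Model covering only the fields this handler reads (name, dimensions[].name, dimensions[].searchable), and the #[serde(default)] attributes are an assumption, not the crate's real definition.

// Stand-in for semantic_layer::models::Model (assumed shape, not the real crate).
use serde::Deserialize;

#[derive(Debug, Deserialize)]
struct Dimension {
    name: String,
    #[serde(default)]
    searchable: bool, // assumed to default to false when omitted in the YML
}

#[derive(Debug, Deserialize)]
struct Model {
    name: String,
    #[serde(default)]
    dimensions: Vec<Dimension>,
}

fn main() -> Result<(), serde_yaml::Error> {
    let yml = r#"
name: orders
dimensions:
  - name: status
    searchable: true
  - name: order_id
"#;
    let model: Model = serde_yaml::from_str(yml)?;
    for dim in &model.dimensions {
        if dim.searchable {
            println!("model '{}': dimension '{}' is searchable", model.name, dim.name);
        }
    }
    Ok(())
}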
@@ -32,7 +38,7 @@ pub async fn deploy_datasets_handler_core(
     user_id: &Uuid,
     organization_id: Uuid, // Pass organization_id directly
     requests: Vec<DeployDatasetsRequest>, // This now uses the imported DeployDatasetsRequest
     // conn: &mut AsyncPgConnection, // Or get a connection from a pool passed in/accessible globally
 ) -> Result<Vec<ValidationResult>> {
     // Temporary: Get a connection. This needs to be replaced with proper DB connection management.
     let mut conn = database::pool::get_pg_pool().get().await?; // This path is incorrect from here
@@ -121,9 +127,11 @@ pub async fn deploy_datasets_handler_core(
            }
        };

-        let mut datasets_to_upsert_map: HashMap<(String, Uuid), database::models::Dataset> = HashMap::new(); // Incorrect path
+        let mut datasets_to_upsert_map: HashMap<(String, Uuid), database::models::Dataset> =
+            HashMap::new(); // Incorrect path

-        for req in group.clone() { // group is Vec<&DeployDatasetsRequest>
+        for req in group.clone() {
+            // group is Vec<&DeployDatasetsRequest>
            // Collect names of datasets intended for deployment in this group for this data_source.id
            deployed_datasets_by_data_source
                .entry(data_source.id)
@@ -136,22 +144,31 @@ pub async fn deploy_datasets_handler_core(
                req.schema.clone(),
            );
            // Basic validation, e.g., if model is required
-            if req.model.is_none() { // Example validation: model is required
-                validation.add_error(ValidationError::internal_error("Field 'model' is required.".to_string()));
+            if req.model.is_none() {
+                // Example validation: model is required
+                validation.add_error(ValidationError::internal_error(
+                    "Field 'model' is required.".to_string(),
+                ));
                results.push(validation);
                continue; // Skip this request
            }
            validation.success = true; // Assume success initially, will be overridden by upsert errors
            results.push(validation);

            let now = Utc::now();
-            let dataset_id = existing_dataset_ids.get(&req.name).copied().unwrap_or_else(|| req.id.unwrap_or_else(Uuid::new_v4));
+            let dataset_id = existing_dataset_ids
+                .get(&req.name)
+                .copied()
+                .unwrap_or_else(|| req.id.unwrap_or_else(Uuid::new_v4));

            // Use req.database as a fallback for database_identifier
-            let final_database_identifier = req.database_identifier.clone().or_else(|| req.database.clone());
+            let final_database_identifier = req
+                .database_identifier
+                .clone()
+                .or_else(|| req.database.clone());

-            let dataset = database::models::Dataset { // Incorrect path
+            let dataset = database::models::Dataset {
+                // Incorrect path
                id: dataset_id,
                name: req.name.clone(),
                data_source_id: data_source.id,
@@ -163,7 +180,7 @@ pub async fn deploy_datasets_handler_core(
                type_: database::enums::DatasetType::View, // Incorrect path
                definition: req.sql_definition.clone().unwrap_or_default(), // Still keeping SQL definition
                schema: req.schema.clone(),
                enabled: true, // By default, a deployed dataset is enabled
                created_by: *user_id, // This will be handled by DB default or on_conflict for new records
                updated_by: *user_id,
                deleted_at: None, // Explicitly mark as not deleted
@@ -176,24 +193,35 @@ pub async fn deploy_datasets_handler_core(
            datasets_to_upsert_map.insert((req.name.clone(), data_source.id), dataset);
        }

-        let datasets_to_upsert: Vec<database::models::Dataset> = datasets_to_upsert_map.into_values().collect(); // Incorrect path
+        let datasets_to_upsert: Vec<database::models::Dataset> =
+            datasets_to_upsert_map.into_values().collect(); // Incorrect path

        if !datasets_to_upsert.is_empty() {
            let now = Utc::now();
            match diesel::insert_into(database::schema::datasets::table) // Incorrect path
                .values(&datasets_to_upsert)
-                .on_conflict((database::schema::datasets::database_name, database::schema::datasets::data_source_id)) // Incorrect path
+                .on_conflict((
+                    database::schema::datasets::database_name,
+                    database::schema::datasets::data_source_id,
+                )) // Incorrect path
                .do_update()
                .set((
                    database::schema::datasets::name.eq(excluded(database::schema::datasets::name)),
                    database::schema::datasets::updated_at.eq(now),
-                    database::schema::datasets::updated_by.eq(excluded(database::schema::datasets::updated_by)),
-                    database::schema::datasets::definition.eq(excluded(database::schema::datasets::definition)),
-                    database::schema::datasets::when_to_use.eq(excluded(database::schema::datasets::when_to_use)),
-                    database::schema::datasets::model.eq(excluded(database::schema::datasets::model)),
-                    database::schema::datasets::yml_file.eq(excluded(database::schema::datasets::yml_file)),
-                    database::schema::datasets::schema.eq(excluded(database::schema::datasets::schema)),
-                    database::schema::datasets::database_identifier.eq(excluded(database::schema::datasets::database_identifier)),
+                    database::schema::datasets::updated_by
+                        .eq(excluded(database::schema::datasets::updated_by)),
+                    database::schema::datasets::definition
+                        .eq(excluded(database::schema::datasets::definition)),
+                    database::schema::datasets::when_to_use
+                        .eq(excluded(database::schema::datasets::when_to_use)),
+                    database::schema::datasets::model
+                        .eq(excluded(database::schema::datasets::model)),
+                    database::schema::datasets::yml_file
+                        .eq(excluded(database::schema::datasets::yml_file)),
+                    database::schema::datasets::schema
+                        .eq(excluded(database::schema::datasets::schema)),
+                    database::schema::datasets::database_identifier
+                        .eq(excluded(database::schema::datasets::database_identifier)),
                    database::schema::datasets::enabled.eq(true), // Directly set to true on upsert
                    database::schema::datasets::deleted_at.eq(None as Option<DateTime<Utc>>), // Explicitly ensure it's not deleted
                ))
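The hunk above is mostly a rustfmt reflow of the existing bulk upsert. As a reference, here is a stripped-down sketch of the same Diesel on_conflict((…)).do_update() pattern against a hypothetical, trimmed datasets table; the column set and types are assumptions for illustration (the real schema lives in the database crate), and it relies on diesel's uuid/chrono features plus diesel-async's Postgres support.

// Sketch only: reduced `datasets` table with the composite conflict target
// (database_name, data_source_id) used above. Columns and types are assumed.
use chrono::{DateTime, Utc};
use diesel::prelude::*;
use diesel::upsert::excluded;
use diesel_async::{AsyncPgConnection, RunQueryDsl};
use uuid::Uuid;

diesel::table! {
    datasets (id) {
        id -> Uuid,
        name -> Text,
        database_name -> Text,
        data_source_id -> Uuid,
        updated_at -> Timestamptz,
        enabled -> Bool,
        deleted_at -> Nullable<Timestamptz>,
    }
}

#[derive(Insertable)]
#[diesel(table_name = datasets)]
struct DatasetRow {
    id: Uuid,
    name: String,
    database_name: String,
    data_source_id: Uuid,
    updated_at: DateTime<Utc>,
    enabled: bool,
    deleted_at: Option<DateTime<Utc>>,
}

// Insert the batch; on a (database_name, data_source_id) conflict, refresh the
// row from the incoming values, bump updated_at, re-enable it, and un-delete it.
async fn upsert_datasets(conn: &mut AsyncPgConnection, rows: &[DatasetRow]) -> QueryResult<usize> {
    diesel::insert_into(datasets::table)
        .values(rows)
        .on_conflict((datasets::database_name, datasets::data_source_id))
        .do_update()
        .set((
            datasets::name.eq(excluded(datasets::name)),
            datasets::updated_at.eq(Utc::now()),
            datasets::enabled.eq(true),
            datasets::deleted_at.eq(None::<DateTime<Utc>>),
        ))
        .execute(conn)
        .await
}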
@@ -201,24 +229,136 @@ pub async fn deploy_datasets_handler_core(
                .await
            {
                Ok(num_upserted) => {
-                    info!("Successfully upserted {} datasets for data source '{}'", num_upserted, data_source_name);
+                    info!(
+                        "Successfully upserted {} datasets for data source '{}'",
+                        num_upserted, data_source_name
+                    );
                    // Success is already marked for validation results, no change needed here unless specific counts matter.
                }
                Err(e) => {
-                    error!("Failed to bulk upsert datasets for data_source_id '{}': {}", data_source.id, e);
+                    error!(
+                        "Failed to bulk upsert datasets for data_source_id '{}': {}",
+                        data_source.id, e
+                    );
                    // Mark all results for this group's successfully mapped datasets as failed
-                    for dataset_to_upsert in datasets_to_upsert {
-                        if let Some(validation_result) = results.iter_mut().find(|r| r.model_name == dataset_to_upsert.name && r.data_source_name == data_source_name && r.schema == dataset_to_upsert.schema) {
+                    for dataset_to_upsert in &datasets_to_upsert {
+                        if let Some(validation_result) = results.iter_mut().find(|r| {
+                            r.model_name == dataset_to_upsert.name
+                                && r.data_source_name == data_source_name
+                                && r.schema == dataset_to_upsert.schema
+                        }) {
                            validation_result.success = false; // Mark as false explicitly
-                            validation_result.add_error(ValidationError::internal_error(format!("Failed to upsert dataset: {}", e)));
+                            validation_result.add_error(ValidationError::internal_error(format!(
+                                "Failed to upsert dataset: {}",
+                                e
+                            )));
                        }
                    }
                }
            };
            // Column processing is skipped as per requirements.
-            // Stored values sync job logic is also skipped as it depends on columns.
+
+            // ---- START: New logic for stored values jobs ----
+            info!(
+                data_source_id = %data_source.id,
+                "Processing datasets for potential stored value sync jobs."
+            );
+            for dataset_for_jobs in &datasets_to_upsert {
+                if let Some(yml_file_content) = &dataset_for_jobs.yml_file {
+                    match serde_yaml::from_str::<SemanticModel>(yml_file_content) {
+                        Ok(model_data) => {
+                            let job_database_name = match dataset_for_jobs
+                                .database_identifier
+                                .as_ref()
+                            {
+                                Some(db_id) => db_id.clone(),
+                                None => {
+                                    warn!(
+                                        dataset_name = %dataset_for_jobs.name,
+                                        data_source_id = %dataset_for_jobs.data_source_id,
+                                        model_name = %model_data.name,
+                                        "Skipping stored values job creation for model dimensions: dataset.database_identifier is None."
+                                    );
+                                    continue; // Skip this model's dimensions processing
+                                }
+                            };
+
+                            let job_schema_name = dataset_for_jobs.schema.clone();
+
+                            for dimension in model_data.dimensions {
+                                if dimension.searchable {
+                                    info!(
+                                        "Found searchable dimension '{}' in model '{}' for dataset '{}' (data_source_id: {})",
+                                        dimension.name, model_data.name, dataset_for_jobs.name, dataset_for_jobs.data_source_id
+                                    );
+
+                                    let job_data_source_id = dataset_for_jobs.data_source_id;
+                                    let current_job_database_name = job_database_name.clone();
+                                    let current_job_schema_name = job_schema_name.clone();
+                                    let job_table_name = dataset_for_jobs.name.clone();
+                                    let job_column_name = dimension.name.clone();
+
+                                    spawn(async move {
+                                        info!(
+                                            data_source_id = %job_data_source_id,
+                                            database_name = %current_job_database_name,
+                                            schema_name = %current_job_schema_name,
+                                            table_name = %job_table_name,
+                                            column_name = %job_column_name,
+                                            "Setting up and running stored values sync job for searchable dimension."
+                                        );
+
+                                        if let Err(e) = stored_values_jobs::setup_sync_job(
+                                            job_data_source_id,
+                                            current_job_database_name.clone(),
+                                            current_job_schema_name.clone(),
+                                            job_table_name.clone(),
+                                            job_column_name.clone(),
+                                        )
+                                        .await
+                                        {
+                                            error!(
+                                                "Failed to setup stored values sync job for {}.{}.{}.{} on data_source {}: {}",
+                                                current_job_database_name, current_job_schema_name, job_table_name, job_column_name, job_data_source_id, e
+                                            );
+                                            return;
+                                        }
+
+                                        match stored_values_jobs::sync_distinct_values_chunk(
+                                            job_data_source_id,
+                                            current_job_database_name,
+                                            current_job_schema_name,
+                                            job_table_name,
+                                            job_column_name,
+                                        ).await {
+                                            Ok(count) => info!(
+                                                "Successfully synced {} distinct values for searchable dimension '{}' (data_source_id: {}).",
+                                                count, dimension.name, job_data_source_id
+                                            ),
+                                            Err(e) => error!(
+                                                "Failed to sync distinct values for searchable dimension '{}' (data_source_id: {}): {}",
+                                                dimension.name, job_data_source_id, e
+                                            ),
+                                        }
+                                    });
+                                }
+                            }
+                        }
+                        Err(e) => {
+                            error!(
+                                "Failed to deserialize YML content for dataset '{}' (data_source_id: {}): {}. Skipping sync job creation for its dimensions.",
+                                dataset_for_jobs.name, dataset_for_jobs.data_source_id, e
+                            );
+                        }
+                    }
+                }
+            }
+            // ---- END: New logic for stored values jobs ----
        } else {
-            info!("No datasets to upsert for data source '{}'", data_source_name);
+            info!(
+                "No datasets to upsert for data source '{}'",
+                data_source_name
+            );
        }
    }
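The block added above is the core of this hotfix: for every searchable dimension it spawns a detached Tokio task that first runs stored_values::jobs::setup_sync_job and, only if that succeeds, sync_distinct_values_chunk, logging either outcome. A runnable sketch of that fan-out pattern follows; the two job functions here are hypothetical stubs standing in for the stored_values crate (only their rough argument shape is taken from the call sites above, and the String error type is an assumption), and the handler itself never awaits the spawned tasks — the example awaits only so it terminates.

use tokio::task::JoinHandle;
use uuid::Uuid;

// Stub standing in for stored_values::jobs::setup_sync_job (assumed shape).
async fn setup_sync_job(
    data_source_id: Uuid,
    database: String,
    schema: String,
    table: String,
    column: String,
) -> Result<(), String> {
    println!("setup {data_source_id}: {database}.{schema}.{table}.{column}");
    Ok(())
}

// Stub standing in for stored_values::jobs::sync_distinct_values_chunk (assumed shape).
async fn sync_distinct_values_chunk(
    data_source_id: Uuid,
    database: String,
    schema: String,
    table: String,
    column: String,
) -> Result<usize, String> {
    println!("sync {data_source_id}: {database}.{schema}.{table}.{column}");
    Ok(0)
}

// One detached task per searchable dimension: setup first, then the first
// chunk of distinct values; failures are logged and end the task early.
fn spawn_sync(
    data_source_id: Uuid,
    database: String,
    schema: String,
    table: String,
    column: String,
) -> JoinHandle<()> {
    tokio::spawn(async move {
        if let Err(e) = setup_sync_job(
            data_source_id,
            database.clone(),
            schema.clone(),
            table.clone(),
            column.clone(),
        )
        .await
        {
            eprintln!("failed to set up stored values sync job: {e}");
            return;
        }
        match sync_distinct_values_chunk(data_source_id, database, schema, table, column).await {
            Ok(count) => println!("synced {count} distinct values"),
            Err(e) => eprintln!("failed to sync distinct values: {e}"),
        }
    })
}

#[tokio::main]
async fn main() {
    let handle = spawn_sync(
        Uuid::new_v4(),
        "analytics".into(),
        "public".into(),
        "orders".into(),
        "status".into(),
    );
    // Awaited here only so the example finishes deterministically.
    handle.await.unwrap();
}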
@@ -234,7 +374,10 @@ pub async fn deploy_datasets_handler_core(
        let active_datasets_in_db: Vec<(Uuid, String)> = match database::schema::datasets::table
            .filter(database::schema::datasets::data_source_id.eq(data_source_id_to_clean))
            .filter(database::schema::datasets::deleted_at.is_null())
-            .select((database::schema::datasets::id, database::schema::datasets::database_name))
+            .select((
+                database::schema::datasets::id,
+                database::schema::datasets::database_name,
+            ))
            .load::<(Uuid, String)>(&mut conn)
            .await
        {
@@ -263,15 +406,18 @@ pub async fn deploy_datasets_handler_core(
                data_source_id_to_clean
            );
            let now = Utc::now();
-            match diesel::update(database::schema::datasets::table.filter(database::schema::datasets::id.eq_any(&dataset_ids_to_soft_delete)))
+            match diesel::update(
+                database::schema::datasets::table
+                    .filter(database::schema::datasets::id.eq_any(&dataset_ids_to_soft_delete)),
+            )
            .set((
                database::schema::datasets::deleted_at.eq(now),
                database::schema::datasets::updated_at.eq(now),
                database::schema::datasets::updated_by.eq(*user_id),
                database::schema::datasets::enabled.eq(false),
            ))
            .execute(&mut conn)
            .await
            {
                Ok(num_deleted) => {
                    info!(
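The cleanup pass above soft-deletes datasets that are still active in the database but were not part of this deployment. The lines that actually build dataset_ids_to_soft_delete fall outside this diff, so the sketch below is only a guess at the general shape of that computation (a set difference on dataset names), not the repository's code.

use std::collections::HashSet;
use uuid::Uuid;

// Keep the ids of active rows whose database_name was not deployed this run.
fn stale_dataset_ids(
    active_datasets_in_db: &[(Uuid, String)],
    deployed_names: &HashSet<String>,
) -> Vec<Uuid> {
    active_datasets_in_db
        .iter()
        .filter(|(_, name)| !deployed_names.contains(name))
        .map(|(id, _)| *id)
        .collect()
}

fn main() {
    let kept = Uuid::new_v4();
    let stale = Uuid::new_v4();
    let active = vec![
        (kept, "orders".to_string()),
        (stale, "legacy_orders".to_string()),
    ];
    let deployed: HashSet<String> = HashSet::from(["orders".to_string()]);
    assert_eq!(stale_dataset_ids(&active, &deployed), vec![stale]);
}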