From f08ef35270b301fa64bf1af6377478255db4cee2 Mon Sep 17 00:00:00 2001 From: dal Date: Thu, 8 May 2025 01:55:58 -0600 Subject: [PATCH] cli release for homebrew deploy, backwards compatibility on model types --- .github/workflows/cli-release.yml | 4 + api/libs/agents/Cargo.toml | 1 + .../agents/src/agents/buster_multi_agent.rs | 35 +-- .../file_tools/search_data_catalog.rs | 211 +++++++++++------- web/package-lock.json | 4 +- 5 files changed, 155 insertions(+), 100 deletions(-) diff --git a/.github/workflows/cli-release.yml b/.github/workflows/cli-release.yml index 529c9d7ae..3f02a5b6e 100644 --- a/.github/workflows/cli-release.yml +++ b/.github/workflows/cli-release.yml @@ -115,6 +115,9 @@ jobs: release: needs: build runs-on: ubuntu-latest + outputs: + cli_version: ${{ steps.get_version.outputs.version }} + cli_tag_name: ${{ steps.create_the_release.outputs.tag_name }} steps: - name: Checkout code uses: actions/checkout@v4 @@ -131,6 +134,7 @@ jobs: echo "version=$VERSION" >> $GITHUB_OUTPUT echo "Extracted version: $VERSION" - name: Create Release + id: create_the_release uses: softprops/action-gh-release@v1 with: tag_name: v${{ steps.get_version.outputs.version }} diff --git a/api/libs/agents/Cargo.toml b/api/libs/agents/Cargo.toml index 5b029498d..25febaf8c 100644 --- a/api/libs/agents/Cargo.toml +++ b/api/libs/agents/Cargo.toml @@ -37,6 +37,7 @@ thiserror = { workspace = true } raindrop = { path = "../raindrop" } sql_analyzer = { path = "../sql_analyzer" } rerank = { path = "../rerank" } +semantic_layer = { path = "../semantic_layer" } # Development dependencies [dev-dependencies] diff --git a/api/libs/agents/src/agents/buster_multi_agent.rs b/api/libs/agents/src/agents/buster_multi_agent.rs index c15a13a1a..16d380d16 100644 --- a/api/libs/agents/src/agents/buster_multi_agent.rs +++ b/api/libs/agents/src/agents/buster_multi_agent.rs @@ -24,6 +24,9 @@ use crate::{agent::ModeProvider, Agent, AgentError, AgentExt, AgentThread}; // A use litellm::AgentMessage; +// 
Import the semantic layer models +use semantic_layer::models::SemanticLayerSpec; // Assuming models.rs is accessible like this + // Import AgentState and determine_agent_state (assuming they are pub in modes/mod.rs or similar) // If not, they might need to be moved or re-exported. // For now, let's assume they are accessible via crate::agents::modes::{AgentState, determine_agent_state} @@ -35,6 +38,7 @@ pub struct BusterSuperAgentOutput { pub duration: i64, pub thread_id: Uuid, pub messages: Vec, + pub message_id: Option, } #[derive(Debug, Deserialize, Serialize)] @@ -115,16 +119,16 @@ impl DatasetWithDescriptions { } // Define structs for YAML parsing -#[derive(Debug, Deserialize)] -struct YamlRoot { - models: Vec, -} +// #[derive(Debug, Deserialize)] +// struct YamlRoot { +// models: Vec, +// } -#[derive(Debug, Deserialize)] -struct ModelInfo { - name: String, - description: String, -} +// #[derive(Debug, Deserialize)] +// struct ModelInfo { +// name: String, +// description: String, +// } impl BusterMultiAgent { pub async fn new(user_id: Uuid, session_id: Uuid, is_follow_up: bool) -> Result { @@ -136,14 +140,19 @@ impl BusterMultiAgent { let dataset_descriptions: Vec = permissioned_datasets .into_iter() .filter_map(|ds| ds.yml_content) // Get Some(String), filter out None - .map(|content| serde_yaml::from_str::(&content)) // Parse String -> Result + .map(|content| serde_yaml::from_str::(&content)) // Parse String -> Result .filter_map(|result| { // Handle Result match result { - Ok(parsed_root) => { + Ok(parsed_spec) => { // Extract info from the first model if available - if let Some(model) = parsed_root.models.first() { - Some(format!("{}: {}", model.name, model.description)) + if let Some(model) = parsed_spec.models.first() { + // model.description is Option, handle it + let description = model + .description + .as_deref() + .unwrap_or("No description available"); + Some(format!("{}: {}", model.name, description)) } else { tracing::warn!("Parsed YAML has no 
models"); None diff --git a/api/libs/agents/src/tools/categories/file_tools/search_data_catalog.rs b/api/libs/agents/src/tools/categories/file_tools/search_data_catalog.rs index f002f2dfe..ea6b8b5a8 100644 --- a/api/libs/agents/src/tools/categories/file_tools/search_data_catalog.rs +++ b/api/libs/agents/src/tools/categories/file_tools/search_data_catalog.rs @@ -29,6 +29,9 @@ use sqlx::PgPool; use stored_values; use rerank::Reranker; +// Import SemanticLayerSpec +use semantic_layer::models::SemanticLayerSpec; + use crate::{agent::Agent, tools::ToolExecutor}; // NEW: Structure to represent found values with their source information @@ -1173,115 +1176,153 @@ async fn generate_embeddings_batch(texts: Vec) -> Result Result> { - let yaml: serde_yaml::Value = serde_yaml::from_str(yml_content) - .context("Failed to parse dataset YAML content")?; - let mut searchable_dimensions = Vec::new(); - - // Check if models field exists - if let Some(models) = yaml["models"].as_sequence() { - for model in models { - let model_name = model["name"].as_str().unwrap_or("unknown_model").to_string(); - - // Check if dimensions field exists - if let Some(dimensions) = model["dimensions"].as_sequence() { - for dimension in dimensions { - // Check if dimension has searchable: true - if let Some(true) = dimension["searchable"].as_bool() { - let dimension_name = dimension["name"].as_str().unwrap_or("unknown_dimension").to_string(); - - // Store this dimension as searchable + + // Try parsing with SemanticLayerSpec first + match serde_yaml::from_str::(yml_content) { + Ok(spec) => { + debug!("Successfully parsed yml_content with SemanticLayerSpec for extract_searchable_dimensions"); + for model in spec.models { + for dimension in model.dimensions { + if dimension.searchable { searchable_dimensions.push(SearchableDimension { - model_name: model_name.clone(), // Clone here to avoid move - dimension_name: dimension_name.clone(), - dimension_path: vec!["models".to_string(), model_name.clone(), 
"dimensions".to_string(), dimension_name], + model_name: model.name.clone(), + dimension_name: dimension.name.clone(), + // The dimension_path might need adjustment if its usage relies on the old dynamic structure. + // For now, creating a simplified path. This might need review based on how dimension_path is consumed. + dimension_path: vec!["models".to_string(), model.name.clone(), "dimensions".to_string(), dimension.name], }); } } } } + Err(e_spec) => { + warn!( + "Failed to parse yml_content with SemanticLayerSpec (error: {}), falling back to generic serde_yaml::Value for extract_searchable_dimensions. Consider updating YAML to new spec.", + e_spec + ); + // Fallback to original dynamic parsing logic + let yaml: serde_yaml::Value = serde_yaml::from_str(yml_content) + .context("Failed to parse dataset YAML content (fallback)")?; + + if let Some(models) = yaml["models"].as_sequence() { + for model_val in models { + let model_name = model_val["name"].as_str().unwrap_or("unknown_model").to_string(); + if let Some(dimensions) = model_val["dimensions"].as_sequence() { + for dimension_val in dimensions { + if let Some(true) = dimension_val["searchable"].as_bool() { + let dimension_name = dimension_val["name"].as_str().unwrap_or("unknown_dimension").to_string(); + searchable_dimensions.push(SearchableDimension { + model_name: model_name.clone(), + dimension_name: dimension_name.clone(), + dimension_path: vec!["models".to_string(), model_name.clone(), "dimensions".to_string(), dimension_name], + }); + } + } + } + } + } + } } - Ok(searchable_dimensions) } /// Extract database structure from YAML content based on actual model structure fn extract_database_info_from_yaml(yml_content: &str) -> Result>>>> { - let yaml: serde_yaml::Value = serde_yaml::from_str(yml_content) - .context("Failed to parse dataset YAML content")?; - - // Structure: database -> schema -> table -> columns - let mut database_info = HashMap::new(); - - // Process models - if let Some(models) = 
yaml["models"].as_sequence() { - for model in models { - // Extract database, schema, and model name (which acts as table name) - let database_name = model["database"].as_str().unwrap_or("unknown").to_string(); - let schema_name = model["schema"].as_str().unwrap_or("public").to_string(); - let table_name = model["name"].as_str().unwrap_or("unknown_model").to_string(); - - // Initialize the nested structure if needed - database_info - .entry(database_name.clone()) - .or_insert_with(HashMap::new) - .entry(schema_name.clone()) - .or_insert_with(HashMap::new); - - // Collect column names from dimensions, measures, and metrics - let mut columns = Vec::new(); - - // Add dimensions - if let Some(dimensions) = model["dimensions"].as_sequence() { - for dim in dimensions { - if let Some(dim_name) = dim["name"].as_str() { - columns.push(dim_name.to_string()); - - // Also add the expression as a potential column to search - if let Some(expr) = dim["expr"].as_str() { - if expr != dim_name { - columns.push(expr.to_string()); + let mut database_info: HashMap>>> = HashMap::new(); + + match serde_yaml::from_str::(yml_content) { + Ok(spec) => { + debug!("Successfully parsed yml_content with SemanticLayerSpec for extract_database_info_from_yaml"); + for model in spec.models { + let db_name = model.database.as_deref().unwrap_or("unknown_db").to_string(); + let sch_name = model.schema.as_deref().unwrap_or("unknown_schema").to_string(); + let tbl_name = model.name.clone(); // model.name is table name + + let mut columns = Vec::new(); + for dim in model.dimensions { + columns.push(dim.name); + // Assuming 'expr' is not directly a column name in SemanticLayerSpec's Dimension for this purpose. + // If dimensions can have expressions that resolve to column names, adjust here. + } + for measure in model.measures { + columns.push(measure.name); + // Assuming 'expr' is not directly a column name here either. 
+ } + for metric in model.metrics { + columns.push(metric.name); // Metrics usually have names, expressions might be too complex for simple column list + } + + database_info + .entry(db_name) + .or_default() + .entry(sch_name) + .or_default() + .insert(tbl_name, columns); + } + } + Err(e_spec) => { + warn!( + "Failed to parse yml_content with SemanticLayerSpec (error: {}), falling back to generic serde_yaml::Value for extract_database_info_from_yaml. Consider updating YAML to new spec.", + e_spec + ); + let yaml: serde_yaml::Value = serde_yaml::from_str(yml_content) + .context("Failed to parse dataset YAML content (fallback)")?; + + if let Some(models) = yaml["models"].as_sequence() { + for model_val in models { + let database_name = model_val["database"].as_str().unwrap_or("unknown").to_string(); + let schema_name = model_val["schema"].as_str().unwrap_or("public").to_string(); + let table_name = model_val["name"].as_str().unwrap_or("unknown_model").to_string(); + + database_info + .entry(database_name.clone()) + .or_insert_with(HashMap::new) + .entry(schema_name.clone()) + .or_insert_with(HashMap::new); + + let mut columns = Vec::new(); + if let Some(dimensions) = model_val["dimensions"].as_sequence() { + for dim in dimensions { + if let Some(dim_name) = dim["name"].as_str() { + columns.push(dim_name.to_string()); + if let Some(expr) = dim["expr"].as_str() { + if expr != dim_name { + columns.push(expr.to_string()); + } + } } } } - } - } - - // Add measures - if let Some(measures) = model["measures"].as_sequence() { - for measure in measures { - if let Some(measure_name) = measure["name"].as_str() { - columns.push(measure_name.to_string()); - - // Also add the expression as a potential column to search - if let Some(expr) = measure["expr"].as_str() { - if expr != measure_name { - columns.push(expr.to_string()); + if let Some(measures) = model_val["measures"].as_sequence() { + for measure in measures { + if let Some(measure_name) = measure["name"].as_str() { + 
columns.push(measure_name.to_string()); + if let Some(expr) = measure["expr"].as_str() { + if expr != measure_name { + columns.push(expr.to_string()); + } + } } } } - } - } - - // Add metrics - if let Some(metrics) = model["metrics"].as_sequence() { - for metric in metrics { - if let Some(metric_name) = metric["name"].as_str() { - columns.push(metric_name.to_string()); + if let Some(metrics) = model_val["metrics"].as_sequence() { + for metric in metrics { + if let Some(metric_name) = metric["name"].as_str() { + columns.push(metric_name.to_string()); + } + } } + database_info + .get_mut(&database_name) + .unwrap() + .get_mut(&schema_name) + .unwrap() + .insert(table_name, columns); } } - - // Store columns for this model - database_info - .get_mut(&database_name) - .unwrap() - .get_mut(&schema_name) - .unwrap() - .insert(table_name, columns); } } - Ok(database_info) } diff --git a/web/package-lock.json b/web/package-lock.json index 95d9e4ddb..a333eb8ab 100644 --- a/web/package-lock.json +++ b/web/package-lock.json @@ -1,12 +1,12 @@ { "name": "web", - "version": "0.1.0", + "version": "0.1.1", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "web", - "version": "0.1.0", + "version": "0.1.1", "dependencies": { "@dnd-kit/core": "^6.3.1", "@dnd-kit/modifiers": "^9.0.0",