From 41baefac6e76c169b621a039aa4daf65e373f0a6 Mon Sep 17 00:00:00 2001 From: dal Date: Tue, 15 Apr 2025 07:40:02 -0600 Subject: [PATCH] search data catalog agent work --- .../file_tools/search_data_catalog.rs | 37 +++++++++++++---- .../handlers/src/chats/post_chat_handler.rs | 17 -------- cli/cli/src/commands/deploy.rs | 41 +++++++++++++------ 3 files changed, 56 insertions(+), 39 deletions(-) diff --git a/api/libs/agents/src/tools/categories/file_tools/search_data_catalog.rs b/api/libs/agents/src/tools/categories/file_tools/search_data_catalog.rs index 822ac9a38..47a0f65cc 100644 --- a/api/libs/agents/src/tools/categories/file_tools/search_data_catalog.rs +++ b/api/libs/agents/src/tools/categories/file_tools/search_data_catalog.rs @@ -101,7 +101,7 @@ IMPORTANT GUIDELINES: 4. Evaluate based on whether the dataset's schema, fields, or description MIGHT contain or relate to the relevant information 5. Include datasets that could provide contextual or supporting information 6. When in doubt about relevance, lean towards including the dataset -7. Ensure the "id" field exactly matches the dataset's UUID +7. **CRITICAL:** The "id" field in your JSON response MUST contain ONLY the dataset's UUID string (e.g., "9711ca55-8329-4fd9-8b20-b6a3289f3d38"). Do NOT include the dataset name or any other information in the "id" field. 8. Use both the USER REQUEST and SEARCH QUERY to understand the user's information needs broadly 9. 
Consider these elements in the dataset metadata: - Column names and their data types @@ -452,7 +452,7 @@ async fn filter_datasets_with_llm( let llm_client = LiteLLMClient::new(None, None); let request = ChatCompletionRequest { - model: "gpt-4.1-mini".to_string(), + model: "gemini-2.0-flash-001".to_string(), messages: vec![AgentMessage::User { id: None, content: prompt, @@ -510,13 +510,32 @@ async fn filter_datasets_with_llm( .results .into_iter() .filter_map(|result| { - Uuid::parse_str(&result.id).ok().and_then(|id| { - dataset_map.get(&id).map(|dataset| DatasetResult { - id: dataset.id, - name: Some(dataset.name.clone()), - yml_content: dataset.yml_content.clone(), - }) - }) + debug!(llm_result_id = %result.id, "Processing LLM filter result"); + let parsed_uuid_result = Uuid::parse_str(&result.id); + match &parsed_uuid_result { + Ok(parsed_id) => { + debug!(parsed_id = %parsed_id, "Successfully parsed UUID from LLM result"); + let dataset_option = dataset_map.get(parsed_id); + match dataset_option { + Some(dataset) => { + debug!(dataset_id = %dataset.id, dataset_name = %dataset.name, "Found matching dataset in map"); + Some(DatasetResult { + id: dataset.id, + name: Some(dataset.name.clone()), + yml_content: dataset.yml_content.clone(), + }) + } + None => { + warn!(parsed_id = %parsed_id, "Parsed UUID not found in dataset_map"); + None + } + } + } + Err(e) => { + error!(llm_result_id = %result.id, error = %e, "Failed to parse UUID from LLM result"); + None + } + } }) .collect(); diff --git a/api/libs/handlers/src/chats/post_chat_handler.rs b/api/libs/handlers/src/chats/post_chat_handler.rs index 2eee64acf..6e3132cd7 100644 --- a/api/libs/handlers/src/chats/post_chat_handler.rs +++ b/api/libs/handlers/src/chats/post_chat_handler.rs @@ -2133,23 +2133,6 @@ fn transform_assistant_tool_message( } } "no_search_needed" => { - // Handle the 'no_search_needed' tool call by creating a simple reasoning message - let reasoning = 
BusterReasoningMessage::Text(BusterReasoningText { - id: tool_id.clone(), - reasoning_type: "text".to_string(), - title: "Skipped Data Catalog Search".to_string(), - secondary_title: format!("{} seconds", last_reasoning_completion_time.elapsed().as_secs()), // Use Delta - message: Some("Sufficient data context already available.".to_string()), - message_chunk: None, - status: Some("Complete".to_string()), - }); - all_results.push(ToolTransformResult::Reasoning(reasoning)); // Corrected: all_results - - // Mark reasoning as complete for timing calculations - if reasoning_complete_time.is_none() { - *reasoning_complete_time = Some(Instant::now()); - *last_reasoning_completion_time = Instant::now(); // Update last completion time - } // Clear tracker since this tool doesn't use chunking for its reasoning output tracker.clear_chunk(tool_id.clone()); } diff --git a/cli/cli/src/commands/deploy.rs b/cli/cli/src/commands/deploy.rs index 675a22be0..fdf356f74 100644 --- a/cli/cli/src/commands/deploy.rs +++ b/cli/cli/src/commands/deploy.rs @@ -1,5 +1,6 @@ use anyhow::Result; use regex; +use reqwest; use serde::{Deserialize, Serialize}; use std::collections::HashSet; use std::path::{Path, PathBuf}; @@ -957,18 +958,18 @@ pub async fn deploy(path: Option<&str>, dry_run: bool, recursive: bool) -> Resul } } - progress.status = "Validating model...".to_string(); - progress.log_progress(); + // progress.status = "Validating model...".to_string(); + // progress.log_progress(); - if let Err(errors) = model_file.validate(config.as_ref()).await { - for error in &errors { - progress.log_error(error); - } - result - .failures - .push((progress.current_file.clone(), "unknown".to_string(), errors)); - continue; - } + // if let Err(errors) = model_file.validate(config.as_ref()).await { + // for error in &errors { + // progress.log_error(error); + // } + // result + // .failures + // .push((progress.current_file.clone(), "unknown".to_string(), errors)); + // continue; + // } // Process each 
model in the file for model in &model_file.model.models { @@ -1136,7 +1137,18 @@ pub async fn deploy(path: Option<&str>, dry_run: bool, recursive: bool) -> Resul } Err(e) => { println!("\n❌ Deployment failed!"); - println!("Error: {}", e); + + // Attempt to extract more detail from the error + let mut detailed_error = format!("{}", e); + if let Some(source) = e.source() { + if let Some(reqwest_err) = source.downcast_ref::<reqwest::Error>() { + if let Some(status) = reqwest_err.status() { + detailed_error = format!("{} (HTTP Status: {})", detailed_error, status); + } + } + } + + println!("Error: {}", detailed_error); println!("\n💡 Troubleshooting:"); println!("1. Check data source:"); println!(" - Verify '{}' exists in Buster", data_source_name); @@ -1148,7 +1160,10 @@ pub async fn deploy(path: Option<&str>, dry_run: bool, recursive: bool) -> Resul println!("3. Check relationships:"); println!(" - Ensure referenced models exist"); println!(" - Verify relationship types"); - return Err(anyhow::anyhow!("Failed to deploy models to Buster: {}", e)); + return Err(anyhow::anyhow!( + "Failed to deploy models to Buster: {}", + detailed_error + )); } } }