search data catalog agent work

2025-04-15 07:40:02 -06:00 · 2025-04-15 07:40:02 -06:00 · 41baefac6e
parent 6135bf904b
commit 41baefac6e
3 changed files with 56 additions and 39 deletions
--- a/api/libs/agents/src/tools/categories/file_tools/search_data_catalog.rs
+++ b/api/libs/agents/src/tools/categories/file_tools/search_data_catalog.rs
@@ -101,7 +101,7 @@ IMPORTANT GUIDELINES:
 4. Evaluate based on whether the dataset's schema, fields, or description MIGHT contain or relate to the relevant information
 5. Include datasets that could provide contextual or supporting information
 6. When in doubt about relevance, lean towards including the dataset
-7. Ensure the "id" field exactly matches the dataset's UUID
+7. **CRITICAL:** The "id" field in your JSON response MUST contain ONLY the dataset's UUID string (e.g., "9711ca55-8329-4fd9-8b20-b6a3289f3d38"). Do NOT include the dataset name or any other information in the "id" field.
 8. Use both the USER REQUEST and SEARCH QUERY to understand the user's information needs broadly
 9. Consider these elements in the dataset metadata:
   - Column names and their data types
@@ -452,7 +452,7 @@ async fn filter_datasets_with_llm(
    let llm_client = LiteLLMClient::new(None, None);
    let request = ChatCompletionRequest {
-        model: "gpt-4.1-mini".to_string(),
+        model: "gemini-2.0-flash-001".to_string(),
        messages: vec![AgentMessage::User {
            id: None,
            content: prompt,
@@ -510,13 +510,32 @@ async fn filter_datasets_with_llm(
        .results
        .into_iter()
        .filter_map(|result| {
-            Uuid::parse_str(&result.id).ok().and_then(|id| {
+            debug!(llm_result_id = %result.id, "Processing LLM filter result");
-                dataset_map.get(&id).map(|dataset| DatasetResult {
+            let parsed_uuid_result = Uuid::parse_str(&result.id);
-                    id: dataset.id,
+            match &parsed_uuid_result {
-                    name: Some(dataset.name.clone()),
+                Ok(parsed_id) => {
-                    yml_content: dataset.yml_content.clone(),
+                    debug!(parsed_id = %parsed_id, "Successfully parsed UUID from LLM result");
-                })
+                    let dataset_option = dataset_map.get(parsed_id);
-            })
+                    match dataset_option {
                        Some(dataset) => {
                            debug!(dataset_id = %dataset.id, dataset_name = %dataset.name, "Found matching dataset in map");
                            Some(DatasetResult {
                                id: dataset.id,
                                name: Some(dataset.name.clone()),
                                yml_content: dataset.yml_content.clone(),
                            })
                        }
                        None => {
                            warn!(parsed_id = %parsed_id, "Parsed UUID not found in dataset_map");
                            None
                        }
                    }
                }
                Err(e) => {
                    error!(llm_result_id = %result.id, error = %e, "Failed to parse UUID from LLM result");
                    None
                }
            }
        })
        .collect();
--- a/api/libs/handlers/src/chats/post_chat_handler.rs
+++ b/api/libs/handlers/src/chats/post_chat_handler.rs
@@ -2133,23 +2133,6 @@ fn transform_assistant_tool_message(
                }
            }
            "no_search_needed" => {
                // Handle the 'no_search_needed' tool call by creating a simple reasoning message
                let reasoning = BusterReasoningMessage::Text(BusterReasoningText {
                    id: tool_id.clone(),
                    reasoning_type: "text".to_string(),
                    title: "Skipped Data Catalog Search".to_string(),
                    secondary_title: format!("{} seconds", last_reasoning_completion_time.elapsed().as_secs()), // Use Delta
                    message: Some("Sufficient data context already available.".to_string()),
                    message_chunk: None,
                    status: Some("Complete".to_string()),
                });
                all_results.push(ToolTransformResult::Reasoning(reasoning)); // Corrected: all_results
                // Mark reasoning as complete for timing calculations
                if reasoning_complete_time.is_none() {
                     *reasoning_complete_time = Some(Instant::now());
                     *last_reasoning_completion_time = Instant::now(); // Update last completion time
                }
                 // Clear tracker since this tool doesn't use chunking for its reasoning output
                tracker.clear_chunk(tool_id.clone());
            }
--- a/cli/cli/src/commands/deploy.rs
+++ b/cli/cli/src/commands/deploy.rs
@@ -1,5 +1,6 @@
 use anyhow::Result;
 use regex;
 use reqwest;
 use serde::{Deserialize, Serialize};
 use std::collections::HashSet;
 use std::path::{Path, PathBuf};
@@ -957,18 +958,18 @@ pub async fn deploy(path: Option<&str>, dry_run: bool, recursive: bool) -> Resul
            }
        }
-        progress.status = "Validating model...".to_string();
+        // progress.status = "Validating model...".to_string();
-        progress.log_progress();
+        // progress.log_progress();
-        if let Err(errors) = model_file.validate(config.as_ref()).await {
+        // if let Err(errors) = model_file.validate(config.as_ref()).await {
-            for error in &errors {
+        //     for error in &errors {
-                progress.log_error(error);
+        //         progress.log_error(error);
-            }
+        //     }
-            result
+        //     result
-                .failures
+        //         .failures
-                .push((progress.current_file.clone(), "unknown".to_string(), errors));
+        //         .push((progress.current_file.clone(), "unknown".to_string(), errors));
-            continue;
+        //     continue;
-        }
+        // }
        // Process each model in the file
        for model in &model_file.model.models {
@@ -1136,7 +1137,18 @@ pub async fn deploy(path: Option<&str>, dry_run: bool, recursive: bool) -> Resul
            }
            Err(e) => {
                println!("\n❌ Deployment failed!");
-                println!("Error: {}", e);
+
                // Attempt to extract more detail from the error
                let mut detailed_error = format!("{}", e);
                if let Some(source) = e.source() {
                    if let Some(reqwest_err) = source.downcast_ref::<reqwest::Error>() {
                        if let Some(status) = reqwest_err.status() {
                            detailed_error = format!("{} (HTTP Status: {})", detailed_error, status);
                        }
                    }
                }
                println!("Error: {}", detailed_error);
                println!("\n💡 Troubleshooting:");
                println!("1. Check data source:");
                println!("   - Verify '{}' exists in Buster", data_source_name);
@@ -1148,7 +1160,10 @@ pub async fn deploy(path: Option<&str>, dry_run: bool, recursive: bool) -> Resul
                println!("3. Check relationships:");
                println!("   - Ensure referenced models exist");
                println!("   - Verify relationship types");
-                return Err(anyhow::anyhow!("Failed to deploy models to Buster: {}", e));
+                return Err(anyhow::anyhow!(
                    "Failed to deploy models to Buster: {}",
                    detailed_error
                ));
            }
        }
    }