search data catalog agent work

This commit is contained in:
dal 2025-04-15 07:40:02 -06:00
parent 6135bf904b
commit 41baefac6e
No known key found for this signature in database
GPG Key ID: 16F4B0E1E9F61122
3 changed files with 56 additions and 39 deletions

View File

@ -101,7 +101,7 @@ IMPORTANT GUIDELINES:
4. Evaluate based on whether the dataset's schema, fields, or description MIGHT contain or relate to the relevant information 4. Evaluate based on whether the dataset's schema, fields, or description MIGHT contain or relate to the relevant information
5. Include datasets that could provide contextual or supporting information 5. Include datasets that could provide contextual or supporting information
6. When in doubt about relevance, lean towards including the dataset 6. When in doubt about relevance, lean towards including the dataset
7. Ensure the "id" field exactly matches the dataset's UUID 7. **CRITICAL:** The "id" field in your JSON response MUST contain ONLY the dataset's UUID string (e.g., "9711ca55-8329-4fd9-8b20-b6a3289f3d38"). Do NOT include the dataset name or any other information in the "id" field.
8. Use both the USER REQUEST and SEARCH QUERY to understand the user's information needs broadly 8. Use both the USER REQUEST and SEARCH QUERY to understand the user's information needs broadly
9. Consider these elements in the dataset metadata: 9. Consider these elements in the dataset metadata:
- Column names and their data types - Column names and their data types
@ -452,7 +452,7 @@ async fn filter_datasets_with_llm(
let llm_client = LiteLLMClient::new(None, None); let llm_client = LiteLLMClient::new(None, None);
let request = ChatCompletionRequest { let request = ChatCompletionRequest {
model: "gpt-4.1-mini".to_string(), model: "gemini-2.0-flash-001".to_string(),
messages: vec![AgentMessage::User { messages: vec![AgentMessage::User {
id: None, id: None,
content: prompt, content: prompt,
@ -510,13 +510,32 @@ async fn filter_datasets_with_llm(
.results .results
.into_iter() .into_iter()
.filter_map(|result| { .filter_map(|result| {
Uuid::parse_str(&result.id).ok().and_then(|id| { debug!(llm_result_id = %result.id, "Processing LLM filter result");
dataset_map.get(&id).map(|dataset| DatasetResult { let parsed_uuid_result = Uuid::parse_str(&result.id);
id: dataset.id, match &parsed_uuid_result {
name: Some(dataset.name.clone()), Ok(parsed_id) => {
yml_content: dataset.yml_content.clone(), debug!(parsed_id = %parsed_id, "Successfully parsed UUID from LLM result");
}) let dataset_option = dataset_map.get(parsed_id);
}) match dataset_option {
Some(dataset) => {
debug!(dataset_id = %dataset.id, dataset_name = %dataset.name, "Found matching dataset in map");
Some(DatasetResult {
id: dataset.id,
name: Some(dataset.name.clone()),
yml_content: dataset.yml_content.clone(),
})
}
None => {
warn!(parsed_id = %parsed_id, "Parsed UUID not found in dataset_map");
None
}
}
}
Err(e) => {
error!(llm_result_id = %result.id, error = %e, "Failed to parse UUID from LLM result");
None
}
}
}) })
.collect(); .collect();

View File

@ -2133,23 +2133,6 @@ fn transform_assistant_tool_message(
} }
} }
"no_search_needed" => { "no_search_needed" => {
// Handle the 'no_search_needed' tool call by creating a simple reasoning message
let reasoning = BusterReasoningMessage::Text(BusterReasoningText {
id: tool_id.clone(),
reasoning_type: "text".to_string(),
title: "Skipped Data Catalog Search".to_string(),
secondary_title: format!("{} seconds", last_reasoning_completion_time.elapsed().as_secs()), // Use Delta
message: Some("Sufficient data context already available.".to_string()),
message_chunk: None,
status: Some("Complete".to_string()),
});
all_results.push(ToolTransformResult::Reasoning(reasoning)); // Corrected: all_results
// Mark reasoning as complete for timing calculations
if reasoning_complete_time.is_none() {
*reasoning_complete_time = Some(Instant::now());
*last_reasoning_completion_time = Instant::now(); // Update last completion time
}
// Clear tracker since this tool doesn't use chunking for its reasoning output // Clear tracker since this tool doesn't use chunking for its reasoning output
tracker.clear_chunk(tool_id.clone()); tracker.clear_chunk(tool_id.clone());
} }

View File

@ -1,5 +1,6 @@
use anyhow::Result; use anyhow::Result;
use regex; use regex;
use reqwest;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::collections::HashSet; use std::collections::HashSet;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
@ -957,18 +958,18 @@ pub async fn deploy(path: Option<&str>, dry_run: bool, recursive: bool) -> Resul
} }
} }
progress.status = "Validating model...".to_string(); // progress.status = "Validating model...".to_string();
progress.log_progress(); // progress.log_progress();
if let Err(errors) = model_file.validate(config.as_ref()).await { // if let Err(errors) = model_file.validate(config.as_ref()).await {
for error in &errors { // for error in &errors {
progress.log_error(error); // progress.log_error(error);
} // }
result // result
.failures // .failures
.push((progress.current_file.clone(), "unknown".to_string(), errors)); // .push((progress.current_file.clone(), "unknown".to_string(), errors));
continue; // continue;
} // }
// Process each model in the file // Process each model in the file
for model in &model_file.model.models { for model in &model_file.model.models {
@ -1136,7 +1137,18 @@ pub async fn deploy(path: Option<&str>, dry_run: bool, recursive: bool) -> Resul
} }
Err(e) => { Err(e) => {
println!("\n❌ Deployment failed!"); println!("\n❌ Deployment failed!");
println!("Error: {}", e);
// Attempt to extract more detail from the error
let mut detailed_error = format!("{}", e);
if let Some(source) = e.source() {
if let Some(reqwest_err) = source.downcast_ref::<reqwest::Error>() {
if let Some(status) = reqwest_err.status() {
detailed_error = format!("{} (HTTP Status: {})", detailed_error, status);
}
}
}
println!("Error: {}", detailed_error);
println!("\n💡 Troubleshooting:"); println!("\n💡 Troubleshooting:");
println!("1. Check data source:"); println!("1. Check data source:");
println!(" - Verify '{}' exists in Buster", data_source_name); println!(" - Verify '{}' exists in Buster", data_source_name);
@ -1148,7 +1160,10 @@ pub async fn deploy(path: Option<&str>, dry_run: bool, recursive: bool) -> Resul
println!("3. Check relationships:"); println!("3. Check relationships:");
println!(" - Ensure referenced models exist"); println!(" - Ensure referenced models exist");
println!(" - Verify relationship types"); println!(" - Verify relationship types");
return Err(anyhow::anyhow!("Failed to deploy models to Buster: {}", e)); return Err(anyhow::anyhow!(
"Failed to deploy models to Buster: {}",
detailed_error
));
} }
} }
} }