mirror of https://github.com/buster-so/buster.git
search data catalog agent work
This commit is contained in:
parent
6135bf904b
commit
41baefac6e
|
@ -101,7 +101,7 @@ IMPORTANT GUIDELINES:
|
||||||
4. Evaluate based on whether the dataset's schema, fields, or description MIGHT contain or relate to the relevant information
|
4. Evaluate based on whether the dataset's schema, fields, or description MIGHT contain or relate to the relevant information
|
||||||
5. Include datasets that could provide contextual or supporting information
|
5. Include datasets that could provide contextual or supporting information
|
||||||
6. When in doubt about relevance, lean towards including the dataset
|
6. When in doubt about relevance, lean towards including the dataset
|
||||||
7. Ensure the "id" field exactly matches the dataset's UUID
|
7. **CRITICAL:** The "id" field in your JSON response MUST contain ONLY the dataset's UUID string (e.g., "9711ca55-8329-4fd9-8b20-b6a3289f3d38"). Do NOT include the dataset name or any other information in the "id" field.
|
||||||
8. Use both the USER REQUEST and SEARCH QUERY to understand the user's information needs broadly
|
8. Use both the USER REQUEST and SEARCH QUERY to understand the user's information needs broadly
|
||||||
9. Consider these elements in the dataset metadata:
|
9. Consider these elements in the dataset metadata:
|
||||||
- Column names and their data types
|
- Column names and their data types
|
||||||
|
@ -452,7 +452,7 @@ async fn filter_datasets_with_llm(
|
||||||
let llm_client = LiteLLMClient::new(None, None);
|
let llm_client = LiteLLMClient::new(None, None);
|
||||||
|
|
||||||
let request = ChatCompletionRequest {
|
let request = ChatCompletionRequest {
|
||||||
model: "gpt-4.1-mini".to_string(),
|
model: "gemini-2.0-flash-001".to_string(),
|
||||||
messages: vec![AgentMessage::User {
|
messages: vec![AgentMessage::User {
|
||||||
id: None,
|
id: None,
|
||||||
content: prompt,
|
content: prompt,
|
||||||
|
@ -510,13 +510,32 @@ async fn filter_datasets_with_llm(
|
||||||
.results
|
.results
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.filter_map(|result| {
|
.filter_map(|result| {
|
||||||
Uuid::parse_str(&result.id).ok().and_then(|id| {
|
debug!(llm_result_id = %result.id, "Processing LLM filter result");
|
||||||
dataset_map.get(&id).map(|dataset| DatasetResult {
|
let parsed_uuid_result = Uuid::parse_str(&result.id);
|
||||||
id: dataset.id,
|
match &parsed_uuid_result {
|
||||||
name: Some(dataset.name.clone()),
|
Ok(parsed_id) => {
|
||||||
yml_content: dataset.yml_content.clone(),
|
debug!(parsed_id = %parsed_id, "Successfully parsed UUID from LLM result");
|
||||||
})
|
let dataset_option = dataset_map.get(parsed_id);
|
||||||
})
|
match dataset_option {
|
||||||
|
Some(dataset) => {
|
||||||
|
debug!(dataset_id = %dataset.id, dataset_name = %dataset.name, "Found matching dataset in map");
|
||||||
|
Some(DatasetResult {
|
||||||
|
id: dataset.id,
|
||||||
|
name: Some(dataset.name.clone()),
|
||||||
|
yml_content: dataset.yml_content.clone(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
warn!(parsed_id = %parsed_id, "Parsed UUID not found in dataset_map");
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
error!(llm_result_id = %result.id, error = %e, "Failed to parse UUID from LLM result");
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
|
|
|
@ -2133,23 +2133,6 @@ fn transform_assistant_tool_message(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
"no_search_needed" => {
|
"no_search_needed" => {
|
||||||
// Handle the 'no_search_needed' tool call by creating a simple reasoning message
|
|
||||||
let reasoning = BusterReasoningMessage::Text(BusterReasoningText {
|
|
||||||
id: tool_id.clone(),
|
|
||||||
reasoning_type: "text".to_string(),
|
|
||||||
title: "Skipped Data Catalog Search".to_string(),
|
|
||||||
secondary_title: format!("{} seconds", last_reasoning_completion_time.elapsed().as_secs()), // Use Delta
|
|
||||||
message: Some("Sufficient data context already available.".to_string()),
|
|
||||||
message_chunk: None,
|
|
||||||
status: Some("Complete".to_string()),
|
|
||||||
});
|
|
||||||
all_results.push(ToolTransformResult::Reasoning(reasoning)); // Corrected: all_results
|
|
||||||
|
|
||||||
// Mark reasoning as complete for timing calculations
|
|
||||||
if reasoning_complete_time.is_none() {
|
|
||||||
*reasoning_complete_time = Some(Instant::now());
|
|
||||||
*last_reasoning_completion_time = Instant::now(); // Update last completion time
|
|
||||||
}
|
|
||||||
// Clear tracker since this tool doesn't use chunking for its reasoning output
|
// Clear tracker since this tool doesn't use chunking for its reasoning output
|
||||||
tracker.clear_chunk(tool_id.clone());
|
tracker.clear_chunk(tool_id.clone());
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use regex;
|
use regex;
|
||||||
|
use reqwest;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
|
@ -957,18 +958,18 @@ pub async fn deploy(path: Option<&str>, dry_run: bool, recursive: bool) -> Resul
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
progress.status = "Validating model...".to_string();
|
// progress.status = "Validating model...".to_string();
|
||||||
progress.log_progress();
|
// progress.log_progress();
|
||||||
|
|
||||||
if let Err(errors) = model_file.validate(config.as_ref()).await {
|
// if let Err(errors) = model_file.validate(config.as_ref()).await {
|
||||||
for error in &errors {
|
// for error in &errors {
|
||||||
progress.log_error(error);
|
// progress.log_error(error);
|
||||||
}
|
// }
|
||||||
result
|
// result
|
||||||
.failures
|
// .failures
|
||||||
.push((progress.current_file.clone(), "unknown".to_string(), errors));
|
// .push((progress.current_file.clone(), "unknown".to_string(), errors));
|
||||||
continue;
|
// continue;
|
||||||
}
|
// }
|
||||||
|
|
||||||
// Process each model in the file
|
// Process each model in the file
|
||||||
for model in &model_file.model.models {
|
for model in &model_file.model.models {
|
||||||
|
@ -1136,7 +1137,18 @@ pub async fn deploy(path: Option<&str>, dry_run: bool, recursive: bool) -> Resul
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
println!("\n❌ Deployment failed!");
|
println!("\n❌ Deployment failed!");
|
||||||
println!("Error: {}", e);
|
|
||||||
|
// Attempt to extract more detail from the error
|
||||||
|
let mut detailed_error = format!("{}", e);
|
||||||
|
if let Some(source) = e.source() {
|
||||||
|
if let Some(reqwest_err) = source.downcast_ref::<reqwest::Error>() {
|
||||||
|
if let Some(status) = reqwest_err.status() {
|
||||||
|
detailed_error = format!("{} (HTTP Status: {})", detailed_error, status);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
println!("Error: {}", detailed_error);
|
||||||
println!("\n💡 Troubleshooting:");
|
println!("\n💡 Troubleshooting:");
|
||||||
println!("1. Check data source:");
|
println!("1. Check data source:");
|
||||||
println!(" - Verify '{}' exists in Buster", data_source_name);
|
println!(" - Verify '{}' exists in Buster", data_source_name);
|
||||||
|
@ -1148,7 +1160,10 @@ pub async fn deploy(path: Option<&str>, dry_run: bool, recursive: bool) -> Resul
|
||||||
println!("3. Check relationships:");
|
println!("3. Check relationships:");
|
||||||
println!(" - Ensure referenced models exist");
|
println!(" - Ensure referenced models exist");
|
||||||
println!(" - Verify relationship types");
|
println!(" - Verify relationship types");
|
||||||
return Err(anyhow::anyhow!("Failed to deploy models to Buster: {}", e));
|
return Err(anyhow::anyhow!(
|
||||||
|
"Failed to deploy models to Buster: {}",
|
||||||
|
detailed_error
|
||||||
|
));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue