refactor: Rename and update dataset search tool

- Renamed `search_datasets.rs` to `search_data_catalog.rs`
- Updated `mod.rs` to reflect the new module and tool name
- Removed the placeholder `SearchDatasetsTool` implementation
- Prepared for future implementation of data catalog search functionality
This commit is contained in:
dal 2025-01-27 14:24:10 -07:00
parent 2bc68e8599
commit 0020e6ed4a
No known key found for this signature in database
GPG Key ID: 16F4B0E1E9F61122
4 changed files with 111 additions and 47 deletions

View File

@ -7,14 +7,16 @@ use crate::utils::clients::ai::litellm::ToolCall;
mod search_files; mod search_files;
mod create_files; mod create_files;
mod bulk_modify_files; mod bulk_modify_files;
mod search_datasets; mod search_data_catalog;
mod open_files; mod open_files;
mod send_to_user;
pub use search_files::SearchFilesTool; pub use search_files::SearchFilesTool;
pub use create_files::CreateFilesTool; pub use create_files::CreateFilesTool;
pub use bulk_modify_files::BulkModifyFilesTool; pub use bulk_modify_files::BulkModifyFilesTool;
pub use search_datasets::SearchDatasetsTool; pub use search_data_catalog::SearchDataCatalogTool;
pub use open_files::OpenFilesTool; pub use open_files::OpenFilesTool;
pub use send_to_user::SendToUserTool;
/// A trait that defines how tools should be implemented. /// A trait that defines how tools should be implemented.
/// Any struct that wants to be used as a tool must implement this trait. /// Any struct that wants to be used as a tool must implement this trait.

View File

@ -0,0 +1,66 @@
use anyhow::Result;
use async_trait::async_trait;
use serde_json::Value;
use serde::{Deserialize, Serialize};
use crate::utils::{clients::ai::litellm::ToolCall, tools::ToolExecutor};
/// Arguments accepted by the `search_data_catalog` tool call.
#[derive(Debug, Serialize, Deserialize)]
struct SearchDataCatalogParams {
    /// Terms to search for in the data catalog.
    search_terms: Vec<String>,
    /// Optional filter limiting the search to specific catalog item types.
    /// Deserializes to `None` when the key is absent from the arguments.
    #[serde(default)]
    item_types: Option<Vec<String>>,
}
/// One entry of a data catalog search result.
///
/// NOTE(review): nothing in the visible code constructs this type yet;
/// presumably it is reserved for the pending search implementation.
#[derive(Debug, Serialize, Deserialize)]
struct CatalogSearchResult {
    /// Identifier of the catalog item.
    id: String,
    /// Name of the catalog item.
    name: String,
    /// Description of the catalog item.
    description: String,
    /// Kind of catalog item — presumably one of the `item_types` values
    /// listed in the tool schema; confirm once the search is implemented.
    item_type: String,
    /// Relevance score of this result (scale is not established here).
    relevance_score: f32,
    /// Arbitrary JSON metadata attached to the item.
    metadata: Value,
}
/// Tool that searches the data catalog for datasets, metrics, business
/// terms, and logic definitions.
pub struct SearchDataCatalogTool;

#[async_trait]
impl ToolExecutor for SearchDataCatalogTool {
    /// Validates the tool call arguments and performs the catalog search.
    ///
    /// The search itself is not implemented yet, so a successful call always
    /// returns an empty JSON array.
    ///
    /// # Errors
    ///
    /// Returns an error when the arguments cannot be deserialized into
    /// `SearchDataCatalogParams`.
    async fn execute(&self, tool_call: &ToolCall) -> Result<Value> {
        // Assumes `arguments` is a JSON-encoded `String`, which is how the
        // other tool executors treat this same field; `from_value` would
        // require a `serde_json::Value` instead. Borrowing avoids a clone.
        // The binding is underscored because the parsed value is only used
        // for validation until the search logic exists.
        let _params: SearchDataCatalogParams =
            serde_json::from_str(&tool_call.function.arguments)?;

        // TODO: Implement actual data catalog search logic
        Ok(Value::Array(vec![]))
    }

    /// Returns the JSON schema that describes this tool to the model.
    ///
    /// NOTE(review): the schema sets `"strict": true` while `item_types` is
    /// not listed in `required`. OpenAI-style strict schemas expect every
    /// property to be required — confirm the provider accepts this shape.
    fn get_schema(&self) -> Value {
        serde_json::json!({
            "name": "search_data_catalog",
            "strict": true,
            "parameters": {
                "type": "object",
                "required": ["search_terms"],
                "properties": {
                    "search_terms": {
                        "type": "array",
                        "items": {
                            "type": "string",
                            "description": "A search term for finding relevant data catalog entries"
                        },
                        "description": "Array of strings representing the terms to search for in the data catalog"
                    },
                    "item_types": {
                        "type": "array",
                        "items": {
                            "type": "string",
                            "enum": ["dataset", "metric", "business_term", "logic"],
                            "description": "Type of catalog item to search for"
                        },
                        "description": "Optional filter to limit search to specific types of catalog items"
                    }
                },
                "additionalProperties": false
            },
            "description": "Searches the data catalog for relevant items including datasets, metrics, business terms, and logic definitions. Returns structured results with relevance scores. Use this to find data assets and their documentation."
        })
    }
}

View File

@ -1,45 +0,0 @@
use anyhow::Result;
use async_trait::async_trait;
use serde_json::Value;
use serde::{Deserialize, Serialize};
use crate::utils::{clients::ai::litellm::ToolCall, tools::ToolExecutor};
/// Arguments accepted by the `search_datasets` tool call.
#[derive(Debug, Serialize, Deserialize)]
struct SearchDatasetsParams {
    /// Terms to search for.
    search_terms: Vec<String>,
}
/// Tool that searches for datasets or tables relevant to a set of terms.
pub struct SearchDatasetsTool;

#[async_trait]
impl ToolExecutor for SearchDatasetsTool {
    /// Validates the tool call arguments and performs the dataset search.
    ///
    /// The search itself is not implemented yet, so a successful call always
    /// returns an empty JSON array.
    ///
    /// # Errors
    ///
    /// Returns an error when the arguments cannot be deserialized into
    /// `SearchDatasetsParams`.
    async fn execute(&self, tool_call: &ToolCall) -> Result<Value> {
        // Borrow the argument string directly; cloning it only to take a
        // reference allocated for nothing. The binding is underscored
        // because the parsed value is only used for validation for now.
        let _params: SearchDatasetsParams =
            serde_json::from_str(&tool_call.function.arguments)?;

        // TODO: Implement actual dataset search logic
        Ok(Value::Array(vec![]))
    }

    /// Returns the JSON schema that describes this tool to the model.
    fn get_schema(&self) -> Value {
        serde_json::json!({
            "name": "search_datasets",
            "strict": true,
            "parameters": {
                "type": "object",
                "required": ["search_terms"],
                "properties": {
                    "search_terms": {
                        "type": "array",
                        "items": {
                            "type": "string",
                            "description": "A search term for finding relevant datasets"
                        },
                        "description": "Array of strings representing the terms to search for"
                    }
                },
                "additionalProperties": false
            },
            "description": "Searches for relevant datasets or tables you can query. If you need to write SQL but don't know which dataset to reference, call this with relevant search terms (e.g., \"orders,\" \"customers,\" \"sales transactions\")."
        })
    }
}

View File

@ -0,0 +1,41 @@
use anyhow::Result;
use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use crate::utils::{clients::ai::litellm::ToolCall, tools::ToolExecutor};
/// Arguments accepted by the `send_to_user` tool call.
#[derive(Debug, Serialize, Deserialize)]
struct SendToUserParams {
    /// ID of the metric to send to the user.
    metric_id: String,
}
/// Tool that sends a metric to the user by its ID.
pub struct SendToUserTool;

#[async_trait]
impl ToolExecutor for SendToUserTool {
    /// Validates the tool call arguments and sends the metric to the user.
    ///
    /// The send logic is not implemented yet, so a successful call always
    /// returns an empty JSON array.
    ///
    /// # Errors
    ///
    /// Returns an error when the arguments cannot be deserialized into
    /// `SendToUserParams`.
    async fn execute(&self, tool_call: &ToolCall) -> Result<Value> {
        // Borrow the argument string directly; cloning it only to take a
        // reference allocated for nothing. The binding is underscored
        // because the parsed value is only used for validation for now.
        let _params: SendToUserParams =
            serde_json::from_str(&tool_call.function.arguments)?;

        // TODO: Implement actual send to user logic
        Ok(Value::Array(vec![]))
    }

    /// Returns the JSON schema that describes this tool to the model.
    fn get_schema(&self) -> Value {
        serde_json::json!({
            "name": "send_to_user",
            "strict": true,
            "parameters": {
                "type": "object",
                "required": ["metric_id"],
                "properties": {
                    "metric_id": {
                        "type": "string",
                        "description": "The ID of the metric to send to the user"
                    }
                },
                "additionalProperties": false
            },
            "description": "Sends a metric to the user by its ID."
        })
    }
}