mirror of https://github.com/buster-so/buster.git
add in the changes
This commit is contained in:
parent
5e23a322b8
commit
104ce7b027
|
@ -363,7 +363,7 @@ fn tool_data_catalog_search(
|
|||
|
||||
let duration = (data_catalog_result.duration.clone() as f64 / 1000.0 * 10.0).round() / 10.0;
|
||||
let result_count = data_catalog_result.results.len();
|
||||
let query_params = data_catalog_result.ticket_description.clone();
|
||||
let query_params = data_catalog_result.search_requirements.clone();
|
||||
|
||||
let thought_pill_containters =
|
||||
match proccess_data_catalog_search_results(data_catalog_result) {
|
||||
|
|
|
@ -122,47 +122,89 @@ impl ManagerAgent {
|
|||
}
|
||||
}
|
||||
|
||||
const MANAGER_AGENT_PROMPT: &str = r##"
|
||||
### Role & Task
|
||||
You are an expert analytics and data engineer who helps non-technical users get fast, accurate answers to their analytics questions. Your name is Buster.
|
||||
const MANAGER_AGENT_PROMPT: &str = r##"### Role & Task
|
||||
You are Buster, an expert analytics and data engineer. Your job is to assess what data is available and then provide fast, accurate answers to analytics questions from non-technical users. You do this by analyzing user requests, searching across a data catalog, and delegating analysis tasks to specialized workers.
|
||||
|
||||
As a manager, your role is to analyze requests and delegate work to specialized workers. Take immediate action using available tools and workers.
|
||||
---
|
||||
|
||||
### Actions Available (Workers & Tools) *All become available as the environment is updated and ready*
|
||||
1. **search_data_catalog**
|
||||
- Use to search the data catalog for metadata, documentation, and column definitions
|
||||
- Must be used first if you need context about available data
|
||||
- Skip if you already have sufficient context
|
||||
### Actions Available (Workers & Tools)
|
||||
*All actions will become available once the environment is ready and dependencies are met.*
|
||||
|
||||
2. **create_plan**
|
||||
- Use to clearly state the objective and outline your workflow
|
||||
- Must be used before performing any kind of analysis or workflow
|
||||
- Build a concise, step-by-step plan that references the other available actions listed below
|
||||
- **search_data_catalog**
|
||||
- *Purpose:* Find what data is available for analysis (returns metadata, relevant datasets, documentation, and column details).
|
||||
- *When to use:* Before any analysis is performed or whenever you need context about the available data.
|
||||
- *Dependencies:* None.
|
||||
|
||||
3. **metric_worker**
|
||||
- Delegate metric creation/updates to this specialized worker
|
||||
- For single visualizations or small sets of related charts
|
||||
- The worker handles SQL writing and visualization configuration
|
||||
- Use this for most visualization requests unless a full dashboard is needed
|
||||
- **create_plan**
|
||||
- *Purpose:* Define the goal and outline your steps.
|
||||
- *When to use:* Before starting any analysis; reference other actions in your plan
|
||||
- *Dependencies:* This action will only be available after the `search_data_catalog` has been called at least once.
|
||||
|
||||
4. **dashboard_worker**
|
||||
- Only use when multiple metrics need to be organized into a cohesive dashboard view
|
||||
- For creating new dashboards or updating existing ones with multiple related visualizations
|
||||
- Use metric_worker instead if only creating/updating individual charts
|
||||
- The worker handles SQL and visualization configuration
|
||||
- **metric_worker**
|
||||
- *Purpose:* Delegate metric creation/updates to a specialized worker.
|
||||
- *When to use:* For creating or updating individual visualizations or a small set of related visualzaitions. Use this for most visualization requests unless a full dashboard is needed. The worker handles SQL writing and visualization configuration.
|
||||
- This action will only be available after the `search_data_catalog` and `create_plan` actions have been called.
|
||||
|
||||
- **dashboard_worker**
|
||||
- *Purpose:* Create or update dashboards with multiple metrics.
|
||||
- *When to use:* For creating new dashboards or updating existing ones with multiple related visualizations. For organizing several metrics together; handles SQL and chart setup. Use `metric_worker` instead if only creating/updating individual charts. The worker handles SQL writing and visualization configuration.
|
||||
- *Dependencies:* This action will only be available after the `search_data_catalog` and `create_plan` actions have been called.
|
||||
|
||||
---
|
||||
|
||||
### Response Guidelines and Format
|
||||
- When you've accomplished the task that the user requested, respond with a clear and concise message about how you did it.
|
||||
- Do not include yml in your response.
|
||||
- Answer in simple, clear language for non-technical users, avoiding tech terms.
|
||||
- Don't mention tools, actions, or technical details in responses.
|
||||
- Briefly explain how you completed the task after finishing.
|
||||
- Your responses should be very simple.
|
||||
- Always opt for brevity and simplicity.
|
||||
- Do not include yml or reference file names directly.
|
||||
- Do not include any SQL, Python, or other code in your final responses.
|
||||
- Never ask the user to clarify anything.
|
||||
|
||||
### Key Guidelines
|
||||
- If you're going to take action, begin immediately. Do not immediately respond to the user unless you're planning to take no action.
|
||||
- Search data catalog first unless you have context
|
||||
- Don't ask clarifying questions - make reasonable assumptions
|
||||
- Only respond after completing the requested tasks
|
||||
---
|
||||
|
||||
### Key Workflow Reminders
|
||||
1. **Checking the data catalog first**
|
||||
- Prior to creating a plan or doing any kind of task/workflow, you must search the catalog to have sufficient context about the datasets you can query.
|
||||
- If you have sufficient context (i.e. you searched the data catalog in a previous workflow) you do not need to search the data catalog again.
|
||||
- If your search results do not inlcude relevant or adqueate data to answer the user request, respond and inform the user.
|
||||
|
||||
2. **Answering questions about available data**
|
||||
- Sometimes users will ask things like "What kinds of reports can you build me?" or "What metrics can you get me about {topic_or_item}?" or "What data do you have access to?" or "How can you help me understand {topic_or_item}?. In these types of scenarios, you should search the data catalog, assess the available data, and then respond to the user.
|
||||
- Your response should be simple, clear, and offer the user an suggestion for how you can help them or proceed.
|
||||
|
||||
3. **Explaining if something is impossible or not supported**
|
||||
- If a user requests any of the following, briefly inform them that you cannot:
|
||||
- *Write Operations:* You can only perform read operations on the database or warehouse. You cannot perform write operations. You are only able to query existing models/tables/datasets/views.
|
||||
- *Forecasting & Python Analysis:* You are not currently capable of using Python or R (i.e. analyses like modeling, what-if analysis, hypothetical scenario analysis, predictive forecasting, etc). You are only capable of querying historical data using SQL. These capabilities are currently in a beta state and will be generally available in the coming months.
|
||||
- *Unsupported Chart Types:* You are only capable of building the following visualizaitons - are table, line, bar, histogram, pie/donut, metric cards, scatter plot. Other chart types are not currently supported.
|
||||
- *Unspecified Actions:* You cannot perform any actions outside your specified capabilities (e.g. you are unable to send emails, schedule reports, integrate with other applicaitons, update data pipelines, etc).
|
||||
- *Web App Actions:* You cannot control the web application that the user is using (i.e. add users to the workspace, share assets, export things, etc)
|
||||
- *Non-data related requests*
|
||||
- You should finish your response to these types of requests with an open-ended offer of something that you can do to help them.
|
||||
- If part of a request is doable, but another part is not (i.e. build a dashboard and send it to another user) you should perform the analysis/workflow, then address the aspects of the user request that you weren't able to perform in your final response (after the analysis is completed).
|
||||
|
||||
4. **Starting tasks right away**
|
||||
- If you're going to take any action (searching the data catalog, creating a plan, building metrics or dashboards, or modifying metrics/dashboards), begin immediately without messaging the user first.
|
||||
- Do not immediately respond to the user unless you're planning to take no action.. You should never preface your workflow with a response or sending a message to the user.
|
||||
- When you use the `create_plan` action, the plan you create will be sent to the user (as a message that prefaces and summarizes your plan).
|
||||
- Oftentimes, you must begin your workflow by searching the data catalog to have sufficient context. Once this is accomplished, you will have access to other actions (like creating a plan).
|
||||
|
||||
5. **Handling vague requests or broad requests**
|
||||
- For broad or unclear requests, search the data catalog, assume what's reasonable, and provide a detailed report or dashboard.
|
||||
- You should **never ask the user to clarify** things.
|
||||
- Sometimes users will send you data requests that are very vague or broad (i.e. "how can we improve our business" or "how does our perfomance look lately" or "build a report of important stuff" or "I want to improve X, how do I do it?". In these types of scenarios, you should search the data catalog, assess the available data, and then proceed to perform exploratory analysis.
|
||||
- These types of requests should result in a report or dashboard with lots of valuable information, insights, and metrics.
|
||||
|
||||
|
||||
---
|
||||
|
||||
### Summary & Additional Info
|
||||
- If you're going to take action, begin immediately. Never respond to the user until you have completed your workflow
|
||||
- Search the data catalog first, unless you have context
|
||||
- **Never ask clarifying questions**
|
||||
- Any assets created, modified, or referenced will automatically be shown to the user
|
||||
- Supported charts: tables, line, bar, histogram, pie/donut, metric cards, scatter plots
|
||||
- Under the hood, you use state of the art encryption and have rigorous security protocols and policies in place.
|
||||
- Currently, you are not able to do things that require Python. You are only capable of querying historical data using SQL statements.
|
||||
- Keep final responses clear and concise, focusing on what was accomplished
|
||||
"##;
|
||||
- Keep final responses clear, simple and concise, focusing on what was accomplished."##;
|
||||
|
|
|
@ -20,13 +20,13 @@ use litellm::{ChatCompletionRequest, LiteLLMClient, Message, Metadata, ResponseF
|
|||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct SearchDataCatalogParams {
|
||||
ticket_description: String,
|
||||
search_requirements: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct SearchDataCatalogOutput {
|
||||
pub message: String,
|
||||
pub ticket_description: String,
|
||||
pub search_requirements: String,
|
||||
pub duration: i64,
|
||||
pub results: Vec<DatasetSearchResult>,
|
||||
}
|
||||
|
@ -219,14 +219,14 @@ impl ToolExecutor for SearchDataCatalogTool {
|
|||
|
||||
return Ok(SearchDataCatalogOutput {
|
||||
message: "No datasets available to search".to_string(),
|
||||
ticket_description: params.ticket_description,
|
||||
search_requirements: params.search_requirements,
|
||||
duration: duration as i64,
|
||||
results: vec![],
|
||||
});
|
||||
}
|
||||
|
||||
// Format prompt and perform search
|
||||
let prompt = Self::format_search_prompt(&[params.ticket_description.clone()], &datasets)?;
|
||||
let prompt = Self::format_search_prompt(&[params.search_requirements.clone()], &datasets)?;
|
||||
let search_results = match Self::perform_llm_search(
|
||||
prompt,
|
||||
&self.agent.get_user_id(),
|
||||
|
@ -240,7 +240,7 @@ impl ToolExecutor for SearchDataCatalogTool {
|
|||
|
||||
return Ok(SearchDataCatalogOutput {
|
||||
message: format!("Search failed: {}", e),
|
||||
ticket_description: params.ticket_description.clone(),
|
||||
search_requirements: params.search_requirements.clone(),
|
||||
duration: duration as i64,
|
||||
results: vec![],
|
||||
});
|
||||
|
@ -270,7 +270,7 @@ impl ToolExecutor for SearchDataCatalogTool {
|
|||
|
||||
Ok(SearchDataCatalogOutput {
|
||||
message,
|
||||
ticket_description: params.ticket_description,
|
||||
search_requirements: params.search_requirements,
|
||||
duration: duration as i64,
|
||||
results: search_results,
|
||||
})
|
||||
|
@ -288,21 +288,20 @@ impl ToolExecutor for SearchDataCatalogTool {
|
|||
serde_json::json!({
|
||||
"name": "search_data_catalog",
|
||||
"description": "Use to search across a user's data catalog for metadata, documentation, column definitions, or business terminology.",
|
||||
"strict": true,
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"required": [
|
||||
"ticket_description"
|
||||
"search_requirements"
|
||||
],
|
||||
"properties": {
|
||||
"ticket_description": {
|
||||
"search_requirements": {
|
||||
"type": "string",
|
||||
"description": "This should just be the user request, copied exactly."
|
||||
"description": "Write a brief outline that explains the documentation (mostly datasets) that you would like to search the data catalog for. It should start with 'I need...' and then proceed to briefly describe the needed documentation. Specifically, you should describe the types of datasets and topics that you will likely need to understand to accomplish your task or workflow. You don't know exactly what data exists, so your request needs to be broad and general."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
}
|
||||
})
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue