changes for snowflake querying

2025-04-18 09:25:08 -06:00 · 2025-04-18 09:25:08 -06:00 · c17cd8e06b
parent fcaaddef5d
commit c17cd8e06b
6 changed files with 2457 additions and 675 deletions
--- a/api/libs/agents/src/agents/modes/mod.rs
+++ b/api/libs/agents/src/agents/modes/mod.rs
@ -49,7 +49,6 @@ pub struct ModeConfiguration {
 #[derive(Debug, PartialEq, Eq, Clone, Copy)]
 pub enum AgentState {
    Initializing,
-    FollowUpInitialization,
    DataCatalogSearch,
    Planning,
    AnalysisExecution,
@ -77,86 +76,90 @@ pub fn determine_agent_state(state: &HashMap<String, Value>) -> AgentState {
    let has_user_prompt = state.contains_key("user_prompt"); // Check if latest user prompt is stored


-    // 1. Handle states before the user provides their first prompt in this turn/session
-    if !has_user_prompt {
-        return if is_follow_up {
-            AgentState::FollowUpInitialization
-        } else {
-            AgentState::Initializing
-        };
+    // 1. Handle the state before the user provides their first prompt in this turn
+    //    If it's not a follow-up and there's no prompt, it's the initial state.
+    //    If it IS a follow-up, we proceed to the next checks even without a new prompt,
+    //    as the follow-up flag indicates we should continue the existing flow.
+    if !has_user_prompt && !is_follow_up {
+        return AgentState::Initializing;
    }

-    // 2. Review always takes precedence after user speaks
+    // 2. Review always takes precedence after user speaks (or if flagged from previous turn)
    if needs_review {
        return AgentState::Review;
    }

-    // 3. If we haven't searched the catalog yet, do that now (initial or follow-up)
-    //    This is the key change: check this condition before others like has_data_context
+    // 3. If we haven't searched the catalog yet (or need to search again for follow-up), do that now.
+    //    This applies to both initial requests and follow-ups that might require new data.
    if !searched_catalog {
        return AgentState::DataCatalogSearch;
    }

-    // 4. If we have context but no plan, plan
+    // 4. If we have context but no plan, plan.
+    //    This covers initial runs after search and follow-ups needing planning.
    if has_data_context && !has_plan {
        return AgentState::Planning;
    }

-    // 5. If we have context and a plan, execute analysis
+    // 5. If we have context and a plan, execute analysis.
+    //    This covers initial runs and follow-ups continuing analysis.
    if has_data_context && has_plan {
        return AgentState::AnalysisExecution;
    }

-    // 6. Fallback: If the state is ambiguous after searching and without needing review
-    //    (e.g., search happened but no context was added, or no plan needed).
-    //    Revert to an earlier appropriate state.
-    if is_follow_up {
-        // If it was a follow-up, perhaps return to follow-up init or planning?
-        // Let's choose FollowUpInitialization as a safe default if planning/analysis aren't ready.
-        AgentState::FollowUpInitialization
-    } else {
-         // If it was initial, perhaps return to init or planning?
-         // Let's choose Initializing as a safe default if planning/analysis aren't ready.
-        AgentState::Initializing
+    // 6. Fallback: If state is ambiguous (e.g., search done, no context added, no review needed).
+    //    Maybe the search found nothing relevant. In this case, Planning is the next logical step
+    //    to decide how to respond (e.g., using Done tool).
+    //    This covers both initial and follow-up scenarios where planning is needed after an inconclusive search.
+    if searched_catalog && !has_data_context && !has_plan {
+        return AgentState::Planning;
    }

-    // Original logic kept for reference:
-    // // Initial state checks should happen first
-    // if !has_user_prompt && !is_follow_up {
-    //     return AgentState::Initializing; // Haven't even received the first real prompt
-    // }
-    // if !has_user_prompt && is_follow_up {
-    //     return AgentState::FollowUpInitialization; // Follow up state before first turn
-    // }
+    // 7. Default fallback: revert to Initializing.
+    //    Given the returns above, only "searched, plan present, no data context" reaches this point.
+    AgentState::Initializing

-    // // Subsequent state logic
-    // if needs_review {
-    //     AgentState::Review
-    // } else if !searched_catalog {
-    //     // If we haven't searched the catalog, we're in the initial search phase
-    //     // unless it's a follow-up, where we might skip straight to planning/analysis
-    //     // if context already exists from the previous run.
-    //     if is_follow_up && has_data_context {
-    //         if has_plan {
-    //             AgentState::AnalysisExecution // Follow-up with context and plan -> Analysis
-    //         } else {
-    //             AgentState::Planning // Follow-up with context but no plan -> Planning
-    //         }
+    // Old logic:
+    // // 1. Handle states before the user provides their first prompt in this turn/session
+    // if !has_user_prompt {
+    //     return if is_follow_up {
+    //         AgentState::FollowUpInitialization // Removed
    //     } else {
-    //         AgentState::DataCatalogSearch
-    //     }
-    // } else if has_data_context && !has_plan {
-    //     // After search, if we have context but no plan, we plan.
-    //     // This covers both initial runs and follow-ups that gain context but need a plan.
-    //     AgentState::Planning
-    // } else if has_data_context && has_plan {
-    //     // With context and a plan, we execute analysis/actions.
-    //     AgentState::AnalysisExecution
-    // } else if is_follow_up {
-    //     // Default follow-up state if other conditions aren't met yet (e.g., post-search, no context yet)
-    //     AgentState::FollowUpInitialization
+    //         AgentState::Initializing
+    //     };
+    // }
+    //
+    // // 2. Review always takes precedence after user speaks
+    // if needs_review {
+    //     return AgentState::Review;
+    // }
+    //
+    // // 3. If we haven't searched the catalog yet, do that now (initial or follow-up)
+    // //    This is the key change: check this condition before others like has_data_context
+    // if !searched_catalog {
+    //     return AgentState::DataCatalogSearch;
+    // }
+    //
+    // // 4. If we have context but no plan, plan
+    // if has_data_context && !has_plan {
+    //     return AgentState::Planning;
+    // }
+    //
+    // // 5. If we have context and a plan, execute analysis
+    // if has_data_context && has_plan {
+    //     return AgentState::AnalysisExecution;
+    // }
+    //
+    // // 6. Fallback: If the state is ambiguous after searching and without needing review
+    // //    (e.g., search happened but no context was added, or no plan needed).
+    // //    Revert to an earlier appropriate state.
+    // if is_follow_up {
+    //     // If it was a follow-up, perhaps return to follow-up init or planning?
+    //     // Let's choose FollowUpInitialization as a safe default if planning/analysis aren't ready.
+    //     AgentState::FollowUpInitialization // Removed
    // } else {
-    //     // Default initial state if other conditions aren't met (e.g., post-search, no context yet)
+    //      // If it was initial, perhaps return to init or planning?
+    //      // Let's choose Initializing as a safe default if planning/analysis aren't ready.
    //     AgentState::Initializing
    // }
 }
--- a/api/libs/agents/src/agents/modes/planning.rs
+++ b/api/libs/agents/src/agents/modes/planning.rs
@ -91,19 +91,21 @@ const PLANNING_PROMPT: &str = r##"## Overview

 You are Buster, an AI data analytics assistant designed to help users with data-related tasks. Your role involves interpreting user requests, locating relevant data, and executing well-defined analysis plans. You excel at handling both simple and complex analytical tasks, relying on your ability to create clear, step-by-step plans that precisely meet the user's needs.

+**Important**: Pay close attention to the conversation history. If this is a follow-up question, leverage the context from previous turns (e.g., existing data context, previous plans or results) to refine or build upon the analysis.
+
 Today's date is {TODAYS_DATE}.

 ## Workflow Summary

-1. **Search the data catalog** to locate relevant data.
+1. **Search the data catalog** to locate relevant data (if needed, based on conversation history).
 2. **Assess the adequacy** of the search results:
-   - If adequate or partially adequate, proceed to create a plan.
+   - If adequate or partially adequate, proceed to create or update a plan.
   - If inadequate, inform the user that the task cannot be completed.
-3. **Create a plan** using the appropriate create plan tool.
-4. **Execute the plan** by creating assets such as metrics or dashboards.
-5. **Send a final response the user** and inform them that the task is complete.
+3. **Create or update a plan** using the appropriate create plan tool, considering previous interactions.
+4. **Execute the plan** by creating or modifying assets such as metrics or dashboards.
+5. **Send a final response to the user** and inform them that the task is complete.

-**Your current task is to create a plan.**
+**Your current task is to create or update a plan based on the latest user request and conversation history.**

 ## Tool Calling

@ -188,7 +190,14 @@ You use various analysis types, executed with SQL, depending on the task. You ar

 ## Creating a Plan

-To create an effective plan, you must first determine the type of plan based on the nature of the user's request. Since only SQL is supported, all plans will utilize SQL for data retrieval and analysis. 
+To create an effective plan, you must first determine the type of plan based on the nature of the user's request **and the conversation history**. Since only SQL is supported, all plans will utilize SQL for data retrieval and analysis. 
+
+### Handling Follow-Up Questions
+- **Review History**: Carefully examine the previous messages, plans, and results.
+- **Identify Changes**: Determine if the user is asking for a modification, a new analysis based on previous results, or a completely unrelated task.
+- **Modify Existing Plan**: If the user wants to tweak a previous analysis (e.g., change time range, add filter, different visualization), update the existing plan steps rather than starting from scratch.
+- **Build Upon Context**: If the user asks a related question, leverage the existing data context and potentially add new steps to the plan.
+- **Acknowledge History**: If appropriate, mention in the plan's `Thought` section how the previous context influences the current plan.

 ### Plan types
 There are two types of plans:
--- a/api/libs/agents/src/tools/categories/planning_tools/create_plan_investigative.rs
+++ b/api/libs/agents/src/tools/categories/planning_tools/create_plan_investigative.rs
@ -131,30 +131,36 @@ Use this template to create a clear and actionable plan for investigative data r
 Ensure the final plan output is well-formatted with markdown for readability.

 **Thought**
-Analyze the user's request and outline your approach. Keep it simple. Use a clear, direct style to communicate your thoughts in a simple and natural tone. Consider the goal, the types of visualizations needed, the specific datasets that will be used, etc. You should aim to create lots of visualizations (more than 8) to assess which ones return valuable infromation, and then compile a dashboard.
+Analyze the user's request **and the conversation history**. Outline your approach. Keep it simple. Use a clear, direct style to communicate your thoughts in a simple and natural tone. Consider the goal, the types of visualizations needed, the specific datasets that will be used, **and how this request relates to previous interactions**. You should aim to create lots of visualizations (more than 8) to assess which ones return valuable information, and then compile a dashboard. **If this is a follow-up, explain how you are incorporating previous context or modifying the prior plan/results.**

 **Step-by-Step Plan**
-1. **Create [number] visualization(s)**:
+*Outline actionable steps. If modifying a previous plan, clearly indicate which steps are being changed or added.* 
+1. **Create [number] visualization(s)** (or **Modify existing visualization(s)** or **Add [number] visualization(s)**):
   - **Title**: [Simple title for the visualization]
     - **Type**: [e.g., Bar Chart, Line Chart, Number Card, Grouped Bar Chart, Stacked Bar Chart, Multi-Line Chart, etc.]
     - **Datasets**: [Relevant datasets]
     - **Expected Output**: [Describe the visualization, e.g., axes and key elements. For grouped/stacked bars or multi-line charts, explicitly state the grouping/stacking/splitting method and the field used. See guidelines below.]
   - [Repeat for each visualization]

-2. **Create dashboard**:
-   - Compile the visualizations into a dashboard.
+2. **Create dashboard** (or **Update dashboard**):
+   - Compile the visualizations into a dashboard (or update the existing one).
   - Do not include visualizations that didn't return any records/data.

 3. **Review & Finish**:
-   - Verify that the analysis, visualizations, and dashboard accurately represent the data and provide the required insights.
+   - Verify that the analysis, visualizations, and dashboard accurately represent the data and provide the required insights, **considering the full conversation context**.
   - Adjust the plan if necessary based on the review.

 **Notes** (Optional)
-Add any assumptions, limitations, or clarifications about the analysis and findings.
+Add any assumptions, limitations, or clarifications about the analysis and findings. **Reference any necessary context from previous turns.**

 ---

 #### Guidelines
+- **Handling Follow-ups**: When the user asks a follow-up question:
+    - **Modify Existing Assets**: If the request is to change an existing visualization (e.g., change timeframe, add filter), the step should be "**Modify existing visualization(s)**" and describe the changes.
+    - **Add to Existing Assets**: If the request adds related analysis, use "**Add [number] visualization(s)**" and potentially "**Update dashboard**".
+    - **Leverage Context**: Use the existing data context and plan structure where possible.
+    - **Acknowledge**: Briefly note in the `Thought` section how the follow-up is being handled.
 - **Visualizations**: Describe what the visualization should show (e.g., "a bar chart with months on the x-axis and sales on the y-axis"). Avoid SQL or technical details. Do not define names for axes labels, just state what data should go on each axis.
 - **For Grouped/Stacked Bars**: Explicitly state if it's a `grouped bar chart` or `stacked bar chart` (or `100% stacked`). Clearly name the field used for splitting/stacking (e.g., "grouped bars side-by-side split by `[field_name]`", "bars stacked by `[field_name]`").
 - **For Multi-Line Charts**: Explicitly state it's a `multi-line chart`. Describe *how* the multiple lines are generated: either by splitting a single metric using a category field (e.g., "split into separate lines by `[field_name]`") OR by plotting multiple distinct metrics (e.g., "plotting separate lines for `[metric1]` and `[metric2]`").
--- a/api/libs/agents/src/tools/categories/planning_tools/create_plan_straightforward.rs
+++ b/api/libs/agents/src/tools/categories/planning_tools/create_plan_straightforward.rs
@ -131,27 +131,32 @@ Use this template to create a clear and actionable plan tailored to the user's r
 Ensure the final plan output is well-formatted with markdown for readability.

 **Thought**  
-Analyze the user's request and outline your approach. Keep it simple. Use a clear, direct style to communicate your thoughts in a simple and natural tone. Consider the goal, the types of visualizations needed, the specific datasets that will be used, etc. For broad or summary requests (e.g., "summarize sales"), plan to create lots of visualizations (8-12 total) to provide a comprehensive view of the data.
+Analyze the user's request **and the conversation history**. Outline your approach. Keep it simple. Use a clear, direct style to communicate your thoughts in a simple and natural tone. Consider the goal, the types of visualizations needed, the specific datasets that will be used, **and how this relates to previous interactions**. For broad or summary requests (e.g., "summarize sales"), plan to create lots of visualizations (8-12 total) to provide a comprehensive view of the data. **If this is a follow-up, explain how you are incorporating previous context or modifying the prior plan/results.**

 **Step-by-Step Plan**  
-Outline actionable steps to fulfill the request. Your plan should mirror the exact template below:
-1. **Create [number] visualization(s)**:
+*Outline actionable steps. If modifying a previous plan, clearly indicate which steps are being changed or added.* 
+1. **Create [number] visualization(s)** (or **Modify existing visualization(s)** or **Add [number] visualization(s)**):
   - **Title**: [Simple title for the visualization]
   - **Type**: [e.g., Bar Chart, Line Chart, Number Card, Grouped Bar Chart, Stacked Bar Chart, Multi-Line Chart, etc.]
   - **Datasets**: [Relevant datasets]
   - **Expected Output**: [Describe the visualization, e.g., axes and key elements. For grouped/stacked bars or multi-line charts, explicitly state the grouping/stacking/splitting method and the field used. See guidelines below.]
   - [Repeat for each visualization if multiple]
-2. **[(Optional) Create dashboard]**:  
-   If creating multiple visualizations, specify how they should be organized into a dashboard (e.g., title, layout).
+2. **[(Optional) Create dashboard]** (or **[(Optional) Update dashboard]**):  
+   If creating multiple visualizations, specify how they should be organized into a dashboard (e.g., title, layout) or how an existing one should be updated.
 3. **Review & Finish**:  
-   Verify that visualizations display data correctly (e.g., no empty results, correct timeframes) and meet the user's request. Adjust the plan if needed.
+   Verify that visualizations display data correctly (e.g., no empty results, correct timeframes) and meet the user's request, **considering the full conversation context**. Adjust the plan if needed.

 **Notes** (Optional)  
-Add context like assumptions, limitations, or acknowledge unsupported aspects of the user request.
+Add context like assumptions, limitations, or acknowledge unsupported aspects of the user request. **Reference any necessary context from previous turns.**

 ---

 #### Guidelines
+- **Handling Follow-ups**: When the user asks a follow-up question:
+    - **Modify Existing Assets**: If the request is to change an existing visualization (e.g., change timeframe, add filter), the step should be "**Modify existing visualization(s)**" and describe the changes.
+    - **Add to Existing Assets**: If the request adds related analysis, use "**Add [number] visualization(s)**" and potentially "**Update dashboard**".
+    - **Leverage Context**: Use the existing data context and plan structure where possible.
+    - **Acknowledge**: Briefly note in the `Thought` section how the follow-up is being handled.
 - **Visualizations**: Describe what the visualization should show (e.g., "a bar chart with months on the x-axis and sales on the y-axis"). Avoid SQL or technical details. Do not define names for axes labels, just state what data should go on each axis.
   - **For Grouped/Stacked Bars**: Explicitly state if it's a `grouped bar chart` or `stacked bar chart` (or `100% stacked`). Clearly name the field used for splitting/stacking (e.g., "grouped bars side-by-side split by `[field_name]`", "bars stacked by `[field_name]`").
   - **For Multi-Line Charts**: Explicitly state it's a `multi-line chart`. Describe *how* the multiple lines are generated: either by splitting a single metric using a category field (e.g., "split into separate lines by `[field_name]`") OR by plotting multiple distinct metrics (e.g., "plotting separate lines for `[metric1]` and `[metric2]`").
--- a/api/libs/query_engine/src/data_source_query_routes/query_engine.rs
+++ b/api/libs/query_engine/src/data_source_query_routes/query_engine.rs
@ -187,7 +187,7 @@ fn compute_column_metadata(data: &[IndexMap<String, DataType>]) -> Vec<ColumnMet
        };

        ColumnMetaData {
-            name: column_name.clone(),
+            name: column_name.to_lowercase(),
            min_value,
            max_value,
            unique_values: value_map.len() as i32,
@ -511,7 +511,7 @@ async fn route_to_query(

            

-            match snowflake_query(snowflake_client, sql.to_owned(), limit).await {
+            match snowflake_query(snowflake_client, sql.to_owned()).await {
                Ok(results) => results,
                Err(e) => {
                    tracing::error!("There was an issue while fetching the tables: {}", e);
--- a/api/libs/query_engine/src/data_source_query_routes/snowflake_query.rs
+++ b/api/libs/query_engine/src/data_source_query_routes/snowflake_query.rs