diff --git a/api/libs/agents/src/agent.rs b/api/libs/agents/src/agent.rs index eb0235a46..3a1387898 100644 --- a/api/libs/agents/src/agent.rs +++ b/api/libs/agents/src/agent.rs @@ -652,7 +652,7 @@ impl Agent { session_id: thread_ref.id.to_string(), trace_id: Uuid::new_v4().to_string(), }), - reasoning_effort: Some("high".to_string()), + reasoning_effort: Some("medium".to_string()), ..Default::default() }; diff --git a/api/libs/agents/src/agents/buster_multi_agent.rs b/api/libs/agents/src/agents/buster_multi_agent.rs index 16d380d16..3bc2e0000 100644 --- a/api/libs/agents/src/agents/buster_multi_agent.rs +++ b/api/libs/agents/src/agents/buster_multi_agent.rs @@ -178,7 +178,7 @@ impl BusterMultiAgent { let model = if env::var("ENVIRONMENT").unwrap_or_else(|_| "development".to_string()) == "local" { "o4-mini".to_string() } else { - "gemini-2.5-pro-exp-03-25".to_string() + "o4-mini".to_string() }; // Create agent, passing the provider diff --git a/api/libs/agents/src/agents/modes/analysis.rs b/api/libs/agents/src/agents/modes/analysis.rs index eba8e59e5..8939f9c36 100644 --- a/api/libs/agents/src/agents/modes/analysis.rs +++ b/api/libs/agents/src/agents/modes/analysis.rs @@ -49,7 +49,7 @@ pub fn get_configuration(agent_data: &ModeAgentData, data_source_syntax: Option< let model = if env::var("ENVIRONMENT").unwrap_or_else(|_| "development".to_string()) == "local" { "o4-mini".to_string() } else { - "gemini-2.5-pro-exp-03-25".to_string() + "o4-mini".to_string() }; // 3. Define the tool loader closure @@ -349,6 +349,12 @@ To conclude your worklow, you use the `finish_and_respond` tool to send a final - **SQL Requirements**: - Use database-qualified schema-qualified table names (`..`). - Use fully qualified column names with table aliases (e.g., `.`). + - **MANDATORY SQL NAMING CONVENTIONS**: + - **All Table References**: MUST be fully qualified: `DATABASE_NAME.SCHEMA_NAME.TABLE_NAME`. + - **All Column References**: MUST be qualified with their table alias (e.g., `alias.column_name`) or CTE name (e.g., `cte_alias.column_name_from_cte`). + - **Inside CTE Definitions**: When defining a CTE (e.g., `WITH my_cte AS (SELECT t.column1 FROM DATABASE.SCHEMA.TABLE1 t ...)`), all columns selected from underlying database tables MUST use their table alias (e.g., `t.column1`, not just `column1`). This applies even if the CTE is simple and selects from only one table. + - **Selecting From CTEs**: When selecting from a defined CTE, use the CTE's alias for its columns (e.g., `SELECT mc.column1 FROM my_cte mc ...`). + - **Universal Application**: These naming conventions are strict requirements and apply universally to all parts of the SQL query, including every CTE definition and every subsequent SELECT statement. Non-compliance will lead to errors. - **Context Adherence**: Strictly use only columns that are present in the data context provided by search results. Never invent or assume columns. - Select specific columns (avoid `SELECT *` or `COUNT(*)`). - Use CTEs instead of subqueries, and use snake_case for naming them. diff --git a/api/libs/agents/src/agents/modes/data_catalog_search.rs b/api/libs/agents/src/agents/modes/data_catalog_search.rs index 22728aa1b..6100cdb64 100644 --- a/api/libs/agents/src/agents/modes/data_catalog_search.rs +++ b/api/libs/agents/src/agents/modes/data_catalog_search.rs @@ -36,7 +36,7 @@ pub fn get_configuration(agent_data: &ModeAgentData, _data_source_syntax: Option let model = if env::var("ENVIRONMENT").unwrap_or_else(|_| "development".to_string()) == "local" { "o4-mini".to_string() } else { - "gemini-2.5-pro-exp-03-25".to_string() + "o4-mini".to_string() }; // 3. Define the tool loader closure diff --git a/api/libs/agents/src/agents/modes/follow_up_initialization.rs b/api/libs/agents/src/agents/modes/follow_up_initialization.rs index b6d51d7ff..73f4fae55 100644 --- a/api/libs/agents/src/agents/modes/follow_up_initialization.rs +++ b/api/libs/agents/src/agents/modes/follow_up_initialization.rs @@ -41,7 +41,7 @@ pub fn get_configuration(agent_data: &ModeAgentData) -> ModeConfiguration { if env::var("ENVIRONMENT").unwrap_or_else(|_| "development".to_string()) == "local" { "o4-mini".to_string() } else { - "gemini-2.5-pro-exp-03-25".to_string() + "o4-mini".to_string() }; // 3. Define the tool loader closure diff --git a/api/libs/agents/src/agents/modes/initialization.rs b/api/libs/agents/src/agents/modes/initialization.rs index b86595b0a..40aa005fa 100644 --- a/api/libs/agents/src/agents/modes/initialization.rs +++ b/api/libs/agents/src/agents/modes/initialization.rs @@ -39,7 +39,7 @@ pub fn get_configuration( if env::var("ENVIRONMENT").unwrap_or_else(|_| "development".to_string()) == "local" { "o4-mini".to_string() } else { - "gemini-2.5-pro-exp-03-25".to_string() + "o4-mini".to_string() }; // 3. Define the tool loader closure diff --git a/api/libs/agents/src/agents/modes/planning.rs b/api/libs/agents/src/agents/modes/planning.rs index b227fe60a..a894accfb 100644 --- a/api/libs/agents/src/agents/modes/planning.rs +++ b/api/libs/agents/src/agents/modes/planning.rs @@ -39,7 +39,7 @@ pub fn get_configuration( if env::var("ENVIRONMENT").unwrap_or_else(|_| "development".to_string()) == "local" { "o4-mini".to_string() } else { - "gemini-2.5-pro-exp-03-25".to_string() + "o4-mini".to_string() }; // 3. Define the tool loader closure diff --git a/api/libs/agents/src/agents/modes/review.rs b/api/libs/agents/src/agents/modes/review.rs index 834b38ff7..f95022a70 100644 --- a/api/libs/agents/src/agents/modes/review.rs +++ b/api/libs/agents/src/agents/modes/review.rs @@ -31,7 +31,7 @@ pub fn get_configuration( if env::var("ENVIRONMENT").unwrap_or_else(|_| "development".to_string()) == "local" { "o4-mini".to_string() } else { - "gemini-2.5-flash-preview-04-17".to_string() + "gemini-2.0-flash-001".to_string() }; // 3. Define the tool loader closure diff --git a/api/libs/agents/src/tools/categories/file_tools/common.rs b/api/libs/agents/src/tools/categories/file_tools/common.rs index fbf30d4ca..9164b3c84 100644 --- a/api/libs/agents/src/tools/categories/file_tools/common.rs +++ b/api/libs/agents/src/tools/categories/file_tools/common.rs @@ -163,6 +163,7 @@ pub const METRIC_YML_SCHEMA: &str = r##" # Note: Respond only with the time period, without explanation or additional copy. # `sql`: The SQL query for the metric. # - RULE: MUST use the pipe `|` block scalar style to preserve formatting and newlines. +# - NOTE: Remember to use fully qualified names: DATABASE_NAME.SCHEMA_NAME.TABLE_NAME for tables and table_alias.column for columns. This applies to all table and column references, including those within Common Table Expressions (CTEs) and when selecting from CTEs. # - Example: # sql: | # SELECT ... @@ -231,6 +232,7 @@ properties: SQL query using YAML pipe syntax (|). The SQL query should be formatted with proper indentation using the YAML pipe (|) syntax. This ensures the multi-line SQL is properly parsed while preserving whitespace and newlines. + IMPORTANT: Remember to use fully qualified names: DATABASE_NAME.SCHEMA_NAME.TABLE_NAME for tables and table_alias.column for columns. This rule is critical for all table and column references, including those within Common Table Expressions (CTEs) and when selecting from CTEs. Example: sql: | SELECT column1, column2 diff --git a/api/libs/agents/src/tools/categories/file_tools/create_dashboards.rs b/api/libs/agents/src/tools/categories/file_tools/create_dashboards.rs index 421862866..fb9a9c8f8 100644 --- a/api/libs/agents/src/tools/categories/file_tools/create_dashboards.rs +++ b/api/libs/agents/src/tools/categories/file_tools/create_dashboards.rs @@ -367,7 +367,7 @@ impl ToolExecutor for CreateDashboardFilesTool { async fn get_schema(&self) -> Value { serde_json::json!({ "name": self.get_name(), - "strict": false, + "strict": true, "parameters": { "type": "object", "required": ["files"], @@ -377,7 +377,7 @@ impl ToolExecutor for CreateDashboardFilesTool { "items": { "type": "object", "required": ["name", "yml_content"], - "strict": false, + "strict": true, "properties": { "name": { "type": "string", diff --git a/api/libs/agents/src/tools/categories/file_tools/create_metrics.rs b/api/libs/agents/src/tools/categories/file_tools/create_metrics.rs index e2ea27e4d..07981806a 100644 --- a/api/libs/agents/src/tools/categories/file_tools/create_metrics.rs +++ b/api/libs/agents/src/tools/categories/file_tools/create_metrics.rs @@ -308,7 +308,7 @@ impl ToolExecutor for CreateMetricFilesTool { serde_json::json!({ "name": self.get_name(), "description": get_create_metrics_description().await, - "strict": false, + "strict": true, "parameters": { "type": "object", "required": ["files"], @@ -318,7 +318,7 @@ impl ToolExecutor for CreateMetricFilesTool { "items": { "type": "object", "required": ["name", "yml_content"], - "strict": false, + "strict": true, "properties": { "name": { "type": "string", diff --git a/api/libs/agents/src/tools/categories/file_tools/modify_dashboards.rs b/api/libs/agents/src/tools/categories/file_tools/modify_dashboards.rs index 1eab3bfc0..3487524a4 100644 --- a/api/libs/agents/src/tools/categories/file_tools/modify_dashboards.rs +++ b/api/libs/agents/src/tools/categories/file_tools/modify_dashboards.rs @@ -382,7 +382,7 @@ impl ToolExecutor for ModifyDashboardFilesTool { serde_json::json!({ "name": self.get_name(), "description": get_modify_dashboards_description().await, - "strict": false, + "strict": true, "parameters": { "type": "object", "required": ["files"], @@ -393,7 +393,7 @@ impl ToolExecutor for ModifyDashboardFilesTool { "items": { "type": "object", "required": ["id", "yml_content"], - "strict": false, + "strict": true, "properties": { "id": { "type": "string", diff --git a/api/libs/agents/src/tools/categories/file_tools/modify_metrics.rs b/api/libs/agents/src/tools/categories/file_tools/modify_metrics.rs index 5b9b38ca0..a426dea11 100644 --- a/api/libs/agents/src/tools/categories/file_tools/modify_metrics.rs +++ b/api/libs/agents/src/tools/categories/file_tools/modify_metrics.rs @@ -486,7 +486,7 @@ impl ToolExecutor for ModifyMetricFilesTool { serde_json::json!({ "name": self.get_name(), "description": get_modify_metrics_description().await, - "strict": false, + "strict": true, "parameters": { "type": "object", "required": ["files"], diff --git a/api/libs/agents/src/tools/categories/planning_tools/create_plan_investigative.rs b/api/libs/agents/src/tools/categories/planning_tools/create_plan_investigative.rs index ef1b72723..c2d232320 100644 --- a/api/libs/agents/src/tools/categories/planning_tools/create_plan_investigative.rs +++ b/api/libs/agents/src/tools/categories/planning_tools/create_plan_investigative.rs @@ -87,7 +87,7 @@ impl ToolExecutor for CreatePlanInvestigative { serde_json::json!({ "name": self.get_name(), "description": get_create_plan_investigative_description().await, - "strict": false, + "strict": true, "parameters": { "type": "object", "required": [ diff --git a/api/libs/agents/src/tools/categories/planning_tools/create_plan_straightforward.rs b/api/libs/agents/src/tools/categories/planning_tools/create_plan_straightforward.rs index f04cf073b..120d7734e 100644 --- a/api/libs/agents/src/tools/categories/planning_tools/create_plan_straightforward.rs +++ b/api/libs/agents/src/tools/categories/planning_tools/create_plan_straightforward.rs @@ -84,7 +84,7 @@ impl ToolExecutor for CreatePlanStraightforward { serde_json::json!({ "name": self.get_name(), "description": get_create_plan_straightforward_description().await, - "strict": false, + "strict": true, "parameters": { "type": "object", "required": [ diff --git a/api/libs/litellm/src/types.rs b/api/libs/litellm/src/types.rs index 1f93fa965..dae88a674 100644 --- a/api/libs/litellm/src/types.rs +++ b/api/libs/litellm/src/types.rs @@ -1,6 +1,7 @@ use serde::{Deserialize, Serialize}; use serde_json::Value; use std::collections::HashMap; +use std::env; #[derive(Debug, Serialize, Deserialize, Clone)] pub struct ChatCompletionRequest { @@ -50,7 +51,7 @@ pub struct ChatCompletionRequest { pub parallel_tool_calls: Option, #[serde(skip_serializing_if = "Option::is_none")] pub user: Option, - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(skip_serializing_if = "should_skip_metadata")] pub metadata: Option, } @@ -1065,3 +1066,17 @@ mod tests { assert_eq!(deserialized_resp.usage.total_tokens, 99); } } + +fn should_skip_metadata(metadata: &Option) -> bool { + let env_var_condition_met = match env::var("LLM_BASE_URL") { + Ok(val) => val == "https://api.openai.com/v1", + Err(_) => false, // If env var is not set or any error, condition is not met + }; + + if env_var_condition_met { + return true; // If env var condition is met, always skip + } + + // If env var condition is not met, then skip only if metadata is None + metadata.is_none() +} diff --git a/api/libs/sql_analyzer/src/analysis.rs b/api/libs/sql_analyzer/src/analysis.rs index 5270fbaf1..23614f352 100644 --- a/api/libs/sql_analyzer/src/analysis.rs +++ b/api/libs/sql_analyzer/src/analysis.rs @@ -4,7 +4,7 @@ use anyhow::Result; use rand; use sqlparser::ast::{ Cte, Expr, Join, JoinConstraint, JoinOperator, ObjectName, Query, SelectItem, SetExpr, - Statement, TableFactor, Visit, Visitor, WindowSpec, + Statement, TableFactor, Visit, Visitor, WindowSpec, TableAlias, }; use sqlparser::dialect::GenericDialect; use sqlparser::parser::Parser; @@ -49,6 +49,7 @@ struct QueryAnalyzer { vague_columns: Vec, vague_tables: Vec, ctes: Vec, + current_select_list_aliases: HashSet, } impl QueryAnalyzer { @@ -65,6 +66,7 @@ impl QueryAnalyzer { current_from_relation_identifier: None, current_scope_aliases: HashMap::new(), parent_scope_aliases: HashMap::new(), + current_select_list_aliases: HashSet::new(), } } @@ -207,6 +209,7 @@ impl QueryAnalyzer { // Process a SELECT query fn process_select_query(&mut self, select: &sqlparser::ast::Select) { self.current_scope_aliases.clear(); + self.current_select_list_aliases.clear(); self.current_from_relation_identifier = None; let mut join_conditions_to_visit: Vec<&Expr> = Vec::new(); @@ -223,6 +226,14 @@ impl QueryAnalyzer { } } + // Populate select list aliases *before* processing expressions in WHERE, GROUP BY, HAVING, or SELECT list itself + // This makes them available for resolution in those clauses. + for item in &select.projection { + if let SelectItem::ExprWithAlias { alias, .. } = item { + self.current_select_list_aliases.insert(alias.value.clone()); + } + } + // Process all parts of the query with collected context let combined_aliases_for_visit = self .current_scope_aliases @@ -1031,6 +1042,13 @@ impl QueryAnalyzer { } } None => { + // Check if it's a known select list alias first + if self.current_select_list_aliases.contains(column) { + // It's a select list alias, consider it resolved for this scope. + // No need to add to vague_columns or assign to a table. + return; + } + // Special handling for nested fields without qualifier // For example: "SELECT user.device.type" in BigQuery becomes "SELECT user__device__type" if dialect_nested { @@ -1142,6 +1160,7 @@ impl QueryAnalyzer { scope_stack: self.scope_stack.clone(), parent_scope_aliases: HashMap::new(), current_scope_aliases: HashMap::new(), + current_select_list_aliases: HashSet::new(), ..QueryAnalyzer::new() } } @@ -1172,12 +1191,11 @@ impl Visitor for QueryAnalyzer { type Break = (); fn pre_visit_expr(&mut self, expr: &Expr) -> ControlFlow { - let available_aliases = self - .current_scope_aliases - .iter() - .chain(self.parent_scope_aliases.iter()) - .map(|(k, v)| (k.clone(), v.clone())) - .collect(); + let mut available_aliases = self.parent_scope_aliases.clone(); + available_aliases.extend(self.current_scope_aliases.clone()); + for alias in &self.current_select_list_aliases { + available_aliases.insert(alias.clone(), alias.clone()); // Select list aliases map to themselves + } match expr { Expr::Identifier(ident) => { @@ -1222,14 +1240,29 @@ impl QueryAnalyzer { sqlparser::ast::FunctionArg::Unnamed(arg_expr) => { if let sqlparser::ast::FunctionArgExpr::Expr(expr) = arg_expr { self.visit_expr_with_parent_scope(expr, available_aliases); + } else if let sqlparser::ast::FunctionArgExpr::QualifiedWildcard(name) = arg_expr { + // Handle cases like COUNT(table.*) + let qualifier = name.0.first().map(|i| i.value.clone()).unwrap_or_default(); + if !qualifier.is_empty() { + if !available_aliases.contains_key(&qualifier) && // Check against combined available_aliases + !self.tables.contains_key(&qualifier) && + !self.is_known_cte_definition(&qualifier) { + self.vague_tables.push(qualifier); + } + } + } else if let sqlparser::ast::FunctionArgExpr::Wildcard = arg_expr { + // Handle COUNT(*) - no specific column to track here } } - sqlparser::ast::FunctionArg::Named { arg: named_arg, .. } => { + sqlparser::ast::FunctionArg::Named { name, arg: named_arg, operator: _ } => { + // Argument name itself might be an identifier (though less common in SQL for this context) + // self.add_column_reference(None, &name.value, &available_aliases); if let sqlparser::ast::FunctionArgExpr::Expr(expr) = named_arg { self.visit_expr_with_parent_scope(expr, available_aliases); } } - sqlparser::ast::FunctionArg::ExprNamed { arg: expr_named_arg, .. } => { + sqlparser::ast::FunctionArg::ExprNamed { name, arg: expr_named_arg, operator: _ } => { + // self.add_column_reference(None, &name.value, &available_aliases); if let sqlparser::ast::FunctionArgExpr::Expr(expr) = expr_named_arg { self.visit_expr_with_parent_scope(expr, available_aliases); } @@ -1246,16 +1279,41 @@ impl QueryAnalyzer { .. })) = &function.over { - for expr in partition_by { - self.visit_expr_with_parent_scope(expr, available_aliases); + for expr_item in partition_by { // expr_item is &Expr + self.visit_expr_with_parent_scope(expr_item, available_aliases); } - for order_expr in order_by { - self.visit_expr_with_parent_scope(&order_expr.expr, available_aliases); + for order_expr_item in order_by { // order_expr_item is &OrderByExpr + self.visit_expr_with_parent_scope(&order_expr_item.expr, available_aliases); } if let Some(frame) = window_frame { - frame.start_bound.visit(self); + // frame.start_bound and frame.end_bound are WindowFrameBound + // which can contain Expr that needs visiting. + // The default Visitor implementation should handle these if they are Expr. + // However, sqlparser::ast::WindowFrameBound is not directly visitable. + // We need to manually extract expressions from it. + + // Example for start_bound: + match &frame.start_bound { + sqlparser::ast::WindowFrameBound::CurrentRow => {} + sqlparser::ast::WindowFrameBound::Preceding(Some(expr)) | + sqlparser::ast::WindowFrameBound::Following(Some(expr)) => { + self.visit_expr_with_parent_scope(expr, available_aliases); + } + sqlparser::ast::WindowFrameBound::Preceding(None) | + sqlparser::ast::WindowFrameBound::Following(None) => {} + } + + // Example for end_bound: if let Some(end_bound) = &frame.end_bound { - end_bound.visit(self); + match end_bound { + sqlparser::ast::WindowFrameBound::CurrentRow => {} + sqlparser::ast::WindowFrameBound::Preceding(Some(expr)) | + sqlparser::ast::WindowFrameBound::Following(Some(expr)) => { + self.visit_expr_with_parent_scope(expr, available_aliases); + } + sqlparser::ast::WindowFrameBound::Preceding(None) | + sqlparser::ast::WindowFrameBound::Following(None) => {} + } } } } diff --git a/api/libs/sql_analyzer/tests/analysis_tests.rs b/api/libs/sql_analyzer/tests/analysis_tests.rs index 7ec815e4b..2fa8744ca 100644 --- a/api/libs/sql_analyzer/tests/analysis_tests.rs +++ b/api/libs/sql_analyzer/tests/analysis_tests.rs @@ -28,6 +28,52 @@ async fn test_simple_query() { assert!(table.columns.contains("name"), "Missing 'name' column"); } +#[tokio::test] +async fn test_complex_cte_with_date_function() { + let sql = "WITH top5 AS ( + SELECT ptr.product_name, SUM(ptr.metric_producttotalrevenue) AS total_revenue + FROM ont_ont.product_total_revenue AS ptr + GROUP BY ptr.product_name + ORDER BY total_revenue DESC + LIMIT 5 + ) + SELECT + MAKE_DATE(pqs.year::int, ((pqs.quarter - 1) * 3 + 1)::int, 1) AS quarter_start, + pqs.product_name, + SUM(pqs.metric_productquarterlysales) AS quarterly_revenue + FROM ont_ont.product_quarterly_sales AS pqs + JOIN top5 ON pqs.product_name = top5.product_name + GROUP BY quarter_start, pqs.product_name + ORDER BY quarter_start ASC, pqs.product_name;"; + + let result = analyze_query(sql.to_string()).await.unwrap(); + + // Check CTE + assert_eq!(result.ctes.len(), 1); + let cte = &result.ctes[0]; + assert_eq!(cte.name, "top5"); + assert_eq!(cte.summary.tables.len(), 1); + assert_eq!(cte.summary.joins.len(), 0); + + // Check main query tables + assert_eq!(result.tables.len(), 2); + let table_names: Vec = result.tables.iter().map(|t| t.table_identifier.clone()).collect(); + assert!(table_names.contains(&"product_quarterly_sales".to_string())); + assert!(table_names.contains(&"product_total_revenue".to_string())); + + // Check joins + assert_eq!(result.joins.len(), 1); + let join = result.joins.iter().next().unwrap(); + assert_eq!(join.left_table, "product_quarterly_sales"); + assert_eq!(join.right_table, "product_total_revenue"); + + // Check schema identifiers + for table in result.tables { + assert_eq!(table.schema_identifier, Some("ont_ont".to_string())); + } +} + + #[tokio::test] async fn test_joins() { let sql = diff --git a/api/makefile b/api/makefile index 2551b88d9..70a9ba25b 100644 --- a/api/makefile +++ b/api/makefile @@ -1,4 +1,6 @@ dev: + docker stop buster-redis-make || true && \ + docker rm buster-redis-make || true && \ cd .. && docker run -d --name buster-redis-make -p 6379:6379 -v buster_redis_data:/data redis && cd api && \ supabase start supabase db reset @@ -26,6 +28,8 @@ stop: supabase stop fast: + docker stop buster-redis-make || true && \ + docker rm buster-redis-make || true && \ cd .. && docker run -d --name buster-redis-make -p 6379:6379 -v buster_redis_data:/data redis && cd api && \ export RUST_LOG=debug && \ export CARGO_INCREMENTAL=1 && \