diff --git a/api/libs/sql_analyzer/src/analysis.rs b/api/libs/sql_analyzer/src/analysis.rs index 710238767..19b5e5b25 100644 --- a/api/libs/sql_analyzer/src/analysis.rs +++ b/api/libs/sql_analyzer/src/analysis.rs @@ -1046,10 +1046,23 @@ impl QueryAnalyzer { table_info.columns.insert(base_column.to_string()); } } else { - self.vague_tables.push(qualifier.to_string()); + // Qualifier resolved, but not to a table in the current scope's `self.tables`. + // This could be a select list alias or a parent scope alias's target. + // If it's not a known parent alias, then it's vague. + if !self.parent_scope_aliases.contains_key(qualifier) && + !self.parent_scope_aliases.values().any(|v| v == resolved_identifier) { + // Also check if the qualifier itself is a known select list alias. If so, it's not a table. + if !self.current_select_list_aliases.contains(qualifier) { + self.vague_tables.push(qualifier.to_string()); + } + } + // If it IS a parent alias or a select list alias, we don't mark it vague here. + // For select list aliases, they can't be qualified further in standard SQL. + // For parent aliases, the column resolution is handled by the parent. } } else { - if self.tables.contains_key(qualifier) { + // Qualifier itself is not in available_aliases (current_scope, parent_scope, or select_list_aliases) + if self.tables.contains_key(qualifier) { // Direct table name (not aliased in current scope) if let Some(table_info) = self.tables.get_mut(qualifier) { table_info.columns.insert(column.to_string()); if dialect_nested { @@ -1057,10 +1070,8 @@ impl QueryAnalyzer { } } } else if self.parent_scope_aliases.contains_key(qualifier) { - // Qualifier is not a known table/alias in current scope, - // BUT it IS known in the parent scope (correlated subquery reference). - // We treat it as resolved for column analysis, but don't add the column - // to a table info in *this* analyzer. Do nothing here to prevent vagueness error. + // Qualifier is a known parent scope alias. + // This column belongs to the parent scope; do nothing here. } else { // Qualifier not found in aliases, direct table names, or parent aliases. It's vague. self.vague_tables.push(qualifier.to_string()); @@ -1068,6 +1079,7 @@ impl QueryAnalyzer { } } None => { + // Unqualified column // Check if it's a known select list alias first if self.current_select_list_aliases.contains(column) { // It's a select list alias, consider it resolved for this scope. @@ -1075,29 +1087,44 @@ impl QueryAnalyzer { return; } - // Special handling for nested fields without qualifier - // For example: "SELECT user.device.type" in BigQuery becomes "SELECT user__device__type" + // Construct true_sources: only from current_scope_aliases (FROM clause) and parent_scope_aliases (outer queries) + // Excludes select list aliases for determining ambiguity of other unqualified columns. + let mut true_sources = self.current_scope_aliases.clone(); + true_sources.extend(self.parent_scope_aliases.clone()); + + if dialect_nested { - // Try to find a table that might contain the base column - let mut assigned = false; + // Handle unqualified dialect_nested columns (e.g., SELECT user__device__type) + // The base_column (e.g., "user") must unambiguously refer to a single true source. + if true_sources.len() == 1 { + let source_alias = true_sources.keys().next().unwrap(); // Alias used in query (e.g., "u" in "FROM users u") + let resolved_entity_name = true_sources.values().next().unwrap(); // Actual table/CTE name (e.g., "users") - for table_info in self.tables.values_mut() { - // For now, simply add the column to all tables - // This is less strict but ensures we don't miss real references - table_info.columns.insert(base_column.to_string()); - table_info.columns.insert(column.to_string()); - assigned = true; - } - - // If we couldn't assign it to any table and we have tables in scope, - // it's likely a literal or expression, so don't report as vague - if !assigned && !self.tables.is_empty() { - // Just add the base column as vague for reporting + // Check if base_column matches the alias or the resolved name of the single source + if base_column == source_alias || base_column == resolved_entity_name { + if let Some(table_info) = self.tables.get_mut(resolved_entity_name) { + table_info.columns.insert(base_column.to_string()); // Add base part (e.g. "user") + table_info.columns.insert(column.to_string()); // Add full dialect nested column (e.g. "user__device__type") + } else { + // Single true source, but its resolved_entity_name is not in self.tables. + // This implies it's a parent scope entity. + // The dialect-nested column is considered resolved to the parent. + } + } else { + // Single true source, but base_column does not match it. + // e.g., FROM tableA SELECT fieldX__fieldY (where fieldX is not tableA) + self.vague_columns.push(base_column.to_string()); + } + } else if true_sources.is_empty() { + // No true sources, but a dialect_nested column is used. Vague. + self.vague_columns.push(base_column.to_string()); + } else { // true_sources.len() > 1 + // Multiple true sources, ambiguous which one `base_column` refers to. Vague. self.vague_columns.push(base_column.to_string()); } } else { // Standard unqualified column handling - self.resolve_unqualified_column(column, available_aliases); + self.resolve_unqualified_column(column, &true_sources); } } } @@ -1107,7 +1134,7 @@ impl QueryAnalyzer { fn resolve_unqualified_column( &mut self, column: &str, - available_aliases: &HashMap, + true_sources: &HashMap, // Changed from available_aliases ) { // Special case for the test_vague_references test - always report unqualified 'id' as vague if column == "id" { @@ -1115,21 +1142,27 @@ impl QueryAnalyzer { return; } - if available_aliases.len() == 1 { - // Exactly one source available. - let resolved_identifier = available_aliases.values().next().unwrap(); // Get the single value - if let Some(table_info) = self.tables.get_mut(resolved_identifier) { + if true_sources.len() == 1 { + // Exactly one "true" source available (from current FROM clause or parent scope). + let resolved_entity_name = true_sources.values().next().unwrap(); // Get the actual table/CTE name + + if let Some(table_info) = self.tables.get_mut(resolved_entity_name) { + // The source is defined in the current query's scope (e.g., in self.tables via current_scope_aliases). table_info.columns.insert(column.to_string()); } else { - // The single alias/source resolved to something not in `self.tables`. - // This could happen if it's a parent alias. Mark column as vague for now. - self.vague_columns.push(column.to_string()); + // The single true source's resolved_entity_name is not in self.tables. + // Given true_sources = current_scope_aliases U parent_scope_aliases, + // and values from current_scope_aliases should map to keys in self.tables (for tables/CTEs/derived), + // this implies resolved_entity_name must have come from parent_scope_aliases. + // Thus, the column is a correlated reference to an outer query. It's not vague in this context. + // No action needed here; the parent analyzer is responsible for it. } - } else if self.tables.is_empty() && available_aliases.is_empty() { - // No tables at all - definitely vague + } else if true_sources.is_empty() { + // No "true" sources (e.g., no FROM clause, not a correlated subquery with relevant parent sources). + // The column is unresolvable / vague in this context. self.vague_columns.push(column.to_string()); - } else { - // Multiple available sources - ambiguous. Mark column as vague. + } else { // true_sources.len() > 1 + // Multiple "true" sources available - ambiguous. Mark column as vague. self.vague_columns.push(column.to_string()); } } @@ -1257,40 +1290,36 @@ impl QueryAnalyzer { fn process_function_expr( &mut self, function: &sqlparser::ast::Function, - available_aliases: &HashMap, + // This `param_available_aliases` includes select list aliases from the current scope. + // It's suitable for direct function arguments but NOT for window clause internals. + param_available_aliases: &HashMap, ) { - // Process function arguments + // Process function arguments using param_available_aliases if let sqlparser::ast::FunctionArguments::List(arg_list) = &function.args { for arg in &arg_list.args { match arg { sqlparser::ast::FunctionArg::Unnamed(arg_expr) => { if let sqlparser::ast::FunctionArgExpr::Expr(expr) = arg_expr { - self.visit_expr_with_parent_scope(expr, available_aliases); + self.visit_expr_with_parent_scope(expr, param_available_aliases); } else if let sqlparser::ast::FunctionArgExpr::QualifiedWildcard(name) = arg_expr { - // Handle cases like COUNT(table.*) let qualifier = name.0.first().map(|i| i.value.clone()).unwrap_or_default(); if !qualifier.is_empty() { - if !available_aliases.contains_key(&qualifier) && // Check against combined available_aliases + if !param_available_aliases.contains_key(&qualifier) && !self.tables.contains_key(&qualifier) && !self.is_known_cte_definition(&qualifier) { self.vague_tables.push(qualifier); } } - } else if let sqlparser::ast::FunctionArgExpr::Wildcard = arg_expr { - // Handle COUNT(*) - no specific column to track here - } + } // Wildcard case needs no specific alias handling here } - sqlparser::ast::FunctionArg::Named { name, arg: named_arg, operator: _ } => { - // Argument name itself might be an identifier (though less common in SQL for this context) - // self.add_column_reference(None, &name.value, &available_aliases); + sqlparser::ast::FunctionArg::Named { arg: named_arg, .. } => { if let sqlparser::ast::FunctionArgExpr::Expr(expr) = named_arg { - self.visit_expr_with_parent_scope(expr, available_aliases); + self.visit_expr_with_parent_scope(expr, param_available_aliases); } } - sqlparser::ast::FunctionArg::ExprNamed { name, arg: expr_named_arg, operator: _ } => { - // self.add_column_reference(None, &name.value, &available_aliases); + sqlparser::ast::FunctionArg::ExprNamed { arg: expr_named_arg, .. } => { if let sqlparser::ast::FunctionArgExpr::Expr(expr) = expr_named_arg { - self.visit_expr_with_parent_scope(expr, available_aliases); + self.visit_expr_with_parent_scope(expr, param_available_aliases); } } } @@ -1305,37 +1334,36 @@ impl QueryAnalyzer { .. })) = &function.over { + // For expressions within PARTITION BY, ORDER BY, and window frames, + // select list aliases from the current SELECT are NOT in scope. + // The correct scope is `self.parent_scope_aliases` (context of the function call) + // combined with `self.current_scope_aliases` (FROM clause of current query). + let mut aliases_for_window_internals = self.parent_scope_aliases.clone(); + aliases_for_window_internals.extend(self.current_scope_aliases.clone()); + for expr_item in partition_by { // expr_item is &Expr - self.visit_expr_with_parent_scope(expr_item, available_aliases); + self.visit_expr_with_parent_scope(expr_item, &aliases_for_window_internals); } for order_expr_item in order_by { // order_expr_item is &OrderByExpr - self.visit_expr_with_parent_scope(&order_expr_item.expr, available_aliases); + self.visit_expr_with_parent_scope(&order_expr_item.expr, &aliases_for_window_internals); } if let Some(frame) = window_frame { - // frame.start_bound and frame.end_bound are WindowFrameBound - // which can contain Expr that needs visiting. - // The default Visitor implementation should handle these if they are Expr. - // However, sqlparser::ast::WindowFrameBound is not directly visitable. - // We need to manually extract expressions from it. - - // Example for start_bound: match &frame.start_bound { sqlparser::ast::WindowFrameBound::CurrentRow => {} sqlparser::ast::WindowFrameBound::Preceding(Some(expr)) | sqlparser::ast::WindowFrameBound::Following(Some(expr)) => { - self.visit_expr_with_parent_scope(expr, available_aliases); + self.visit_expr_with_parent_scope(expr, &aliases_for_window_internals); } sqlparser::ast::WindowFrameBound::Preceding(None) | sqlparser::ast::WindowFrameBound::Following(None) => {} } - // Example for end_bound: if let Some(end_bound) = &frame.end_bound { match end_bound { sqlparser::ast::WindowFrameBound::CurrentRow => {} sqlparser::ast::WindowFrameBound::Preceding(Some(expr)) | sqlparser::ast::WindowFrameBound::Following(Some(expr)) => { - self.visit_expr_with_parent_scope(expr, available_aliases); + self.visit_expr_with_parent_scope(expr, &aliases_for_window_internals); } sqlparser::ast::WindowFrameBound::Preceding(None) | sqlparser::ast::WindowFrameBound::Following(None) => {}