sql analysis improvements

This commit is contained in:
dal 2025-05-09 10:13:47 -06:00
parent 0bac0282bb
commit 61467e886c
No known key found for this signature in database
GPG Key ID: 16F4B0E1E9F61122
1 changed files with 90 additions and 62 deletions

View File

@ -1046,10 +1046,23 @@ impl QueryAnalyzer {
table_info.columns.insert(base_column.to_string()); table_info.columns.insert(base_column.to_string());
} }
} else { } else {
// Qualifier resolved, but not to a table in the current scope's `self.tables`.
// This could be a select list alias or a parent scope alias's target.
// If it's not a known parent alias, then it's vague.
if !self.parent_scope_aliases.contains_key(qualifier) &&
!self.parent_scope_aliases.values().any(|v| v == resolved_identifier) {
// Also check if the qualifier itself is a known select list alias. If so, it's not a table.
if !self.current_select_list_aliases.contains(qualifier) {
self.vague_tables.push(qualifier.to_string()); self.vague_tables.push(qualifier.to_string());
} }
}
// If it IS a parent alias or a select list alias, we don't mark it vague here.
// For select list aliases, they can't be qualified further in standard SQL.
// For parent aliases, the column resolution is handled by the parent.
}
} else { } else {
if self.tables.contains_key(qualifier) { // Qualifier itself is not in available_aliases (current_scope, parent_scope, or select_list_aliases)
if self.tables.contains_key(qualifier) { // Direct table name (not aliased in current scope)
if let Some(table_info) = self.tables.get_mut(qualifier) { if let Some(table_info) = self.tables.get_mut(qualifier) {
table_info.columns.insert(column.to_string()); table_info.columns.insert(column.to_string());
if dialect_nested { if dialect_nested {
@ -1057,10 +1070,8 @@ impl QueryAnalyzer {
} }
} }
} else if self.parent_scope_aliases.contains_key(qualifier) { } else if self.parent_scope_aliases.contains_key(qualifier) {
// Qualifier is not a known table/alias in current scope, // Qualifier is a known parent scope alias.
// BUT it IS known in the parent scope (correlated subquery reference). // This column belongs to the parent scope; do nothing here.
// We treat it as resolved for column analysis, but don't add the column
// to a table info in *this* analyzer. Do nothing here to prevent vagueness error.
} else { } else {
// Qualifier not found in aliases, direct table names, or parent aliases. It's vague. // Qualifier not found in aliases, direct table names, or parent aliases. It's vague.
self.vague_tables.push(qualifier.to_string()); self.vague_tables.push(qualifier.to_string());
@ -1068,6 +1079,7 @@ impl QueryAnalyzer {
} }
} }
None => { None => {
// Unqualified column
// Check if it's a known select list alias first // Check if it's a known select list alias first
if self.current_select_list_aliases.contains(column) { if self.current_select_list_aliases.contains(column) {
// It's a select list alias, consider it resolved for this scope. // It's a select list alias, consider it resolved for this scope.
@ -1075,29 +1087,44 @@ impl QueryAnalyzer {
return; return;
} }
// Special handling for nested fields without qualifier // Construct true_sources: only from current_scope_aliases (FROM clause) and parent_scope_aliases (outer queries)
// For example: "SELECT user.device.type" in BigQuery becomes "SELECT user__device__type" // Excludes select list aliases for determining ambiguity of other unqualified columns.
let mut true_sources = self.current_scope_aliases.clone();
true_sources.extend(self.parent_scope_aliases.clone());
if dialect_nested { if dialect_nested {
// Try to find a table that might contain the base column // Handle unqualified dialect_nested columns (e.g., SELECT user__device__type)
let mut assigned = false; // The base_column (e.g., "user") must unambiguously refer to a single true source.
if true_sources.len() == 1 {
let source_alias = true_sources.keys().next().unwrap(); // Alias used in query (e.g., "u" in "FROM users u")
let resolved_entity_name = true_sources.values().next().unwrap(); // Actual table/CTE name (e.g., "users")
for table_info in self.tables.values_mut() { // Check if base_column matches the alias or the resolved name of the single source
// For now, simply add the column to all tables if base_column == source_alias || base_column == resolved_entity_name {
// This is less strict but ensures we don't miss real references if let Some(table_info) = self.tables.get_mut(resolved_entity_name) {
table_info.columns.insert(base_column.to_string()); table_info.columns.insert(base_column.to_string()); // Add base part (e.g. "user")
table_info.columns.insert(column.to_string()); table_info.columns.insert(column.to_string()); // Add full dialect nested column (e.g. "user__device__type")
assigned = true; } else {
// Single true source, but its resolved_entity_name is not in self.tables.
// This implies it's a parent scope entity.
// The dialect-nested column is considered resolved to the parent.
} }
} else {
// If we couldn't assign it to any table and we have tables in scope, // Single true source, but base_column does not match it.
// it's likely a literal or expression, so don't report as vague // e.g., FROM tableA SELECT fieldX__fieldY (where fieldX is not tableA)
if !assigned && !self.tables.is_empty() { self.vague_columns.push(base_column.to_string());
// Just add the base column as vague for reporting }
} else if true_sources.is_empty() {
// No true sources, but a dialect_nested column is used. Vague.
self.vague_columns.push(base_column.to_string());
} else { // true_sources.len() > 1
// Multiple true sources, ambiguous which one `base_column` refers to. Vague.
self.vague_columns.push(base_column.to_string()); self.vague_columns.push(base_column.to_string());
} }
} else { } else {
// Standard unqualified column handling // Standard unqualified column handling
self.resolve_unqualified_column(column, available_aliases); self.resolve_unqualified_column(column, &true_sources);
} }
} }
} }
@ -1107,7 +1134,7 @@ impl QueryAnalyzer {
fn resolve_unqualified_column( fn resolve_unqualified_column(
&mut self, &mut self,
column: &str, column: &str,
available_aliases: &HashMap<String, String>, true_sources: &HashMap<String, String>, // Changed from available_aliases
) { ) {
// Special case for the test_vague_references test - always report unqualified 'id' as vague // Special case for the test_vague_references test - always report unqualified 'id' as vague
if column == "id" { if column == "id" {
@ -1115,21 +1142,27 @@ impl QueryAnalyzer {
return; return;
} }
if available_aliases.len() == 1 { if true_sources.len() == 1 {
// Exactly one source available. // Exactly one "true" source available (from current FROM clause or parent scope).
let resolved_identifier = available_aliases.values().next().unwrap(); // Get the single value let resolved_entity_name = true_sources.values().next().unwrap(); // Get the actual table/CTE name
if let Some(table_info) = self.tables.get_mut(resolved_identifier) {
if let Some(table_info) = self.tables.get_mut(resolved_entity_name) {
// The source is defined in the current query's scope (e.g., in self.tables via current_scope_aliases).
table_info.columns.insert(column.to_string()); table_info.columns.insert(column.to_string());
} else { } else {
// The single alias/source resolved to something not in `self.tables`. // The single true source's resolved_entity_name is not in self.tables.
// This could happen if it's a parent alias. Mark column as vague for now. // Given true_sources = current_scope_aliases U parent_scope_aliases,
self.vague_columns.push(column.to_string()); // and values from current_scope_aliases should map to keys in self.tables (for tables/CTEs/derived),
// this implies resolved_entity_name must have come from parent_scope_aliases.
// Thus, the column is a correlated reference to an outer query. It's not vague in this context.
// No action needed here; the parent analyzer is responsible for it.
} }
} else if self.tables.is_empty() && available_aliases.is_empty() { } else if true_sources.is_empty() {
// No tables at all - definitely vague // No "true" sources (e.g., no FROM clause, not a correlated subquery with relevant parent sources).
// The column is unresolvable / vague in this context.
self.vague_columns.push(column.to_string()); self.vague_columns.push(column.to_string());
} else { } else { // true_sources.len() > 1
// Multiple available sources - ambiguous. Mark column as vague. // Multiple "true" sources available - ambiguous. Mark column as vague.
self.vague_columns.push(column.to_string()); self.vague_columns.push(column.to_string());
} }
} }
@ -1257,40 +1290,36 @@ impl QueryAnalyzer {
fn process_function_expr( fn process_function_expr(
&mut self, &mut self,
function: &sqlparser::ast::Function, function: &sqlparser::ast::Function,
available_aliases: &HashMap<String, String>, // This `param_available_aliases` includes select list aliases from the current scope.
// It's suitable for direct function arguments but NOT for window clause internals.
param_available_aliases: &HashMap<String, String>,
) { ) {
// Process function arguments // Process function arguments using param_available_aliases
if let sqlparser::ast::FunctionArguments::List(arg_list) = &function.args { if let sqlparser::ast::FunctionArguments::List(arg_list) = &function.args {
for arg in &arg_list.args { for arg in &arg_list.args {
match arg { match arg {
sqlparser::ast::FunctionArg::Unnamed(arg_expr) => { sqlparser::ast::FunctionArg::Unnamed(arg_expr) => {
if let sqlparser::ast::FunctionArgExpr::Expr(expr) = arg_expr { if let sqlparser::ast::FunctionArgExpr::Expr(expr) = arg_expr {
self.visit_expr_with_parent_scope(expr, available_aliases); self.visit_expr_with_parent_scope(expr, param_available_aliases);
} else if let sqlparser::ast::FunctionArgExpr::QualifiedWildcard(name) = arg_expr { } else if let sqlparser::ast::FunctionArgExpr::QualifiedWildcard(name) = arg_expr {
// Handle cases like COUNT(table.*)
let qualifier = name.0.first().map(|i| i.value.clone()).unwrap_or_default(); let qualifier = name.0.first().map(|i| i.value.clone()).unwrap_or_default();
if !qualifier.is_empty() { if !qualifier.is_empty() {
if !available_aliases.contains_key(&qualifier) && // Check against combined available_aliases if !param_available_aliases.contains_key(&qualifier) &&
!self.tables.contains_key(&qualifier) && !self.tables.contains_key(&qualifier) &&
!self.is_known_cte_definition(&qualifier) { !self.is_known_cte_definition(&qualifier) {
self.vague_tables.push(qualifier); self.vague_tables.push(qualifier);
} }
} }
} else if let sqlparser::ast::FunctionArgExpr::Wildcard = arg_expr { } // Wildcard case needs no specific alias handling here
// Handle COUNT(*) - no specific column to track here
} }
} sqlparser::ast::FunctionArg::Named { arg: named_arg, .. } => {
sqlparser::ast::FunctionArg::Named { name, arg: named_arg, operator: _ } => {
// Argument name itself might be an identifier (though less common in SQL for this context)
// self.add_column_reference(None, &name.value, &available_aliases);
if let sqlparser::ast::FunctionArgExpr::Expr(expr) = named_arg { if let sqlparser::ast::FunctionArgExpr::Expr(expr) = named_arg {
self.visit_expr_with_parent_scope(expr, available_aliases); self.visit_expr_with_parent_scope(expr, param_available_aliases);
} }
} }
sqlparser::ast::FunctionArg::ExprNamed { name, arg: expr_named_arg, operator: _ } => { sqlparser::ast::FunctionArg::ExprNamed { arg: expr_named_arg, .. } => {
// self.add_column_reference(None, &name.value, &available_aliases);
if let sqlparser::ast::FunctionArgExpr::Expr(expr) = expr_named_arg { if let sqlparser::ast::FunctionArgExpr::Expr(expr) = expr_named_arg {
self.visit_expr_with_parent_scope(expr, available_aliases); self.visit_expr_with_parent_scope(expr, param_available_aliases);
} }
} }
} }
@ -1305,37 +1334,36 @@ impl QueryAnalyzer {
.. ..
})) = &function.over })) = &function.over
{ {
// For expressions within PARTITION BY, ORDER BY, and window frames,
// select list aliases from the current SELECT are NOT in scope.
// The correct scope is `self.parent_scope_aliases` (context of the function call)
// combined with `self.current_scope_aliases` (FROM clause of current query).
let mut aliases_for_window_internals = self.parent_scope_aliases.clone();
aliases_for_window_internals.extend(self.current_scope_aliases.clone());
for expr_item in partition_by { // expr_item is &Expr for expr_item in partition_by { // expr_item is &Expr
self.visit_expr_with_parent_scope(expr_item, available_aliases); self.visit_expr_with_parent_scope(expr_item, &aliases_for_window_internals);
} }
for order_expr_item in order_by { // order_expr_item is &OrderByExpr for order_expr_item in order_by { // order_expr_item is &OrderByExpr
self.visit_expr_with_parent_scope(&order_expr_item.expr, available_aliases); self.visit_expr_with_parent_scope(&order_expr_item.expr, &aliases_for_window_internals);
} }
if let Some(frame) = window_frame { if let Some(frame) = window_frame {
// frame.start_bound and frame.end_bound are WindowFrameBound
// which can contain Expr that needs visiting.
// The default Visitor implementation should handle these if they are Expr.
// However, sqlparser::ast::WindowFrameBound is not directly visitable.
// We need to manually extract expressions from it.
// Example for start_bound:
match &frame.start_bound { match &frame.start_bound {
sqlparser::ast::WindowFrameBound::CurrentRow => {} sqlparser::ast::WindowFrameBound::CurrentRow => {}
sqlparser::ast::WindowFrameBound::Preceding(Some(expr)) | sqlparser::ast::WindowFrameBound::Preceding(Some(expr)) |
sqlparser::ast::WindowFrameBound::Following(Some(expr)) => { sqlparser::ast::WindowFrameBound::Following(Some(expr)) => {
self.visit_expr_with_parent_scope(expr, available_aliases); self.visit_expr_with_parent_scope(expr, &aliases_for_window_internals);
} }
sqlparser::ast::WindowFrameBound::Preceding(None) | sqlparser::ast::WindowFrameBound::Preceding(None) |
sqlparser::ast::WindowFrameBound::Following(None) => {} sqlparser::ast::WindowFrameBound::Following(None) => {}
} }
// Example for end_bound:
if let Some(end_bound) = &frame.end_bound { if let Some(end_bound) = &frame.end_bound {
match end_bound { match end_bound {
sqlparser::ast::WindowFrameBound::CurrentRow => {} sqlparser::ast::WindowFrameBound::CurrentRow => {}
sqlparser::ast::WindowFrameBound::Preceding(Some(expr)) | sqlparser::ast::WindowFrameBound::Preceding(Some(expr)) |
sqlparser::ast::WindowFrameBound::Following(Some(expr)) => { sqlparser::ast::WindowFrameBound::Following(Some(expr)) => {
self.visit_expr_with_parent_scope(expr, available_aliases); self.visit_expr_with_parent_scope(expr, &aliases_for_window_internals);
} }
sqlparser::ast::WindowFrameBound::Preceding(None) | sqlparser::ast::WindowFrameBound::Preceding(None) |
sqlparser::ast::WindowFrameBound::Following(None) => {} sqlparser::ast::WindowFrameBound::Following(None) => {}