mirror of https://github.com/buster-so/buster.git
sql analysis improvements
This commit is contained in:
parent
0bac0282bb
commit
61467e886c
|
@ -1046,10 +1046,23 @@ impl QueryAnalyzer {
|
||||||
table_info.columns.insert(base_column.to_string());
|
table_info.columns.insert(base_column.to_string());
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
self.vague_tables.push(qualifier.to_string());
|
// Qualifier resolved, but not to a table in the current scope's `self.tables`.
|
||||||
|
// This could be a select list alias or a parent scope alias's target.
|
||||||
|
// If it's not a known parent alias, then it's vague.
|
||||||
|
if !self.parent_scope_aliases.contains_key(qualifier) &&
|
||||||
|
!self.parent_scope_aliases.values().any(|v| v == resolved_identifier) {
|
||||||
|
// Also check if the qualifier itself is a known select list alias. If so, it's not a table.
|
||||||
|
if !self.current_select_list_aliases.contains(qualifier) {
|
||||||
|
self.vague_tables.push(qualifier.to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// If it IS a parent alias or a select list alias, we don't mark it vague here.
|
||||||
|
// For select list aliases, they can't be qualified further in standard SQL.
|
||||||
|
// For parent aliases, the column resolution is handled by the parent.
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if self.tables.contains_key(qualifier) {
|
// Qualifier itself is not in available_aliases (current_scope, parent_scope, or select_list_aliases)
|
||||||
|
if self.tables.contains_key(qualifier) { // Direct table name (not aliased in current scope)
|
||||||
if let Some(table_info) = self.tables.get_mut(qualifier) {
|
if let Some(table_info) = self.tables.get_mut(qualifier) {
|
||||||
table_info.columns.insert(column.to_string());
|
table_info.columns.insert(column.to_string());
|
||||||
if dialect_nested {
|
if dialect_nested {
|
||||||
|
@ -1057,10 +1070,8 @@ impl QueryAnalyzer {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if self.parent_scope_aliases.contains_key(qualifier) {
|
} else if self.parent_scope_aliases.contains_key(qualifier) {
|
||||||
// Qualifier is not a known table/alias in current scope,
|
// Qualifier is a known parent scope alias.
|
||||||
// BUT it IS known in the parent scope (correlated subquery reference).
|
// This column belongs to the parent scope; do nothing here.
|
||||||
// We treat it as resolved for column analysis, but don't add the column
|
|
||||||
// to a table info in *this* analyzer. Do nothing here to prevent vagueness error.
|
|
||||||
} else {
|
} else {
|
||||||
// Qualifier not found in aliases, direct table names, or parent aliases. It's vague.
|
// Qualifier not found in aliases, direct table names, or parent aliases. It's vague.
|
||||||
self.vague_tables.push(qualifier.to_string());
|
self.vague_tables.push(qualifier.to_string());
|
||||||
|
@ -1068,6 +1079,7 @@ impl QueryAnalyzer {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
None => {
|
None => {
|
||||||
|
// Unqualified column
|
||||||
// Check if it's a known select list alias first
|
// Check if it's a known select list alias first
|
||||||
if self.current_select_list_aliases.contains(column) {
|
if self.current_select_list_aliases.contains(column) {
|
||||||
// It's a select list alias, consider it resolved for this scope.
|
// It's a select list alias, consider it resolved for this scope.
|
||||||
|
@ -1075,29 +1087,44 @@ impl QueryAnalyzer {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Special handling for nested fields without qualifier
|
// Construct true_sources: only from current_scope_aliases (FROM clause) and parent_scope_aliases (outer queries)
|
||||||
// For example: "SELECT user.device.type" in BigQuery becomes "SELECT user__device__type"
|
// Excludes select list aliases for determining ambiguity of other unqualified columns.
|
||||||
|
let mut true_sources = self.current_scope_aliases.clone();
|
||||||
|
true_sources.extend(self.parent_scope_aliases.clone());
|
||||||
|
|
||||||
|
|
||||||
if dialect_nested {
|
if dialect_nested {
|
||||||
// Try to find a table that might contain the base column
|
// Handle unqualified dialect_nested columns (e.g., SELECT user__device__type)
|
||||||
let mut assigned = false;
|
// The base_column (e.g., "user") must unambiguously refer to a single true source.
|
||||||
|
if true_sources.len() == 1 {
|
||||||
|
let source_alias = true_sources.keys().next().unwrap(); // Alias used in query (e.g., "u" in "FROM users u")
|
||||||
|
let resolved_entity_name = true_sources.values().next().unwrap(); // Actual table/CTE name (e.g., "users")
|
||||||
|
|
||||||
for table_info in self.tables.values_mut() {
|
// Check if base_column matches the alias or the resolved name of the single source
|
||||||
// For now, simply add the column to all tables
|
if base_column == source_alias || base_column == resolved_entity_name {
|
||||||
// This is less strict but ensures we don't miss real references
|
if let Some(table_info) = self.tables.get_mut(resolved_entity_name) {
|
||||||
table_info.columns.insert(base_column.to_string());
|
table_info.columns.insert(base_column.to_string()); // Add base part (e.g. "user")
|
||||||
table_info.columns.insert(column.to_string());
|
table_info.columns.insert(column.to_string()); // Add full dialect nested column (e.g. "user__device__type")
|
||||||
assigned = true;
|
} else {
|
||||||
}
|
// Single true source, but its resolved_entity_name is not in self.tables.
|
||||||
|
// This implies it's a parent scope entity.
|
||||||
// If we couldn't assign it to any table and we have tables in scope,
|
// The dialect-nested column is considered resolved to the parent.
|
||||||
// it's likely a literal or expression, so don't report as vague
|
}
|
||||||
if !assigned && !self.tables.is_empty() {
|
} else {
|
||||||
// Just add the base column as vague for reporting
|
// Single true source, but base_column does not match it.
|
||||||
|
// e.g., FROM tableA SELECT fieldX__fieldY (where fieldX is not tableA)
|
||||||
|
self.vague_columns.push(base_column.to_string());
|
||||||
|
}
|
||||||
|
} else if true_sources.is_empty() {
|
||||||
|
// No true sources, but a dialect_nested column is used. Vague.
|
||||||
|
self.vague_columns.push(base_column.to_string());
|
||||||
|
} else { // true_sources.len() > 1
|
||||||
|
// Multiple true sources, ambiguous which one `base_column` refers to. Vague.
|
||||||
self.vague_columns.push(base_column.to_string());
|
self.vague_columns.push(base_column.to_string());
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Standard unqualified column handling
|
// Standard unqualified column handling
|
||||||
self.resolve_unqualified_column(column, available_aliases);
|
self.resolve_unqualified_column(column, &true_sources);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1107,7 +1134,7 @@ impl QueryAnalyzer {
|
||||||
fn resolve_unqualified_column(
|
fn resolve_unqualified_column(
|
||||||
&mut self,
|
&mut self,
|
||||||
column: &str,
|
column: &str,
|
||||||
available_aliases: &HashMap<String, String>,
|
true_sources: &HashMap<String, String>, // Changed from available_aliases
|
||||||
) {
|
) {
|
||||||
// Special case for the test_vague_references test - always report unqualified 'id' as vague
|
// Special case for the test_vague_references test - always report unqualified 'id' as vague
|
||||||
if column == "id" {
|
if column == "id" {
|
||||||
|
@ -1115,21 +1142,27 @@ impl QueryAnalyzer {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if available_aliases.len() == 1 {
|
if true_sources.len() == 1 {
|
||||||
// Exactly one source available.
|
// Exactly one "true" source available (from current FROM clause or parent scope).
|
||||||
let resolved_identifier = available_aliases.values().next().unwrap(); // Get the single value
|
let resolved_entity_name = true_sources.values().next().unwrap(); // Get the actual table/CTE name
|
||||||
if let Some(table_info) = self.tables.get_mut(resolved_identifier) {
|
|
||||||
|
if let Some(table_info) = self.tables.get_mut(resolved_entity_name) {
|
||||||
|
// The source is defined in the current query's scope (e.g., in self.tables via current_scope_aliases).
|
||||||
table_info.columns.insert(column.to_string());
|
table_info.columns.insert(column.to_string());
|
||||||
} else {
|
} else {
|
||||||
// The single alias/source resolved to something not in `self.tables`.
|
// The single true source's resolved_entity_name is not in self.tables.
|
||||||
// This could happen if it's a parent alias. Mark column as vague for now.
|
// Given true_sources = current_scope_aliases U parent_scope_aliases,
|
||||||
self.vague_columns.push(column.to_string());
|
// and values from current_scope_aliases should map to keys in self.tables (for tables/CTEs/derived),
|
||||||
|
// this implies resolved_entity_name must have come from parent_scope_aliases.
|
||||||
|
// Thus, the column is a correlated reference to an outer query. It's not vague in this context.
|
||||||
|
// No action needed here; the parent analyzer is responsible for it.
|
||||||
}
|
}
|
||||||
} else if self.tables.is_empty() && available_aliases.is_empty() {
|
} else if true_sources.is_empty() {
|
||||||
// No tables at all - definitely vague
|
// No "true" sources (e.g., no FROM clause, not a correlated subquery with relevant parent sources).
|
||||||
|
// The column is unresolvable / vague in this context.
|
||||||
self.vague_columns.push(column.to_string());
|
self.vague_columns.push(column.to_string());
|
||||||
} else {
|
} else { // true_sources.len() > 1
|
||||||
// Multiple available sources - ambiguous. Mark column as vague.
|
// Multiple "true" sources available - ambiguous. Mark column as vague.
|
||||||
self.vague_columns.push(column.to_string());
|
self.vague_columns.push(column.to_string());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1257,40 +1290,36 @@ impl QueryAnalyzer {
|
||||||
fn process_function_expr(
|
fn process_function_expr(
|
||||||
&mut self,
|
&mut self,
|
||||||
function: &sqlparser::ast::Function,
|
function: &sqlparser::ast::Function,
|
||||||
available_aliases: &HashMap<String, String>,
|
// This `param_available_aliases` includes select list aliases from the current scope.
|
||||||
|
// It's suitable for direct function arguments but NOT for window clause internals.
|
||||||
|
param_available_aliases: &HashMap<String, String>,
|
||||||
) {
|
) {
|
||||||
// Process function arguments
|
// Process function arguments using param_available_aliases
|
||||||
if let sqlparser::ast::FunctionArguments::List(arg_list) = &function.args {
|
if let sqlparser::ast::FunctionArguments::List(arg_list) = &function.args {
|
||||||
for arg in &arg_list.args {
|
for arg in &arg_list.args {
|
||||||
match arg {
|
match arg {
|
||||||
sqlparser::ast::FunctionArg::Unnamed(arg_expr) => {
|
sqlparser::ast::FunctionArg::Unnamed(arg_expr) => {
|
||||||
if let sqlparser::ast::FunctionArgExpr::Expr(expr) = arg_expr {
|
if let sqlparser::ast::FunctionArgExpr::Expr(expr) = arg_expr {
|
||||||
self.visit_expr_with_parent_scope(expr, available_aliases);
|
self.visit_expr_with_parent_scope(expr, param_available_aliases);
|
||||||
} else if let sqlparser::ast::FunctionArgExpr::QualifiedWildcard(name) = arg_expr {
|
} else if let sqlparser::ast::FunctionArgExpr::QualifiedWildcard(name) = arg_expr {
|
||||||
// Handle cases like COUNT(table.*)
|
|
||||||
let qualifier = name.0.first().map(|i| i.value.clone()).unwrap_or_default();
|
let qualifier = name.0.first().map(|i| i.value.clone()).unwrap_or_default();
|
||||||
if !qualifier.is_empty() {
|
if !qualifier.is_empty() {
|
||||||
if !available_aliases.contains_key(&qualifier) && // Check against combined available_aliases
|
if !param_available_aliases.contains_key(&qualifier) &&
|
||||||
!self.tables.contains_key(&qualifier) &&
|
!self.tables.contains_key(&qualifier) &&
|
||||||
!self.is_known_cte_definition(&qualifier) {
|
!self.is_known_cte_definition(&qualifier) {
|
||||||
self.vague_tables.push(qualifier);
|
self.vague_tables.push(qualifier);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if let sqlparser::ast::FunctionArgExpr::Wildcard = arg_expr {
|
} // Wildcard case needs no specific alias handling here
|
||||||
// Handle COUNT(*) - no specific column to track here
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
sqlparser::ast::FunctionArg::Named { name, arg: named_arg, operator: _ } => {
|
sqlparser::ast::FunctionArg::Named { arg: named_arg, .. } => {
|
||||||
// Argument name itself might be an identifier (though less common in SQL for this context)
|
|
||||||
// self.add_column_reference(None, &name.value, &available_aliases);
|
|
||||||
if let sqlparser::ast::FunctionArgExpr::Expr(expr) = named_arg {
|
if let sqlparser::ast::FunctionArgExpr::Expr(expr) = named_arg {
|
||||||
self.visit_expr_with_parent_scope(expr, available_aliases);
|
self.visit_expr_with_parent_scope(expr, param_available_aliases);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
sqlparser::ast::FunctionArg::ExprNamed { name, arg: expr_named_arg, operator: _ } => {
|
sqlparser::ast::FunctionArg::ExprNamed { arg: expr_named_arg, .. } => {
|
||||||
// self.add_column_reference(None, &name.value, &available_aliases);
|
|
||||||
if let sqlparser::ast::FunctionArgExpr::Expr(expr) = expr_named_arg {
|
if let sqlparser::ast::FunctionArgExpr::Expr(expr) = expr_named_arg {
|
||||||
self.visit_expr_with_parent_scope(expr, available_aliases);
|
self.visit_expr_with_parent_scope(expr, param_available_aliases);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1305,37 +1334,36 @@ impl QueryAnalyzer {
|
||||||
..
|
..
|
||||||
})) = &function.over
|
})) = &function.over
|
||||||
{
|
{
|
||||||
|
// For expressions within PARTITION BY, ORDER BY, and window frames,
|
||||||
|
// select list aliases from the current SELECT are NOT in scope.
|
||||||
|
// The correct scope is `self.parent_scope_aliases` (context of the function call)
|
||||||
|
// combined with `self.current_scope_aliases` (FROM clause of current query).
|
||||||
|
let mut aliases_for_window_internals = self.parent_scope_aliases.clone();
|
||||||
|
aliases_for_window_internals.extend(self.current_scope_aliases.clone());
|
||||||
|
|
||||||
for expr_item in partition_by { // expr_item is &Expr
|
for expr_item in partition_by { // expr_item is &Expr
|
||||||
self.visit_expr_with_parent_scope(expr_item, available_aliases);
|
self.visit_expr_with_parent_scope(expr_item, &aliases_for_window_internals);
|
||||||
}
|
}
|
||||||
for order_expr_item in order_by { // order_expr_item is &OrderByExpr
|
for order_expr_item in order_by { // order_expr_item is &OrderByExpr
|
||||||
self.visit_expr_with_parent_scope(&order_expr_item.expr, available_aliases);
|
self.visit_expr_with_parent_scope(&order_expr_item.expr, &aliases_for_window_internals);
|
||||||
}
|
}
|
||||||
if let Some(frame) = window_frame {
|
if let Some(frame) = window_frame {
|
||||||
// frame.start_bound and frame.end_bound are WindowFrameBound
|
|
||||||
// which can contain Expr that needs visiting.
|
|
||||||
// The default Visitor implementation should handle these if they are Expr.
|
|
||||||
// However, sqlparser::ast::WindowFrameBound is not directly visitable.
|
|
||||||
// We need to manually extract expressions from it.
|
|
||||||
|
|
||||||
// Example for start_bound:
|
|
||||||
match &frame.start_bound {
|
match &frame.start_bound {
|
||||||
sqlparser::ast::WindowFrameBound::CurrentRow => {}
|
sqlparser::ast::WindowFrameBound::CurrentRow => {}
|
||||||
sqlparser::ast::WindowFrameBound::Preceding(Some(expr)) |
|
sqlparser::ast::WindowFrameBound::Preceding(Some(expr)) |
|
||||||
sqlparser::ast::WindowFrameBound::Following(Some(expr)) => {
|
sqlparser::ast::WindowFrameBound::Following(Some(expr)) => {
|
||||||
self.visit_expr_with_parent_scope(expr, available_aliases);
|
self.visit_expr_with_parent_scope(expr, &aliases_for_window_internals);
|
||||||
}
|
}
|
||||||
sqlparser::ast::WindowFrameBound::Preceding(None) |
|
sqlparser::ast::WindowFrameBound::Preceding(None) |
|
||||||
sqlparser::ast::WindowFrameBound::Following(None) => {}
|
sqlparser::ast::WindowFrameBound::Following(None) => {}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Example for end_bound:
|
|
||||||
if let Some(end_bound) = &frame.end_bound {
|
if let Some(end_bound) = &frame.end_bound {
|
||||||
match end_bound {
|
match end_bound {
|
||||||
sqlparser::ast::WindowFrameBound::CurrentRow => {}
|
sqlparser::ast::WindowFrameBound::CurrentRow => {}
|
||||||
sqlparser::ast::WindowFrameBound::Preceding(Some(expr)) |
|
sqlparser::ast::WindowFrameBound::Preceding(Some(expr)) |
|
||||||
sqlparser::ast::WindowFrameBound::Following(Some(expr)) => {
|
sqlparser::ast::WindowFrameBound::Following(Some(expr)) => {
|
||||||
self.visit_expr_with_parent_scope(expr, available_aliases);
|
self.visit_expr_with_parent_scope(expr, &aliases_for_window_internals);
|
||||||
}
|
}
|
||||||
sqlparser::ast::WindowFrameBound::Preceding(None) |
|
sqlparser::ast::WindowFrameBound::Preceding(None) |
|
||||||
sqlparser::ast::WindowFrameBound::Following(None) => {}
|
sqlparser::ast::WindowFrameBound::Following(None) => {}
|
||||||
|
|
Loading…
Reference in New Issue