diff --git a/apps/api/libs/sql_analyzer/src/analysis.rs b/apps/api/libs/sql_analyzer/src/analysis.rs index 6ed4f4dd2..8fbd439b2 100644 --- a/apps/api/libs/sql_analyzer/src/analysis.rs +++ b/apps/api/libs/sql_analyzer/src/analysis.rs @@ -178,11 +178,11 @@ impl QueryAnalyzer { self.parent_scope_aliases = parent_aliases.clone(); // Process WITH clause (CTEs) if present - let is_with_query = self.process_with_clause(query)?; + let is_with_query = self.process_with_clause(query); // Process the main query body match query.body.as_ref() { - SetExpr::Select(select) => self.process_select_query(select)?, + SetExpr::Select(select) => self.process_select_query(select), SetExpr::Query(inner_query) => { self.process_nested_query(inner_query)?; } @@ -201,7 +201,7 @@ impl QueryAnalyzer { } // Process WITH clause and return whether it was processed - fn process_with_clause(&mut self, query: &Query) -> Result { + fn process_with_clause(&mut self, query: &Query) -> bool { if let Some(with) = &query.with { if !with.cte_tables.is_empty() { // Create a new scope for CTE definitions @@ -223,16 +223,18 @@ impl QueryAnalyzer { .chain(self.parent_scope_aliases.iter()) .map(|(k, v)| (k.clone(), v.clone())) .collect(); - self.process_cte(cte, &combined_aliases_for_cte)?; + if let Err(e) = self.process_cte(cte, &combined_aliases_for_cte) { + eprintln!("Error processing CTE: {}", e); + } } - return Ok(true); + return true; } } - Ok(false) + false } // Process a SELECT query - fn process_select_query(&mut self, select: &sqlparser::ast::Select) -> Result<(), SqlAnalyzerError> { + fn process_select_query(&mut self, select: &sqlparser::ast::Select) { self.current_scope_aliases.clear(); self.current_select_list_aliases.clear(); self.current_from_relation_identifier = None; @@ -291,9 +293,8 @@ impl QueryAnalyzer { // Process SELECT list for item in &select.projection { - self.process_select_item(item, &combined_aliases_for_visit)?; + self.process_select_item(item, &combined_aliases_for_visit); } - Ok(()) } // Process join data and collect conditions for later processing @@ -517,7 +518,6 @@ impl QueryAnalyzer { Err(e @ SqlAnalyzerError::VagueReferences(_)) => Err( SqlAnalyzerError::VagueReferences(format!("In CTE '{}': {}", cte_name, e)), ), - Err(e @ SqlAnalyzerError::BlockedWildcardUsage(_)) => Err(e), Err(e) => Err(SqlAnalyzerError::Internal(anyhow::anyhow!( "Internal error summarizing CTE '{}': {}", cte_name, @@ -528,7 +528,6 @@ impl QueryAnalyzer { Err(e @ SqlAnalyzerError::VagueReferences(_)) => Err( SqlAnalyzerError::VagueReferences(format!("In CTE '{}': {}", cte_name, e)), ), - Err(e @ SqlAnalyzerError::BlockedWildcardUsage(_)) => Err(e), Err(e) => Err(SqlAnalyzerError::Internal(anyhow::anyhow!( "Error processing CTE '{}': {}", cte_name, @@ -915,7 +914,7 @@ impl QueryAnalyzer { &mut self, select_item: &SelectItem, parent_aliases: &HashMap, - ) -> Result<(), SqlAnalyzerError> { + ) { match select_item { SelectItem::UnnamedExpr(expr) | SelectItem::ExprWithAlias { expr, .. } => { self.visit_expr_with_parent_scope(expr, parent_aliases); @@ -927,8 +926,6 @@ impl QueryAnalyzer { .map(|i| i.value.clone()) .unwrap_or_default(); if !qualifier.is_empty() { - self.validate_qualified_wildcard(&qualifier)?; - if !self.current_scope_aliases.contains_key(&qualifier) && !parent_aliases.contains_key(&qualifier) && !self.tables.contains_key(&qualifier) @@ -939,10 +936,9 @@ impl QueryAnalyzer { } } SelectItem::Wildcard(_) => { - self.validate_wildcard_on_tables()?; + // Unqualified wildcard - we don't explicitly add columns for unqualified wildcard } } - Ok(()) } fn into_summary(mut self) -> Result { @@ -1149,53 +1145,6 @@ impl QueryAnalyzer { in_definitions || in_ctes } - fn validate_wildcard_on_tables(&self) -> Result<(), SqlAnalyzerError> { - // Only validate tables that are actually in the FROM clause - if let Some(from_table) = &self.current_from_relation_identifier { - if let Some(table_info) = self.tables.get(from_table) { - if table_info.kind == TableKind::Base { - return Err(SqlAnalyzerError::BlockedWildcardUsage(format!( - "table '{}'", table_info.table_identifier - ))); - } - } - } - - // Also check any tables that might be in current scope aliases that are physical tables - for alias in self.current_scope_aliases.keys() { - if let Some(from_table) = &self.current_from_relation_identifier { - if alias == from_table { - continue; - } - } - - if let Some(table_info) = self.tables.get(alias) { - if table_info.kind == TableKind::Base { - return Err(SqlAnalyzerError::BlockedWildcardUsage(format!( - "table '{}'", table_info.table_identifier - ))); - } - } - } - Ok(()) - } - - fn validate_qualified_wildcard(&self, qualifier: &str) -> Result<(), SqlAnalyzerError> { - let resolved_table = self.current_scope_aliases.get(qualifier) - .or_else(|| self.parent_scope_aliases.get(qualifier)) - .map(|s| s.as_str()) - .unwrap_or(qualifier); - - if let Some(table_info) = self.tables.get(resolved_table) { - if table_info.kind == TableKind::Base { - return Err(SqlAnalyzerError::BlockedWildcardUsage(format!( - "table '{}'", table_info.table_identifier - ))); - } - } - Ok(()) - } - fn add_column_reference( &mut self, qualifier_opt: Option<&str>, diff --git a/apps/api/libs/sql_analyzer/src/errors.rs b/apps/api/libs/sql_analyzer/src/errors.rs index ffda1d0b4..0db563506 100644 --- a/apps/api/libs/sql_analyzer/src/errors.rs +++ b/apps/api/libs/sql_analyzer/src/errors.rs @@ -35,9 +35,6 @@ pub enum SqlAnalyzerError { #[error("Unsupported statement type found: {0}")] UnsupportedStatement(String), - #[error("Wildcard usage on physical tables is not allowed: {0}")] - BlockedWildcardUsage(String), - #[error("Internal error: {0}")] Internal(#[from] anyhow::Error), } @@ -46,4 +43,4 @@ impl From for SqlAnalyzerError { fn from(err: sqlparser::parser::ParserError) -> Self { SqlAnalyzerError::ParseError(err.to_string()) } -} +} \ No newline at end of file diff --git a/apps/api/libs/sql_analyzer/tests/analysis_tests.rs b/apps/api/libs/sql_analyzer/tests/analysis_tests.rs index 2c0bcde10..8f3e1a4bc 100644 --- a/apps/api/libs/sql_analyzer/tests/analysis_tests.rs +++ b/apps/api/libs/sql_analyzer/tests/analysis_tests.rs @@ -413,90 +413,6 @@ async fn test_multiple_chained_ctes() { assert_eq!(result.joins.len(), 0, "Main query should have no direct joins"); } -#[tokio::test] -async fn test_wildcard_blocked_on_physical_table() { - let sql = "SELECT * FROM schema.users"; - let result = analyze_query(sql.to_string(), "postgres").await; - - assert!(result.is_err()); - if let Err(SqlAnalyzerError::BlockedWildcardUsage(msg)) = result { - assert!(msg.contains("users")); - } else { - panic!("Expected BlockedWildcardUsage error, got: {:?}", result); - } -} - -#[tokio::test] -async fn test_qualified_wildcard_blocked_on_physical_table() { - let sql = "SELECT u.* FROM schema.users u"; - let result = analyze_query(sql.to_string(), "postgres").await; - - assert!(result.is_err()); - if let Err(SqlAnalyzerError::BlockedWildcardUsage(msg)) = result { - assert!(msg.contains("users")); - } else { - panic!("Expected BlockedWildcardUsage error, got: {:?}", result); - } -} - -#[tokio::test] -async fn test_wildcard_allowed_on_cte() { - let sql = "WITH user_cte AS (SELECT u.id, u.name FROM schema.users u) SELECT * FROM user_cte"; - let result = analyze_query(sql.to_string(), "postgres").await; - - match result { - Ok(_) => { - } - Err(e) => { - eprintln!("DEBUG: Unexpected error in test_wildcard_allowed_on_cte: {:?}", e); - panic!("Wildcard on CTE should be allowed, but got error: {:?}", e); - } - } -} - -#[tokio::test] -async fn test_qualified_wildcard_allowed_on_cte() { - let sql = "WITH user_cte AS (SELECT u.id, u.name FROM schema.users u) SELECT uc.* FROM user_cte uc"; - let result = analyze_query(sql.to_string(), "postgres").await; - - assert!(result.is_ok(), "Qualified wildcard on CTE should be allowed"); -} - -#[tokio::test] -async fn test_wildcard_blocked_when_cte_uses_wildcard_on_physical_table() { - let sql = "WITH user_cte AS (SELECT * FROM schema.users) SELECT * FROM user_cte"; - let result = analyze_query(sql.to_string(), "postgres").await; - - assert!(result.is_err()); - if let Err(SqlAnalyzerError::BlockedWildcardUsage(msg)) = result { - assert!(msg.contains("users")); - } else { - panic!("Expected BlockedWildcardUsage error for CTE using wildcard on physical table, got: {:?}", result); - } -} - -#[tokio::test] -async fn test_wildcard_allowed_when_cte_uses_explicit_columns() { - let sql = "WITH user_cte AS (SELECT u.id, u.name FROM schema.users u) SELECT * FROM user_cte"; - let result = analyze_query(sql.to_string(), "postgres").await; - - assert!(result.is_ok(), "Wildcard should be allowed when CTE uses explicit columns"); -} - -#[tokio::test] -async fn test_mixed_wildcard_scenarios() { - let sql = "WITH orders_cte AS (SELECT o.order_id FROM schema.orders o) - SELECT oc.*, u.* FROM orders_cte oc JOIN schema.users u ON oc.order_id = u.id"; - let result = analyze_query(sql.to_string(), "postgres").await; - - assert!(result.is_err()); - if let Err(SqlAnalyzerError::BlockedWildcardUsage(msg)) = result { - assert!(msg.contains("users")); - } else { - panic!("Expected BlockedWildcardUsage error for wildcard on physical table, got: {:?}", result); - } -} - #[tokio::test] async fn test_complex_where_clause() { let sql = r#" @@ -2182,7 +2098,7 @@ async fn test_databricks_pivot() { #[tokio::test] async fn test_databricks_qualified_wildcard() { - // Test Databricks qualified wildcards - should now be blocked due to security enhancement + // Test Databricks qualified wildcards let sql = r#" SELECT u.user_id, @@ -2195,14 +2111,25 @@ async fn test_databricks_qualified_wildcard() { WHERE u.status = 'active' AND p.amount > 100 "#; - let result = analyze_query(sql.to_string(), "databricks").await; + let result = analyze_query(sql.to_string(), "databricks").await.unwrap(); - assert!(result.is_err()); - if let Err(SqlAnalyzerError::BlockedWildcardUsage(msg)) = result { - assert!(msg.contains("users") || msg.contains("purchases")); - } else { - panic!("Expected BlockedWildcardUsage error for wildcards on physical tables, got: {:?}", result); - } + // Check base tables + let base_tables: Vec<_> = result.tables.iter() + .filter(|t| t.kind == TableKind::Base) + .map(|t| t.table_identifier.clone()) + .collect(); + + assert!(base_tables.contains(&"users".to_string()), "Should detect users table"); + assert!(base_tables.contains(&"purchases".to_string()), "Should detect purchases table"); + + // Check columns + let users_table = result.tables.iter().find(|t| t.table_identifier == "users").unwrap(); + assert!(users_table.columns.contains("user_id"), "Should detect user_id column"); + assert!(users_table.columns.contains("name"), "Should detect name column"); + assert!(users_table.columns.contains("status"), "Should detect status column"); + + // Check joins + assert!(!result.joins.is_empty(), "Should detect JOIN"); } #[tokio::test]