revert: remove all Rust wildcard validation changes

- Revert apps/api/libs/sql_analyzer/src/analysis.rs to staging
- Revert apps/api/libs/sql_analyzer/src/errors.rs to staging
- Revert apps/api/libs/sql_analyzer/tests/analysis_tests.rs to staging
- Keep only TypeScript wildcard validation implementation
- PR now contains only relevant TypeScript changes in packages/ai/

Co-Authored-By: Dallin Bentley <dallinbentley98@gmail.com>
This commit is contained in:
Devin AI 2025-07-23 16:06:51 +00:00
parent a44153e2ee
commit 458ac3e6ec
3 changed files with 32 additions and 159 deletions

View File

@ -178,11 +178,11 @@ impl QueryAnalyzer {
self.parent_scope_aliases = parent_aliases.clone();
// Process WITH clause (CTEs) if present
let is_with_query = self.process_with_clause(query)?;
let is_with_query = self.process_with_clause(query);
// Process the main query body
match query.body.as_ref() {
SetExpr::Select(select) => self.process_select_query(select)?,
SetExpr::Select(select) => self.process_select_query(select),
SetExpr::Query(inner_query) => {
self.process_nested_query(inner_query)?;
}
@ -201,7 +201,7 @@ impl QueryAnalyzer {
}
// Process WITH clause and return whether it was processed
fn process_with_clause(&mut self, query: &Query) -> Result<bool, SqlAnalyzerError> {
fn process_with_clause(&mut self, query: &Query) -> bool {
if let Some(with) = &query.with {
if !with.cte_tables.is_empty() {
// Create a new scope for CTE definitions
@ -223,16 +223,18 @@ impl QueryAnalyzer {
.chain(self.parent_scope_aliases.iter())
.map(|(k, v)| (k.clone(), v.clone()))
.collect();
self.process_cte(cte, &combined_aliases_for_cte)?;
if let Err(e) = self.process_cte(cte, &combined_aliases_for_cte) {
eprintln!("Error processing CTE: {}", e);
}
}
return Ok(true);
return true;
}
}
Ok(false)
false
}
// Process a SELECT query
fn process_select_query(&mut self, select: &sqlparser::ast::Select) -> Result<(), SqlAnalyzerError> {
fn process_select_query(&mut self, select: &sqlparser::ast::Select) {
self.current_scope_aliases.clear();
self.current_select_list_aliases.clear();
self.current_from_relation_identifier = None;
@ -291,9 +293,8 @@ impl QueryAnalyzer {
// Process SELECT list
for item in &select.projection {
self.process_select_item(item, &combined_aliases_for_visit)?;
self.process_select_item(item, &combined_aliases_for_visit);
}
Ok(())
}
// Process join data and collect conditions for later processing
@ -517,7 +518,6 @@ impl QueryAnalyzer {
Err(e @ SqlAnalyzerError::VagueReferences(_)) => Err(
SqlAnalyzerError::VagueReferences(format!("In CTE '{}': {}", cte_name, e)),
),
Err(e @ SqlAnalyzerError::BlockedWildcardUsage(_)) => Err(e),
Err(e) => Err(SqlAnalyzerError::Internal(anyhow::anyhow!(
"Internal error summarizing CTE '{}': {}",
cte_name,
@ -528,7 +528,6 @@ impl QueryAnalyzer {
Err(e @ SqlAnalyzerError::VagueReferences(_)) => Err(
SqlAnalyzerError::VagueReferences(format!("In CTE '{}': {}", cte_name, e)),
),
Err(e @ SqlAnalyzerError::BlockedWildcardUsage(_)) => Err(e),
Err(e) => Err(SqlAnalyzerError::Internal(anyhow::anyhow!(
"Error processing CTE '{}': {}",
cte_name,
@ -915,7 +914,7 @@ impl QueryAnalyzer {
&mut self,
select_item: &SelectItem,
parent_aliases: &HashMap<String, String>,
) -> Result<(), SqlAnalyzerError> {
) {
match select_item {
SelectItem::UnnamedExpr(expr) | SelectItem::ExprWithAlias { expr, .. } => {
self.visit_expr_with_parent_scope(expr, parent_aliases);
@ -927,8 +926,6 @@ impl QueryAnalyzer {
.map(|i| i.value.clone())
.unwrap_or_default();
if !qualifier.is_empty() {
self.validate_qualified_wildcard(&qualifier)?;
if !self.current_scope_aliases.contains_key(&qualifier)
&& !parent_aliases.contains_key(&qualifier)
&& !self.tables.contains_key(&qualifier)
@ -939,10 +936,9 @@ impl QueryAnalyzer {
}
}
SelectItem::Wildcard(_) => {
self.validate_wildcard_on_tables()?;
// Unqualified wildcard - we don't explicitly add columns for unqualified wildcard
}
}
Ok(())
}
fn into_summary(mut self) -> Result<QuerySummary, SqlAnalyzerError> {
@ -1149,53 +1145,6 @@ impl QueryAnalyzer {
in_definitions || in_ctes
}
fn validate_wildcard_on_tables(&self) -> Result<(), SqlAnalyzerError> {
// Only validate tables that are actually in the FROM clause
if let Some(from_table) = &self.current_from_relation_identifier {
if let Some(table_info) = self.tables.get(from_table) {
if table_info.kind == TableKind::Base {
return Err(SqlAnalyzerError::BlockedWildcardUsage(format!(
"table '{}'", table_info.table_identifier
)));
}
}
}
// Also check any tables that might be in current scope aliases that are physical tables
for alias in self.current_scope_aliases.keys() {
if let Some(from_table) = &self.current_from_relation_identifier {
if alias == from_table {
continue;
}
}
if let Some(table_info) = self.tables.get(alias) {
if table_info.kind == TableKind::Base {
return Err(SqlAnalyzerError::BlockedWildcardUsage(format!(
"table '{}'", table_info.table_identifier
)));
}
}
}
Ok(())
}
fn validate_qualified_wildcard(&self, qualifier: &str) -> Result<(), SqlAnalyzerError> {
let resolved_table = self.current_scope_aliases.get(qualifier)
.or_else(|| self.parent_scope_aliases.get(qualifier))
.map(|s| s.as_str())
.unwrap_or(qualifier);
if let Some(table_info) = self.tables.get(resolved_table) {
if table_info.kind == TableKind::Base {
return Err(SqlAnalyzerError::BlockedWildcardUsage(format!(
"table '{}'", table_info.table_identifier
)));
}
}
Ok(())
}
fn add_column_reference(
&mut self,
qualifier_opt: Option<&str>,

View File

@ -35,9 +35,6 @@ pub enum SqlAnalyzerError {
#[error("Unsupported statement type found: {0}")]
UnsupportedStatement(String),
#[error("Wildcard usage on physical tables is not allowed: {0}")]
BlockedWildcardUsage(String),
#[error("Internal error: {0}")]
Internal(#[from] anyhow::Error),
}
@ -46,4 +43,4 @@ impl From<sqlparser::parser::ParserError> for SqlAnalyzerError {
fn from(err: sqlparser::parser::ParserError) -> Self {
SqlAnalyzerError::ParseError(err.to_string())
}
}
}

View File

@ -413,90 +413,6 @@ async fn test_multiple_chained_ctes() {
assert_eq!(result.joins.len(), 0, "Main query should have no direct joins");
}
#[tokio::test]
async fn test_wildcard_blocked_on_physical_table() {
let sql = "SELECT * FROM schema.users";
let result = analyze_query(sql.to_string(), "postgres").await;
assert!(result.is_err());
if let Err(SqlAnalyzerError::BlockedWildcardUsage(msg)) = result {
assert!(msg.contains("users"));
} else {
panic!("Expected BlockedWildcardUsage error, got: {:?}", result);
}
}
#[tokio::test]
async fn test_qualified_wildcard_blocked_on_physical_table() {
let sql = "SELECT u.* FROM schema.users u";
let result = analyze_query(sql.to_string(), "postgres").await;
assert!(result.is_err());
if let Err(SqlAnalyzerError::BlockedWildcardUsage(msg)) = result {
assert!(msg.contains("users"));
} else {
panic!("Expected BlockedWildcardUsage error, got: {:?}", result);
}
}
#[tokio::test]
async fn test_wildcard_allowed_on_cte() {
let sql = "WITH user_cte AS (SELECT u.id, u.name FROM schema.users u) SELECT * FROM user_cte";
let result = analyze_query(sql.to_string(), "postgres").await;
match result {
Ok(_) => {
}
Err(e) => {
eprintln!("DEBUG: Unexpected error in test_wildcard_allowed_on_cte: {:?}", e);
panic!("Wildcard on CTE should be allowed, but got error: {:?}", e);
}
}
}
#[tokio::test]
async fn test_qualified_wildcard_allowed_on_cte() {
let sql = "WITH user_cte AS (SELECT u.id, u.name FROM schema.users u) SELECT uc.* FROM user_cte uc";
let result = analyze_query(sql.to_string(), "postgres").await;
assert!(result.is_ok(), "Qualified wildcard on CTE should be allowed");
}
#[tokio::test]
async fn test_wildcard_blocked_when_cte_uses_wildcard_on_physical_table() {
let sql = "WITH user_cte AS (SELECT * FROM schema.users) SELECT * FROM user_cte";
let result = analyze_query(sql.to_string(), "postgres").await;
assert!(result.is_err());
if let Err(SqlAnalyzerError::BlockedWildcardUsage(msg)) = result {
assert!(msg.contains("users"));
} else {
panic!("Expected BlockedWildcardUsage error for CTE using wildcard on physical table, got: {:?}", result);
}
}
#[tokio::test]
async fn test_wildcard_allowed_when_cte_uses_explicit_columns() {
let sql = "WITH user_cte AS (SELECT u.id, u.name FROM schema.users u) SELECT * FROM user_cte";
let result = analyze_query(sql.to_string(), "postgres").await;
assert!(result.is_ok(), "Wildcard should be allowed when CTE uses explicit columns");
}
#[tokio::test]
async fn test_mixed_wildcard_scenarios() {
let sql = "WITH orders_cte AS (SELECT o.order_id FROM schema.orders o)
SELECT oc.*, u.* FROM orders_cte oc JOIN schema.users u ON oc.order_id = u.id";
let result = analyze_query(sql.to_string(), "postgres").await;
assert!(result.is_err());
if let Err(SqlAnalyzerError::BlockedWildcardUsage(msg)) = result {
assert!(msg.contains("users"));
} else {
panic!("Expected BlockedWildcardUsage error for wildcard on physical table, got: {:?}", result);
}
}
#[tokio::test]
async fn test_complex_where_clause() {
let sql = r#"
@ -2182,7 +2098,7 @@ async fn test_databricks_pivot() {
#[tokio::test]
async fn test_databricks_qualified_wildcard() {
// Test Databricks qualified wildcards - should now be blocked due to security enhancement
// Test Databricks qualified wildcards
let sql = r#"
SELECT
u.user_id,
@ -2195,14 +2111,25 @@ async fn test_databricks_qualified_wildcard() {
WHERE u.status = 'active' AND p.amount > 100
"#;
let result = analyze_query(sql.to_string(), "databricks").await;
let result = analyze_query(sql.to_string(), "databricks").await.unwrap();
assert!(result.is_err());
if let Err(SqlAnalyzerError::BlockedWildcardUsage(msg)) = result {
assert!(msg.contains("users") || msg.contains("purchases"));
} else {
panic!("Expected BlockedWildcardUsage error for wildcards on physical tables, got: {:?}", result);
}
// Check base tables
let base_tables: Vec<_> = result.tables.iter()
.filter(|t| t.kind == TableKind::Base)
.map(|t| t.table_identifier.clone())
.collect();
assert!(base_tables.contains(&"users".to_string()), "Should detect users table");
assert!(base_tables.contains(&"purchases".to_string()), "Should detect purchases table");
// Check columns
let users_table = result.tables.iter().find(|t| t.table_identifier == "users").unwrap();
assert!(users_table.columns.contains("user_id"), "Should detect user_id column");
assert!(users_table.columns.contains("name"), "Should detect name column");
assert!(users_table.columns.contains("status"), "Should detect status column");
// Check joins
assert!(!result.joins.is_empty(), "Should detect JOIN");
}
#[tokio::test]