From fdca3801349ae04512397cec5956a55593896a1d Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Wed, 23 Jul 2025 13:34:56 +0000 Subject: [PATCH] feat: implement wildcard validation in TypeScript sql-parser-helpers - Add validateWildcardUsage() function to block SELECT * on physical tables - Allow wildcards on CTEs but block on physical database tables - Add comprehensive tests for wildcard validation scenarios - Integrate wildcard validation into permission validator - Supports all SQL dialects via node-sql-parser - Prevents permission bypass through wildcard queries Co-Authored-By: Dallin Bentley --- .../sql-permissions/permission-validator.ts | 10 ++ .../sql-parser-helpers.test.ts | 105 +++++++++++++ .../sql-permissions/sql-parser-helpers.ts | 148 ++++++++++++++++++ 3 files changed, 263 insertions(+) diff --git a/packages/ai/src/utils/sql-permissions/permission-validator.ts b/packages/ai/src/utils/sql-permissions/permission-validator.ts index cf559916b..b2e5429ea 100644 --- a/packages/ai/src/utils/sql-permissions/permission-validator.ts +++ b/packages/ai/src/utils/sql-permissions/permission-validator.ts @@ -5,6 +5,7 @@ import { extractPhysicalTables, extractTablesFromYml, tablesMatch, + validateWildcardUsage, } from './sql-parser-helpers'; export interface PermissionValidationResult { @@ -33,6 +34,15 @@ export async function validateSqlPermissions( }; } + const wildcardCheck = validateWildcardUsage(sql, dataSourceSyntax); + if (!wildcardCheck.isValid) { + return { + isAuthorized: false, + unauthorizedTables: wildcardCheck.blockedTables || [], + error: wildcardCheck.error || 'Wildcard usage on physical tables is not allowed', + }; + } + // Extract physical tables from SQL const tablesInQuery = extractPhysicalTables(sql, dataSourceSyntax); diff --git a/packages/ai/src/utils/sql-permissions/sql-parser-helpers.test.ts b/packages/ai/src/utils/sql-permissions/sql-parser-helpers.test.ts index 05f743022..22371bdc8 100644 --- a/packages/ai/src/utils/sql-permissions/sql-parser-helpers.test.ts +++ b/packages/ai/src/utils/sql-permissions/sql-parser-helpers.test.ts @@ -7,6 +7,7 @@ import { normalizeTableIdentifier, parseTableReference, tablesMatch, + validateWildcardUsage, } from './sql-parser-helpers'; describe('SQL Parser Helpers', () => { @@ -420,6 +421,110 @@ models: }); }); + describe('validateWildcardUsage', () => { + it('should block unqualified wildcard on physical table', () => { + const sql = 'SELECT * FROM users'; + const result = validateWildcardUsage(sql); + expect(result.isValid).toBe(false); + expect(result.error).toContain('Wildcard usage on physical tables is not allowed'); + expect(result.blockedTables).toContain('users'); + }); + + it('should block qualified wildcard on physical table', () => { + const sql = 'SELECT u.* FROM users u'; + const result = validateWildcardUsage(sql); + expect(result.isValid).toBe(false); + expect(result.error).toContain('Wildcard usage on physical tables is not allowed'); + expect(result.blockedTables).toContain('u'); + }); + + it('should allow wildcard on CTE', () => { + const sql = ` + WITH user_stats AS ( + SELECT user_id, COUNT(*) as count FROM orders GROUP BY user_id + ) + SELECT * FROM user_stats + `; + const result = validateWildcardUsage(sql); + expect(result.isValid).toBe(true); + expect(result.error).toBeUndefined(); + }); + + it('should allow qualified wildcard on CTE', () => { + const sql = ` + WITH user_stats AS ( + SELECT user_id, COUNT(*) as count FROM orders GROUP BY user_id + ) + SELECT us.* FROM user_stats us + `; + const result = validateWildcardUsage(sql); + expect(result.isValid).toBe(true); + }); + + it('should block wildcard when CTE uses wildcard on physical table', () => { + const sql = ` + WITH user_cte AS ( + SELECT * FROM users + ) + SELECT * FROM user_cte + `; + const result = validateWildcardUsage(sql); + expect(result.isValid).toBe(false); + expect(result.error).toContain('Wildcard usage on physical tables is not allowed'); + expect(result.blockedTables).toContain('users'); + }); + + it('should allow wildcard when CTE uses explicit columns', () => { + const sql = ` + WITH user_cte AS ( + SELECT id, name FROM users + ) + SELECT * FROM user_cte + `; + const result = validateWildcardUsage(sql); + expect(result.isValid).toBe(true); + }); + + it('should block wildcard on physical tables in JOIN', () => { + const sql = ` + WITH orders_cte AS ( + SELECT order_id FROM orders + ) + SELECT oc.*, u.* FROM orders_cte oc JOIN users u ON oc.order_id = u.id + `; + const result = validateWildcardUsage(sql); + expect(result.isValid).toBe(false); + expect(result.blockedTables).toContain('u'); + }); + + it('should allow explicit column selection', () => { + const sql = 'SELECT id, name, email FROM users'; + const result = validateWildcardUsage(sql); + expect(result.isValid).toBe(true); + }); + + it('should handle multiple physical tables with wildcards', () => { + const sql = 'SELECT u.*, o.* FROM users u JOIN orders o ON u.id = o.user_id'; + const result = validateWildcardUsage(sql); + expect(result.isValid).toBe(false); + expect(result.blockedTables).toEqual(expect.arrayContaining(['u', 'o'])); + }); + + it('should handle schema-qualified tables', () => { + const sql = 'SELECT * FROM public.users'; + const result = validateWildcardUsage(sql); + expect(result.isValid).toBe(false); + expect(result.error).toContain('Wildcard usage on physical tables is not allowed'); + }); + + it('should handle invalid SQL gracefully', () => { + const sql = 'NOT VALID SQL'; + const result = validateWildcardUsage(sql); + expect(result.isValid).toBe(false); + expect(result.error).toContain('Failed to validate wildcard usage'); + }); + }); + describe('checkQueryIsReadOnly', () => { it('should allow SELECT statements', () => { const result = checkQueryIsReadOnly('SELECT * FROM users'); diff --git a/packages/ai/src/utils/sql-permissions/sql-parser-helpers.ts b/packages/ai/src/utils/sql-permissions/sql-parser-helpers.ts index 22c04062e..9ae605b5c 100644 --- a/packages/ai/src/utils/sql-permissions/sql-parser-helpers.ts +++ b/packages/ai/src/utils/sql-permissions/sql-parser-helpers.ts @@ -15,6 +15,12 @@ export interface QueryTypeCheckResult { error?: string; } +export interface WildcardValidationResult { + isValid: boolean; + error?: string; + blockedTables?: string[]; +} + // Map data source syntax to node-sql-parser dialect const DIALECT_MAPPING: Record = { // Direct mappings @@ -338,6 +344,148 @@ export function extractTablesFromYml(ymlContent: string): ParsedTable[] { return tables; } +/** + * Validates that wildcards (SELECT *) are not used on physical tables + * Allows wildcards on CTEs but blocks them on physical database tables + */ +export function validateWildcardUsage(sql: string, dataSourceSyntax?: string): WildcardValidationResult { + const dialect = getParserDialect(dataSourceSyntax); + const parser = new Parser(); + + try { + // Parse SQL into AST with the appropriate dialect + const ast = parser.astify(sql, { database: dialect }); + + // Handle single statement or array of statements + const statements = Array.isArray(ast) ? ast : [ast]; + + // Extract CTE names to allow wildcards on them + const cteNames = new Set(); + for (const statement of statements) { + if ('with' in statement && statement.with && Array.isArray(statement.with)) { + for (const cte of statement.with) { + if (cte.name?.value) { + cteNames.add(cte.name.value.toLowerCase()); + } + } + } + } + + // Check each statement for wildcard usage + const blockedTables: string[] = []; + + for (const statement of statements) { + if ('type' in statement && statement.type === 'select') { + const wildcardTables = findWildcardUsageOnPhysicalTables(statement, cteNames); + blockedTables.push(...wildcardTables); + } + } + + if (blockedTables.length > 0) { + const tableList = blockedTables.join(', '); + return { + isValid: false, + error: `Wildcard usage on physical tables is not allowed: ${tableList}. Please specify explicit column names.`, + blockedTables, + }; + } + + return { isValid: true }; + } catch (error) { + return { + isValid: false, + error: `Failed to validate wildcard usage: ${error instanceof Error ? error.message : 'Unknown error'}`, + }; + } +} + +/** + * Recursively finds wildcard usage on physical tables in a SELECT statement + */ +function findWildcardUsageOnPhysicalTables(selectStatement: any, cteNames: Set): string[] { + const blockedTables: string[] = []; + + if (selectStatement.columns && Array.isArray(selectStatement.columns)) { + for (const column of selectStatement.columns) { + if (column.expr && column.expr.type === 'column_ref') { + // Check for unqualified wildcard (SELECT *) + if (column.expr.column === '*' && !column.expr.table) { + // Get all tables in FROM clause that are not CTEs + const physicalTables = getPhysicalTablesFromFrom(selectStatement.from, cteNames); + blockedTables.push(...physicalTables); + } + // Check for qualified wildcard (SELECT table.*) + else if (column.expr.column === '*' && column.expr.table) { + const tableName = column.expr.table.toLowerCase(); + if (!cteNames.has(tableName)) { + blockedTables.push(tableName); + } + } + } + } + } + + if (selectStatement.with && Array.isArray(selectStatement.with)) { + for (const cte of selectStatement.with) { + if (cte.stmt && cte.stmt.type === 'select') { + const subBlocked = findWildcardUsageOnPhysicalTables(cte.stmt, cteNames); + blockedTables.push(...subBlocked); + } + } + } + + if (selectStatement.from && Array.isArray(selectStatement.from)) { + for (const fromItem of selectStatement.from) { + if (fromItem.expr && fromItem.expr.type === 'select') { + const subBlocked = findWildcardUsageOnPhysicalTables(fromItem.expr, cteNames); + blockedTables.push(...subBlocked); + } + } + } + + return blockedTables; +} + +/** + * Extracts physical table names from FROM clause, excluding CTEs + */ +function getPhysicalTablesFromFrom(fromClause: any[], cteNames: Set): string[] { + const tables: string[] = []; + + if (!fromClause || !Array.isArray(fromClause)) { + return tables; + } + + for (const fromItem of fromClause) { + if (fromItem.table) { + const tableName = typeof fromItem.table === 'string' + ? fromItem.table + : fromItem.table.table || fromItem.table; + + if (tableName && !cteNames.has(tableName.toLowerCase())) { + tables.push(tableName); + } + } + + // Handle JOINs + if (fromItem.join && Array.isArray(fromItem.join)) { + for (const joinItem of fromItem.join) { + if (joinItem.table) { + const tableName = typeof joinItem.table === 'string' + ? joinItem.table + : joinItem.table.table || joinItem.table; + + if (tableName && !cteNames.has(tableName.toLowerCase())) { + tables.push(tableName); + } + } + } + } + } + + return tables; +} + /** * Checks if a SQL query is read-only (SELECT statements only) * Returns error if query contains write operations