feat: implement wildcard validation in TypeScript sql-parser-helpers

- Add validateWildcardUsage() function to block SELECT * on physical tables
- Allow wildcards on CTEs but block on physical database tables
- Add comprehensive tests for wildcard validation scenarios
- Integrate wildcard validation into permission validator
- Supports all SQL dialects via node-sql-parser
- Prevents permission bypass through wildcard queries

Co-Authored-By: Dallin Bentley <dallinbentley98@gmail.com>
This commit is contained in:
Devin AI 2025-07-23 13:34:56 +00:00
parent e0f0f6509a
commit fdca380134
3 changed files with 263 additions and 0 deletions

View File

@ -5,6 +5,7 @@ import {
extractPhysicalTables,
extractTablesFromYml,
tablesMatch,
validateWildcardUsage,
} from './sql-parser-helpers';
export interface PermissionValidationResult {
@ -33,6 +34,15 @@ export async function validateSqlPermissions(
};
}
const wildcardCheck = validateWildcardUsage(sql, dataSourceSyntax);
if (!wildcardCheck.isValid) {
return {
isAuthorized: false,
unauthorizedTables: wildcardCheck.blockedTables || [],
error: wildcardCheck.error || 'Wildcard usage on physical tables is not allowed',
};
}
// Extract physical tables from SQL
const tablesInQuery = extractPhysicalTables(sql, dataSourceSyntax);

View File

@ -7,6 +7,7 @@ import {
normalizeTableIdentifier,
parseTableReference,
tablesMatch,
validateWildcardUsage,
} from './sql-parser-helpers';
describe('SQL Parser Helpers', () => {
@ -420,6 +421,110 @@ models:
});
});
describe('validateWildcardUsage', () => {
it('should block unqualified wildcard on physical table', () => {
const sql = 'SELECT * FROM users';
const result = validateWildcardUsage(sql);
expect(result.isValid).toBe(false);
expect(result.error).toContain('Wildcard usage on physical tables is not allowed');
expect(result.blockedTables).toContain('users');
});
it('should block qualified wildcard on physical table', () => {
const sql = 'SELECT u.* FROM users u';
const result = validateWildcardUsage(sql);
expect(result.isValid).toBe(false);
expect(result.error).toContain('Wildcard usage on physical tables is not allowed');
expect(result.blockedTables).toContain('u');
});
it('should allow wildcard on CTE', () => {
const sql = `
WITH user_stats AS (
SELECT user_id, COUNT(*) as count FROM orders GROUP BY user_id
)
SELECT * FROM user_stats
`;
const result = validateWildcardUsage(sql);
expect(result.isValid).toBe(true);
expect(result.error).toBeUndefined();
});
it('should allow qualified wildcard on CTE', () => {
const sql = `
WITH user_stats AS (
SELECT user_id, COUNT(*) as count FROM orders GROUP BY user_id
)
SELECT us.* FROM user_stats us
`;
const result = validateWildcardUsage(sql);
expect(result.isValid).toBe(true);
});
it('should block wildcard when CTE uses wildcard on physical table', () => {
const sql = `
WITH user_cte AS (
SELECT * FROM users
)
SELECT * FROM user_cte
`;
const result = validateWildcardUsage(sql);
expect(result.isValid).toBe(false);
expect(result.error).toContain('Wildcard usage on physical tables is not allowed');
expect(result.blockedTables).toContain('users');
});
it('should allow wildcard when CTE uses explicit columns', () => {
const sql = `
WITH user_cte AS (
SELECT id, name FROM users
)
SELECT * FROM user_cte
`;
const result = validateWildcardUsage(sql);
expect(result.isValid).toBe(true);
});
it('should block wildcard on physical tables in JOIN', () => {
const sql = `
WITH orders_cte AS (
SELECT order_id FROM orders
)
SELECT oc.*, u.* FROM orders_cte oc JOIN users u ON oc.order_id = u.id
`;
const result = validateWildcardUsage(sql);
expect(result.isValid).toBe(false);
expect(result.blockedTables).toContain('u');
});
it('should allow explicit column selection', () => {
const sql = 'SELECT id, name, email FROM users';
const result = validateWildcardUsage(sql);
expect(result.isValid).toBe(true);
});
it('should handle multiple physical tables with wildcards', () => {
const sql = 'SELECT u.*, o.* FROM users u JOIN orders o ON u.id = o.user_id';
const result = validateWildcardUsage(sql);
expect(result.isValid).toBe(false);
expect(result.blockedTables).toEqual(expect.arrayContaining(['u', 'o']));
});
it('should handle schema-qualified tables', () => {
const sql = 'SELECT * FROM public.users';
const result = validateWildcardUsage(sql);
expect(result.isValid).toBe(false);
expect(result.error).toContain('Wildcard usage on physical tables is not allowed');
});
it('should handle invalid SQL gracefully', () => {
const sql = 'NOT VALID SQL';
const result = validateWildcardUsage(sql);
expect(result.isValid).toBe(false);
expect(result.error).toContain('Failed to validate wildcard usage');
});
});
describe('checkQueryIsReadOnly', () => {
it('should allow SELECT statements', () => {
const result = checkQueryIsReadOnly('SELECT * FROM users');

View File

@ -15,6 +15,12 @@ export interface QueryTypeCheckResult {
error?: string;
}
export interface WildcardValidationResult {
isValid: boolean;
error?: string;
blockedTables?: string[];
}
// Map data source syntax to node-sql-parser dialect
const DIALECT_MAPPING: Record<string, string> = {
// Direct mappings
@ -338,6 +344,148 @@ export function extractTablesFromYml(ymlContent: string): ParsedTable[] {
return tables;
}
/**
* Validates that wildcards (SELECT *) are not used on physical tables
* Allows wildcards on CTEs but blocks them on physical database tables
*/
export function validateWildcardUsage(sql: string, dataSourceSyntax?: string): WildcardValidationResult {
const dialect = getParserDialect(dataSourceSyntax);
const parser = new Parser();
try {
// Parse SQL into AST with the appropriate dialect
const ast = parser.astify(sql, { database: dialect });
// Handle single statement or array of statements
const statements = Array.isArray(ast) ? ast : [ast];
// Extract CTE names to allow wildcards on them
const cteNames = new Set<string>();
for (const statement of statements) {
if ('with' in statement && statement.with && Array.isArray(statement.with)) {
for (const cte of statement.with) {
if (cte.name?.value) {
cteNames.add(cte.name.value.toLowerCase());
}
}
}
}
// Check each statement for wildcard usage
const blockedTables: string[] = [];
for (const statement of statements) {
if ('type' in statement && statement.type === 'select') {
const wildcardTables = findWildcardUsageOnPhysicalTables(statement, cteNames);
blockedTables.push(...wildcardTables);
}
}
if (blockedTables.length > 0) {
const tableList = blockedTables.join(', ');
return {
isValid: false,
error: `Wildcard usage on physical tables is not allowed: ${tableList}. Please specify explicit column names.`,
blockedTables,
};
}
return { isValid: true };
} catch (error) {
return {
isValid: false,
error: `Failed to validate wildcard usage: ${error instanceof Error ? error.message : 'Unknown error'}`,
};
}
}
/**
* Recursively finds wildcard usage on physical tables in a SELECT statement
*/
function findWildcardUsageOnPhysicalTables(selectStatement: any, cteNames: Set<string>): string[] {
const blockedTables: string[] = [];
if (selectStatement.columns && Array.isArray(selectStatement.columns)) {
for (const column of selectStatement.columns) {
if (column.expr && column.expr.type === 'column_ref') {
// Check for unqualified wildcard (SELECT *)
if (column.expr.column === '*' && !column.expr.table) {
// Get all tables in FROM clause that are not CTEs
const physicalTables = getPhysicalTablesFromFrom(selectStatement.from, cteNames);
blockedTables.push(...physicalTables);
}
// Check for qualified wildcard (SELECT table.*)
else if (column.expr.column === '*' && column.expr.table) {
const tableName = column.expr.table.toLowerCase();
if (!cteNames.has(tableName)) {
blockedTables.push(tableName);
}
}
}
}
}
if (selectStatement.with && Array.isArray(selectStatement.with)) {
for (const cte of selectStatement.with) {
if (cte.stmt && cte.stmt.type === 'select') {
const subBlocked = findWildcardUsageOnPhysicalTables(cte.stmt, cteNames);
blockedTables.push(...subBlocked);
}
}
}
if (selectStatement.from && Array.isArray(selectStatement.from)) {
for (const fromItem of selectStatement.from) {
if (fromItem.expr && fromItem.expr.type === 'select') {
const subBlocked = findWildcardUsageOnPhysicalTables(fromItem.expr, cteNames);
blockedTables.push(...subBlocked);
}
}
}
return blockedTables;
}
/**
* Extracts physical table names from FROM clause, excluding CTEs
*/
function getPhysicalTablesFromFrom(fromClause: any[], cteNames: Set<string>): string[] {
const tables: string[] = [];
if (!fromClause || !Array.isArray(fromClause)) {
return tables;
}
for (const fromItem of fromClause) {
if (fromItem.table) {
const tableName = typeof fromItem.table === 'string'
? fromItem.table
: fromItem.table.table || fromItem.table;
if (tableName && !cteNames.has(tableName.toLowerCase())) {
tables.push(tableName);
}
}
// Handle JOINs
if (fromItem.join && Array.isArray(fromItem.join)) {
for (const joinItem of fromItem.join) {
if (joinItem.table) {
const tableName = typeof joinItem.table === 'string'
? joinItem.table
: joinItem.table.table || joinItem.table;
if (tableName && !cteNames.has(tableName.toLowerCase())) {
tables.push(tableName);
}
}
}
}
}
return tables;
}
/**
* Checks if a SQL query is read-only (SELECT statements only)
* Returns error if query contains write operations