Integrate SQL functionality by adding SQL routes and updating dependencies. Refactor access control imports to use centralized access-controls package. Update pnpm-lock and package.json for new dependencies including node-sql-parser and yaml.

This commit is contained in:
dal 2025-09-15 15:07:43 -06:00
parent 0a28d36721
commit 2761b3f0fc
No known key found for this signature in database
GPG Key ID: 16F4B0E1E9F61122
24 changed files with 2218 additions and 19 deletions

View File

@ -17,6 +17,7 @@ import s3IntegrationsRoutes from './s3-integrations';
import securityRoutes from './security';
import shortcutsRoutes from './shortcuts';
import slackRoutes from './slack';
import sqlRoutes from './sql';
import supportRoutes from './support';
import titleRoutes from './title';
import userRoutes from './users';
@ -33,6 +34,7 @@ const app = new Hono()
.route('/metric_files', metricFilesRoutes)
.route('/github', githubRoutes)
.route('/slack', slackRoutes)
.route('/sql', sqlRoutes)
.route('/support', supportRoutes)
.route('/security', securityRoutes)
.route('/shortcuts', shortcutsRoutes)

View File

@ -0,0 +1,8 @@
import { Hono } from 'hono';
import run from './run';
const app = new Hono()
// Mount the /run subrouter
.route('/run', run);
export default app;

View File

@ -0,0 +1,123 @@
import { createPermissionErrorMessage, validateSqlPermissions } from '@buster/access-controls';
import { executeMetricQuery } from '@buster/data-source';
import type { Credentials } from '@buster/data-source';
import type { User } from '@buster/database';
import {
getDataSourceById,
getDataSourceCredentials,
getUserOrganizationId,
} from '@buster/database';
import type { RunSqlRequest, RunSqlResponse } from '@buster/server-shared';
import { HTTPException } from 'hono/http-exception';
/**
 * Runs an ad-hoc SQL query against a data source on behalf of a user.
 *
 * Flow:
 *  1. Confirm the user belongs to an organization (403 otherwise).
 *  2. Load the data source (404 if missing) and confirm it belongs to
 *     the caller's organization (403 otherwise).
 *  3. Validate the SQL against the user's permissioned datasets (403 on failure).
 *  4. Fetch credentials from the vault (500 on failure, details logged only).
 *  5. Execute with retries and a timeout, capping the result at 5000 rows.
 *
 * @param request - The SQL query request containing data_source_id and sql
 * @param user - The authenticated user
 * @returns The query results with metadata and a has-more-records flag
 */
export async function runSqlHandler(request: RunSqlRequest, user: User): Promise<RunSqlResponse> {
  // Hard cap on rows returned to the client.
  const ROW_LIMIT = 5000;

  // 1. Organization membership is required before anything else.
  const membership = await getUserOrganizationId(user.id);
  if (!membership) {
    throw new HTTPException(403, {
      message: 'You must be part of an organization to run SQL queries',
    });
  }
  const { organizationId } = membership;

  // 2. The data source must exist and belong to the caller's organization.
  const dataSource = await getDataSourceById(request.data_source_id);
  if (!dataSource) {
    throw new HTTPException(404, {
      message: 'Data source not found',
    });
  }
  if (dataSource.organizationId !== organizationId) {
    throw new HTTPException(403, {
      message: 'You do not have permission to access this data source',
    });
  }

  // 3. Table/column-level permission check against the user's datasets.
  const permCheck = await validateSqlPermissions(request.sql, user.id, dataSource.type);
  if (!permCheck.isAuthorized) {
    const message =
      permCheck.error ||
      createPermissionErrorMessage(permCheck.unauthorizedTables, permCheck.unauthorizedColumns);
    throw new HTTPException(403, { message });
  }

  // 4. Pull credentials from the vault. The raw failure is logged but never
  //    surfaced to the client.
  let creds: Credentials;
  try {
    const rawCredentials = await getDataSourceCredentials({
      dataSourceId: request.data_source_id,
    });
    // Fall back to the data source's own type when the vault record omits it.
    creds = {
      ...rawCredentials,
      type: rawCredentials.type || dataSource.type,
    } as Credentials;
  } catch (error) {
    console.error('Failed to retrieve data source credentials:', error);
    throw new HTTPException(500, {
      message: 'Failed to access data source',
    });
  }

  // 5. Execute, asking for one extra row so truncation can be detected.
  try {
    const result = await executeMetricQuery(request.data_source_id, request.sql, creds, {
      maxRows: ROW_LIMIT + 1,
      timeout: 60000, // 60 seconds
      retryDelays: [1000, 3000, 6000], // 1s, 3s, 6s
    });

    const hasMore = result.data.length > ROW_LIMIT;
    const response: RunSqlResponse = {
      data: result.data.slice(0, ROW_LIMIT),
      data_metadata: result.dataMetadata,
      has_more_records: hasMore || result.hasMoreRecords,
    };
    return response;
  } catch (error) {
    console.error('Query execution failed:', error);
    if (error instanceof Error) {
      throw new HTTPException(500, {
        message: `Query execution failed: ${error.message}`,
      });
    }
    throw new HTTPException(500, {
      message: 'Query execution failed',
    });
  }
}

View File

@ -0,0 +1,36 @@
import { RunSqlRequestSchema } from '@buster/server-shared';
import { zValidator } from '@hono/zod-validator';
import { Hono } from 'hono';
import { HTTPException } from 'hono/http-exception';
import { requireAuth } from '../../../../middleware/auth';
import '../../../../types/hono.types';
import { runSqlHandler } from './POST';
// POST /sql/run — execute a SQL query against a data source.
// Every route requires an authenticated user.
const app = new Hono()
  .use('*', requireAuth)
  .post('/', zValidator('json', RunSqlRequestSchema), async (c) => {
    const payload = c.req.valid('json');
    const currentUser = c.get('busterUser');
    const result = await runSqlHandler(payload, currentUser);
    return c.json(result);
  })
  // Centralized error handling for this subrouter.
  .onError((err, c) => {
    console.error('SQL run API error:', err);
    // HTTPException already carries its own status/body.
    if (err instanceof HTTPException) {
      return err.getResponse();
    }
    // Anything else becomes an opaque 500.
    return c.json({ error: 'Internal server error' }, 500);
  });

export default app;

View File

@ -1,6 +1,6 @@
import type { RunSQLResponse } from '../../asset_interfaces/sql';
import { mainApi } from '../instances';
import { mainApiV2 } from '../instances';
export const runSQL = async (params: { data_source_id: string; sql: string }) => {
return mainApi.post<RunSQLResponse>('/sql/run', params).then((res) => res.data);
return mainApiV2.post<RunSQLResponse>('/sql/run', params).then((res) => res.data);
};

View File

@ -30,10 +30,13 @@
},
"dependencies": {
"@buster/database": "workspace:*",
"@buster/data-source": "workspace:*",
"@buster/env-utils": "workspace:*",
"@buster/typescript-config": "workspace:*",
"@buster/vitest-config": "workspace:*",
"lru-cache": "^11.1.0",
"node-sql-parser": "^5.3.12",
"yaml": "^2.8.1",
"zod": "catalog:",
"uuid": "catalog:",
"drizzle-orm": "catalog:"

View File

@ -28,6 +28,9 @@ export * from './datasets';
// Export user utilities
export * from './users';
// Export SQL permissions
export * from './sql-permissions';
// Export cache functions separately
export {
clearAllCaches,

View File

@ -0,0 +1,52 @@
import { createPermissionErrorMessage, validateSqlPermissions } from './validator';
// Result envelope for permission-gated SQL execution: either a successful
// value in `data` or a human-readable `error`.
export interface ExecuteWithPermissionResult<T = unknown> {
  success: boolean;
  data?: T;
  error?: string;
}

/**
 * Runs `executeFn` only after confirming the user may access every table
 * referenced by `sql`. Never throws: validation failures and execution
 * errors are both folded into the returned envelope.
 */
export async function executeWithPermissionCheck<T>(
  sql: string,
  userId: string,
  executeFn: () => Promise<T>,
  dataSourceSyntax?: string
): Promise<ExecuteWithPermissionResult<T>> {
  // Without a user id there is nothing to check permissions against.
  if (!userId) {
    return {
      success: false,
      error: 'User authentication required for SQL execution',
    };
  }

  // Reject the query when any referenced table/column is out of scope.
  const verdict = await validateSqlPermissions(sql, userId, dataSourceSyntax);
  if (!verdict.isAuthorized) {
    return {
      success: false,
      error: createPermissionErrorMessage(verdict.unauthorizedTables, verdict.unauthorizedColumns),
    };
  }

  // Authorized: run the query, converting thrown errors into the envelope.
  try {
    const value = await executeFn();
    return {
      success: true,
      data: value,
    };
  } catch (error) {
    return {
      success: false,
      error: error instanceof Error ? error.message : 'SQL execution failed',
    };
  }
}

View File

@ -0,0 +1,3 @@
// Barrel file for the sql-permissions module: re-exports the SQL parsing
// helpers, the permission validator, and the permission-gated execute wrapper.
export * from './parser-helpers';
export * from './validator';
export * from './execute-with-permission-check';

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,276 @@
import { getPermissionedDatasets } from '../datasets/permissions';
import {
type ParsedDataset,
type ParsedTable,
checkQueryIsReadOnly,
extractColumnReferences,
extractDatasetsFromYml,
extractPhysicalTables,
extractTablesFromYml,
tablesMatch,
validateWildcardUsage,
} from './parser-helpers';
// A single column the user may not read, paired with the table it was
// referenced on in the query.
export interface UnauthorizedColumn {
  table: string;
  column: string;
}

// Outcome of validating a SQL string against the user's dataset permissions.
// `error` carries a human-readable explanation when the query is rejected for
// a reason other than (or in addition to) unauthorized tables/columns.
export interface PermissionValidationResult {
  isAuthorized: boolean;
  unauthorizedTables: string[];
  unauthorizedColumns?: UnauthorizedColumn[];
  error?: string;
}

/**
 * Validates SQL query against user's permissioned datasets
 * Checks that all tables and columns referenced in the query are accessible to the user
 *
 * @param sql - The SQL text to validate
 * @param userId - The user whose dataset permissions are checked
 * @param dataSourceSyntax - Optional SQL dialect hint forwarded to the parser helpers
 * @returns A PermissionValidationResult; this function does not throw — parser
 *          and permission-lookup failures are converted into a denial with `error` set.
 */
export async function validateSqlPermissions(
  sql: string,
  userId: string,
  dataSourceSyntax?: string
): Promise<PermissionValidationResult> {
  try {
    // First check if query is read-only
    const readOnlyCheck = checkQueryIsReadOnly(sql, dataSourceSyntax);
    if (!readOnlyCheck.isReadOnly) {
      return {
        isAuthorized: false,
        unauthorizedTables: [],
        error:
          readOnlyCheck.error ||
          'Only SELECT statements are allowed for read-only access. Please modify your query to use SELECT instead of write operations like INSERT, UPDATE, DELETE, or DDL statements.',
      };
    }

    const wildcardCheck = validateWildcardUsage(sql, dataSourceSyntax);
    // Store the wildcard error but continue to validate columns to provide comprehensive feedback
    let wildcardError: string | undefined;
    if (!wildcardCheck.isValid) {
      wildcardError =
        wildcardCheck.error ||
        'SELECT * is not allowed on physical tables. Please explicitly list the column names you need (e.g., SELECT id, name, email FROM users). This helps prevent unintended data exposure and improves query performance.';
    }

    // Extract physical tables from SQL
    const tablesInQuery = extractPhysicalTables(sql, dataSourceSyntax);
    if (tablesInQuery.length === 0) {
      // No tables referenced (might be a function call or constant select)
      // NOTE(review): a pending wildcardError is dropped on this early return —
      // confirm that queries with no physical tables should ignore wildcard violations.
      return { isAuthorized: true, unauthorizedTables: [] };
    }

    // Get user's permissioned datasets
    // NOTE(review): pageSize is capped at 1000 — users with more datasets than
    // that would have later datasets ignored; verify this bound is sufficient.
    const permissionedDatasets = await getPermissionedDatasets({
      userId,
      page: 0,
      pageSize: 1000,
    });

    // Extract all allowed tables and datasets from permissions
    const allowedTables: ParsedTable[] = [];
    const allowedDatasets: ParsedDataset[] = [];
    for (const dataset of permissionedDatasets.datasets) {
      if (dataset.ymlContent) {
        const tables = extractTablesFromYml(dataset.ymlContent);
        allowedTables.push(...tables);

        const datasetsWithColumns = extractDatasetsFromYml(dataset.ymlContent);
        allowedDatasets.push(...datasetsWithColumns);
      }
    }

    // Check each table in query against permissions
    const unauthorizedTables: string[] = [];
    for (const queryTable of tablesInQuery) {
      let isAuthorized = false;

      // Check if query table matches any allowed table
      for (const allowedTable of allowedTables) {
        const matches = tablesMatch(queryTable, allowedTable);
        if (matches) {
          isAuthorized = true;
          break;
        }
      }

      if (!isAuthorized) {
        unauthorizedTables.push(queryTable.fullName);
      }
    }

    // Continue to validate column-level permissions even if tables are unauthorized
    // This allows us to report both unauthorized tables AND their columns
    const columnReferences = extractColumnReferences(sql, dataSourceSyntax);
    const unauthorizedColumns: UnauthorizedColumn[] = [];

    for (const [tableName, columns] of columnReferences) {
      // Find the matching allowed dataset for this table
      let matchingDataset: ParsedDataset | undefined;

      for (const dataset of allowedDatasets) {
        // Check if table names match (case-insensitive)
        const tableNameLower = tableName.toLowerCase();
        const datasetFullNameLower = dataset.fullName.toLowerCase();
        const datasetTableLower = dataset.table.toLowerCase();

        // Handle different qualification levels - check both fullName and table
        // (e.g. "schema.table" in the query vs a bare "table" in the dataset, or
        // vice versa). Suffix matching via endsWith covers partially-qualified names.
        if (
          tableNameLower === datasetFullNameLower ||
          tableNameLower === datasetTableLower ||
          tableNameLower.endsWith(`.${datasetTableLower}`) ||
          datasetFullNameLower === tableNameLower
        ) {
          matchingDataset = dataset;
          break;
        }
      }

      if (matchingDataset) {
        // Found a matching dataset - validate columns if it has restrictions
        if (matchingDataset.allowedColumns.size > 0) {
          // allowedColumns is assumed to hold lowercase names — comparison below
          // lowercases the query side only.
          for (const column of columns) {
            if (!matchingDataset.allowedColumns.has(column.toLowerCase())) {
              unauthorizedColumns.push({
                table: tableName,
                column: column,
              });
            }
          }
        }
        // If dataset has no column restrictions, it's backward compatibility mode (allow all columns)
      } else {
        // No matching dataset found - this table is completely unauthorized
        // Check if this table was already marked as unauthorized
        const isTableUnauthorized = unauthorizedTables.some((t) => {
          const tLower = t.toLowerCase();
          const tableNameLower = tableName.toLowerCase();
          return (
            tLower === tableNameLower ||
            tLower.endsWith(`.${tableNameLower.split('.').pop()}`) ||
            tableNameLower.endsWith(`.${tLower.split('.').pop()}`)
          );
        });

        if (isTableUnauthorized) {
          // Table is unauthorized, so all its columns are also unauthorized
          for (const column of columns) {
            unauthorizedColumns.push({
              table: tableName,
              column: column,
            });
          }
        }
      }
    }

    // Authorized only when there are no table violations, no column violations,
    // and no wildcard violation.
    const result: PermissionValidationResult = {
      isAuthorized:
        unauthorizedTables.length === 0 && unauthorizedColumns.length === 0 && !wildcardError,
      unauthorizedTables,
    };

    if (unauthorizedColumns.length > 0) {
      result.unauthorizedColumns = unauthorizedColumns;
    }

    if (wildcardError) {
      result.error = wildcardError;
    }

    return result;
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);

    // Provide more specific guidance based on the error
    if (errorMessage.includes('parse')) {
      return {
        isAuthorized: false,
        unauthorizedTables: [],
        error: `Failed to validate SQL permissions due to parsing error: ${errorMessage}. Please check your SQL syntax and ensure it's valid.`,
      };
    }

    if (errorMessage.includes('permission')) {
      return {
        isAuthorized: false,
        unauthorizedTables: [],
        error: `Permission check failed: ${errorMessage}. Please ensure you have the necessary access rights for the requested tables and columns.`,
      };
    }

    return {
      isAuthorized: false,
      unauthorizedTables: [],
      error: `Permission validation failed: ${errorMessage}. Please verify your SQL query syntax and ensure you have access to the requested resources.`,
    };
  }
}
/**
 * Builds the user-facing "Insufficient permissions" message from the lists of
 * unauthorized tables and columns produced by validateSqlPermissions.
 * Returns an empty string when there is nothing to report.
 */
export function createPermissionErrorMessage(
  unauthorizedTables: string[],
  unauthorizedColumns?: UnauthorizedColumn[]
): string {
  const parts: string[] = [];

  // Table-level violations, with guidance on how to proceed.
  if (unauthorizedTables.length > 0) {
    const tableList = unauthorizedTables.join(', ');
    parts.push(
      unauthorizedTables.length === 1
        ? `You do not have access to table: ${tableList}. Please request access to this table or use a different table that you have permissions for.`
        : `You do not have access to the following tables: ${tableList}. Please request access to these tables or modify your query to use only authorized tables.`
    );
  }

  // Column-level violations, grouped per table for readability.
  if (unauthorizedColumns && unauthorizedColumns.length > 0) {
    const columnsByTable = new Map<string, string[]>();
    for (const entry of unauthorizedColumns) {
      const bucket = columnsByTable.get(entry.table) ?? [];
      bucket.push(entry.column);
      columnsByTable.set(entry.table, bucket);
    }

    const perTable: string[] = [];
    for (const [table, cols] of columnsByTable) {
      perTable.push(
        `Table '${table}': columns [${cols.join(', ')}] are not available in your permitted dataset`
      );
    }

    parts.push(
      perTable.length === 1
        ? `Unauthorized column access - ${perTable[0]}. Please use only the columns that are available in your permitted datasets, or request access to additional columns.`
        : `Unauthorized column access:\n${perTable.map((m) => ` - ${m}`).join('\n')}\n\nPlease modify your query to use only the columns available in your permitted datasets, or request access to the additional columns you need.`
    );
  }

  return parts.length === 0 ? '' : `Insufficient permissions: ${parts.join('. ')}`;
}

View File

@ -1,11 +1,8 @@
import { createPermissionErrorMessage, validateSqlPermissions } from '@buster/access-controls';
import { type DataSource, withRateLimit } from '@buster/data-source';
import { updateMessageEntries } from '@buster/database';
import { wrapTraced } from 'braintrust';
import { getDataSource } from '../../../utils/get-data-source';
import {
createPermissionErrorMessage,
validateSqlPermissions,
} from '../../../utils/sql-permissions';
import { createRawToolResultEntry } from '../../shared/create-raw-llm-tool-result-entry';
import {
EXECUTE_SQL_TOOL_NAME,

View File

@ -1,7 +1,7 @@
import { checkQueryIsReadOnly } from '@buster/access-controls';
import { type DataSource, withRateLimit } from '@buster/data-source';
import { wrapTraced } from 'braintrust';
import { getDataSource } from '../../../utils/get-data-source';
import { checkQueryIsReadOnly } from '../../../utils/sql-permissions/sql-parser-helpers';
import type {
SuperExecuteSqlContext,
SuperExecuteSqlInput,

View File

@ -1,7 +1,7 @@
import { checkQueryIsReadOnly } from '@buster/access-controls';
import type { DataSource } from '@buster/data-source';
import { beforeEach, describe, expect, it, vi } from 'vitest';
import { getDataSource } from '../../../utils/get-data-source';
import { checkQueryIsReadOnly } from '../../../utils/sql-permissions/sql-parser-helpers';
import type { SuperExecuteSqlContext, SuperExecuteSqlState } from './super-execute-sql';
import { createSuperExecuteSqlExecute } from './super-execute-sql-execute';
@ -10,7 +10,7 @@ vi.mock('../../../utils/get-data-source', () => ({
getDataSource: vi.fn(),
}));
vi.mock('../../../utils/sql-permissions/sql-parser-helpers', () => ({
vi.mock('@buster/access-controls', () => ({
checkQueryIsReadOnly: vi.fn(),
}));

View File

@ -1,4 +1,5 @@
import { randomUUID } from 'node:crypto';
import { createPermissionErrorMessage, validateSqlPermissions } from '@buster/access-controls';
import type { Credentials } from '@buster/data-source';
import { createMetadataFromResults, executeMetricQuery } from '@buster/data-source';
import { assetPermissions, db, metricFiles, updateMessageEntries } from '@buster/database';
@ -12,10 +13,6 @@ import { wrapTraced } from 'braintrust';
import * as yaml from 'yaml';
import { z } from 'zod';
import { getDataSourceCredentials } from '../../../../utils/get-data-source';
import {
createPermissionErrorMessage,
validateSqlPermissions,
} from '../../../../utils/sql-permissions';
import { createRawToolResultEntry } from '../../../shared/create-raw-llm-tool-result-entry';
import { trackFileAssociations } from '../../file-tracking-helper';
import { validateAndAdjustBarLineAxes } from '../helpers/bar-line-axis-validator';

View File

@ -1,3 +1,4 @@
import { createPermissionErrorMessage, validateSqlPermissions } from '@buster/access-controls';
import type { Credentials } from '@buster/data-source';
import { createMetadataFromResults, executeMetricQuery } from '@buster/data-source';
import { db, metricFiles, updateMessageEntries } from '@buster/database';
@ -12,10 +13,6 @@ import { eq, inArray } from 'drizzle-orm';
import * as yaml from 'yaml';
import { z } from 'zod';
import { getDataSourceCredentials } from '../../../../utils/get-data-source';
import {
createPermissionErrorMessage,
validateSqlPermissions,
} from '../../../../utils/sql-permissions';
import { createRawToolResultEntry } from '../../../shared/create-raw-llm-tool-result-entry';
import { trackFileAssociations } from '../../file-tracking-helper';
import { validateAndAdjustBarLineAxes } from '../helpers/bar-line-axis-validator';

View File

@ -0,0 +1,36 @@
import { and, eq, isNull } from 'drizzle-orm';
import { z } from 'zod';
import { db } from '../../connection';
import { dataSources } from '../../schema';
// Zod schema for the subset of data-source fields this module returns.
export const DataSourceSchema = z.object({
  id: z.string(),
  name: z.string(),
  type: z.string(),
  organizationId: z.string(),
  secretId: z.string(),
});

export type DataSource = z.infer<typeof DataSourceSchema>;

/**
 * Fetches a single, non-soft-deleted data source by primary key.
 *
 * @param dataSourceId - The ID of the data source to fetch
 * @returns The data source or null if not found
 */
export async function getDataSourceById(dataSourceId: string): Promise<DataSource | null> {
  const rows = await db
    .select({
      id: dataSources.id,
      name: dataSources.name,
      type: dataSources.type,
      organizationId: dataSources.organizationId,
      secretId: dataSources.secretId,
    })
    .from(dataSources)
    // Soft-deleted rows (deletedAt set) are treated as missing.
    .where(and(eq(dataSources.id, dataSourceId), isNull(dataSources.deletedAt)))
    .limit(1);

  return rows[0] ?? null;
}

View File

@ -0,0 +1,2 @@
// Barrel file for data-source query helpers.
export * from './get-data-source-by-id';
export * from './organizationDataSource';

View File

@ -1 +1 @@
export * from './organizationDataSource';
export * from '../data-sources/organizationDataSource';

View File

@ -1,7 +1,7 @@
export * from './api-keys';
export * from './messages';
export * from './users';
export * from './dataSources';
export * from './data-sources';
export * from './datasets';
export * from './assets';
export * from './asset-permissions';

View File

@ -30,3 +30,4 @@ export * from './type-utilities';
export * from './user';
export * from './shortcuts';
export * from './healthcheck';
export * from './sql';

View File

@ -0,0 +1,17 @@
import { z } from 'zod';
import type { DataMetadata } from '../metrics';
// Validation for the individual fields of a /sql/run request.
const dataSourceIdField = z.string().uuid('Data source ID must be a valid UUID');
const sqlField = z.string().min(1, 'SQL query cannot be empty');

// Request schema for running SQL queries via POST /sql/run.
export const RunSqlRequestSchema = z.object({
  data_source_id: dataSourceIdField,
  sql: sqlField,
});

export type RunSqlRequest = z.infer<typeof RunSqlRequestSchema>;

// Response type matching the structure from metric responses.
// `has_more_records` signals that the result set was truncated server-side.
export interface RunSqlResponse {
  data: Record<string, string | number | null>[];
  data_metadata: DataMetadata;
  has_more_records: boolean;
}

View File

@ -926,6 +926,9 @@ importers:
packages/access-controls:
dependencies:
'@buster/data-source':
specifier: workspace:*
version: link:../data-source
'@buster/database':
specifier: workspace:*
version: link:../database
@ -944,9 +947,15 @@ importers:
lru-cache:
specifier: ^11.1.0
version: 11.1.0
node-sql-parser:
specifier: ^5.3.12
version: 5.3.12
uuid:
specifier: 'catalog:'
version: 11.1.0
yaml:
specifier: ^2.8.1
version: 2.8.1
zod:
specifier: 'catalog:'
version: 3.25.76