sql parser and data source helpers and type mappings

This commit is contained in:
dal 2025-09-16 15:50:33 -06:00
parent 04e3ccda2b
commit 72ef9464dc
No known key found for this signature in database
GPG Key ID: 16F4B0E1E9F61122
19 changed files with 877 additions and 4522 deletions

View File

@ -0,0 +1,383 @@
import { describe, it, expect } from 'vitest';
import {
extractPhysicalTables,
extractColumnReferences,
tablesMatch,
checkQueryIsReadOnly,
validateWildcardUsage,
extractTablesFromYml,
extractDatasetsFromYml,
} from './parser-helpers';
describe('extractPhysicalTables', () => {
it('should handle BigQuery queries with backtick-quoted project names', () => {
const sql = "SELECT COUNT(DISTINCT u.user_id) as total_users FROM `buster-381916`.analytics.user u";
const tables = extractPhysicalTables(sql, 'bigquery');
expect(tables).toHaveLength(1);
expect(tables[0]).toMatchObject({
database: 'buster-381916',
schema: 'analytics',
table: 'user',
fullName: 'buster-381916.analytics.user'
});
});
it('should handle standard BigQuery queries without backticks', () => {
const sql = 'SELECT * FROM project.dataset.table';
const tables = extractPhysicalTables(sql, 'bigquery');
expect(tables).toHaveLength(1);
expect(tables[0]).toMatchObject({
database: 'project',
schema: 'dataset',
table: 'table',
fullName: 'project.dataset.table'
});
});
it('should handle PostgreSQL queries', () => {
const sql = 'SELECT * FROM public.users';
const tables = extractPhysicalTables(sql, 'postgresql');
expect(tables).toHaveLength(1);
expect(tables[0]).toMatchObject({
schema: 'public',
table: 'users',
fullName: 'public.users'
});
});
it('should handle Snowflake queries with quoted identifiers', () => {
const sql = 'SELECT * FROM "DATABASE"."SCHEMA"."TABLE"';
const tables = extractPhysicalTables(sql, 'snowflake');
expect(tables).toHaveLength(1);
expect(tables[0]).toMatchObject({
database: 'DATABASE',
schema: 'SCHEMA',
table: 'TABLE',
fullName: 'DATABASE.SCHEMA.TABLE'
});
});
it('should handle multiple tables in joins', () => {
const sql = `
SELECT u.*, o.name
FROM users u
JOIN orders o ON u.id = o.user_id
`;
const tables = extractPhysicalTables(sql);
expect(tables).toHaveLength(2);
expect(tables.map(t => t.table)).toEqual(['users', 'orders']);
});
it('should ignore CTEs and subqueries', () => {
const sql = `
WITH user_stats AS (
SELECT user_id, COUNT(*) as count
FROM orders
GROUP BY user_id
)
SELECT * FROM users u JOIN user_stats s ON u.id = s.user_id
`;
const tables = extractPhysicalTables(sql);
expect(tables).toHaveLength(2);
expect(tables.map(t => t.table)).toEqual(['orders', 'users']);
});
});
// extractColumnReferences: maps each referenced table to the set of column
// names the query touches (SELECT list, WHERE predicates, JOIN conditions).
describe('extractColumnReferences', () => {
it('should extract columns from WHERE clause', () => {
const sql = "SELECT id FROM users WHERE status = 'active' AND created_at > '2024-01-01'";
const columns = extractColumnReferences(sql);
// Result is a Map keyed by table name; each value is a Set of column names.
expect(columns.has('users')).toBe(true);
const userColumns = Array.from(columns.get('users')!);
expect(userColumns).toContain('id');
expect(userColumns).toContain('status');
expect(userColumns).toContain('created_at');
});
it('should extract columns from JOIN conditions', () => {
const sql = `
SELECT u.id, o.total
FROM users u
JOIN orders o ON u.id = o.user_id
WHERE o.status = 'completed'
`;
const columns = extractColumnReferences(sql);
// Aliased references (u.id, o.total) must resolve back to their base tables.
expect(columns.has('users')).toBe(true);
expect(Array.from(columns.get('users')!)).toEqual(['id']);
expect(columns.has('orders')).toBe(true);
expect(Array.from(columns.get('orders')!)).toContain('total');
expect(Array.from(columns.get('orders')!)).toContain('user_id');
expect(Array.from(columns.get('orders')!)).toContain('status');
});
it('should handle SELECT * queries', () => {
const sql = 'SELECT * FROM users';
const columns = extractColumnReferences(sql);
// SELECT * doesn't extract individual columns, it returns an empty map
expect(columns.size).toBe(0);
});
});
describe('checkQueryIsReadOnly', () => {
it('should allow SELECT queries', () => {
const result = checkQueryIsReadOnly('SELECT * FROM users');
expect(result.isReadOnly).toBe(true);
expect(result.error).toBeUndefined();
});
it('should allow BigQuery SELECT with backticks', () => {
const result = checkQueryIsReadOnly(
"SELECT COUNT(DISTINCT u.user_id) FROM `buster-381916`.analytics.user u",
'bigquery'
);
expect(result.isReadOnly).toBe(true);
expect(result.error).toBeUndefined();
});
it('should reject INSERT queries', () => {
const result = checkQueryIsReadOnly("INSERT INTO users VALUES ('test')");
expect(result.isReadOnly).toBe(false);
expect(result.error).toContain("Query type 'INSERT' is not allowed");
});
it('should reject UPDATE queries', () => {
const result = checkQueryIsReadOnly("UPDATE users SET name = 'test'");
expect(result.isReadOnly).toBe(false);
expect(result.error).toContain("Query type 'UPDATE' is not allowed");
});
it('should reject DELETE queries', () => {
const result = checkQueryIsReadOnly('DELETE FROM users');
expect(result.isReadOnly).toBe(false);
expect(result.error).toContain("Query type 'DELETE' is not allowed");
});
it('should reject DDL queries', () => {
const result = checkQueryIsReadOnly('CREATE TABLE test (id INT)');
expect(result.isReadOnly).toBe(false);
expect(result.error).toContain("Query type 'CREATE' is not allowed");
});
it('should allow SELECT with CTEs', () => {
const sql = `
WITH stats AS (SELECT COUNT(*) FROM orders)
SELECT * FROM stats
`;
const result = checkQueryIsReadOnly(sql);
expect(result.isReadOnly).toBe(true);
});
});
describe('validateWildcardUsage', () => {
it('should reject SELECT * on physical tables', () => {
const result = validateWildcardUsage('SELECT * FROM users');
expect(result.isValid).toBe(false);
expect(result.error).toContain('SELECT * is not allowed on physical table');
});
it('should reject SELECT * on BigQuery tables with backticks', () => {
const result = validateWildcardUsage(
'SELECT * FROM `buster-381916`.analytics.user',
'bigquery'
);
expect(result.isValid).toBe(false);
expect(result.error).toContain('SELECT * is not allowed on physical table');
});
it('should allow specific column selection', () => {
const result = validateWildcardUsage('SELECT id, name FROM users');
expect(result.isValid).toBe(true);
expect(result.error).toBeUndefined();
});
it('should allow specific columns on BigQuery tables', () => {
const result = validateWildcardUsage(
"SELECT u.user_id, u.name FROM `buster-381916`.analytics.user u",
'bigquery'
);
expect(result.isValid).toBe(true);
expect(result.error).toBeUndefined();
});
it('should allow SELECT * on CTEs', () => {
const sql = `
WITH user_stats AS (
SELECT user_id, COUNT(*) as count
FROM orders
GROUP BY user_id
)
SELECT * FROM user_stats
`;
const result = validateWildcardUsage(sql);
expect(result.isValid).toBe(true);
});
});
// tablesMatch: compares a table referenced in a query against a table allowed
// by the user's permissioned datasets, tolerating different qualification
// levels (bare name vs schema.name vs database.schema.name) and letter case.
describe('tablesMatch', () => {
it('should match exact table names', () => {
const queryTable = { table: 'users', fullName: 'users' };
const allowedTable = { table: 'users', fullName: 'users' };
expect(tablesMatch(queryTable, allowedTable)).toBe(true);
});
it('should match with schema qualification', () => {
// A schema-qualified query table still matches an unqualified allowed table.
const queryTable = { schema: 'public', table: 'users', fullName: 'public.users' };
const allowedTable = { table: 'users', fullName: 'users' };
expect(tablesMatch(queryTable, allowedTable)).toBe(true);
});
it('should match BigQuery three-part names', () => {
// database.schema.table form, including a hyphenated BigQuery project id.
const queryTable = {
database: 'buster-381916',
schema: 'analytics',
table: 'user',
fullName: 'buster-381916.analytics.user'
};
const allowedTable = {
database: 'buster-381916',
schema: 'analytics',
table: 'user',
fullName: 'buster-381916.analytics.user'
};
expect(tablesMatch(queryTable, allowedTable)).toBe(true);
});
it('should be case-insensitive', () => {
const queryTable = { table: 'USERS', fullName: 'USERS' };
const allowedTable = { table: 'users', fullName: 'users' };
expect(tablesMatch(queryTable, allowedTable)).toBe(true);
});
it('should not match different tables', () => {
const queryTable = { table: 'orders', fullName: 'orders' };
const allowedTable = { table: 'users', fullName: 'users' };
expect(tablesMatch(queryTable, allowedTable)).toBe(false);
});
});
describe('extractTablesFromYml', () => {
it('should extract tables from YML content with models array', () => {
const yml = `
models:
- name: users
database: prod
schema: public
- name: orders
schema: sales
`;
const tables = extractTablesFromYml(yml);
expect(tables).toHaveLength(2);
expect(tables[0]).toMatchObject({
database: 'prod',
schema: 'public',
table: 'users',
fullName: 'prod.public.users'
});
expect(tables[1]).toMatchObject({
schema: 'sales',
table: 'orders',
fullName: 'sales.orders'
});
});
it('should handle flat format YML', () => {
const yml = `
name: user
database: buster-381916
schema: analytics
`;
const tables = extractTablesFromYml(yml);
expect(tables).toHaveLength(1);
expect(tables[0]).toMatchObject({
database: 'buster-381916',
schema: 'analytics',
table: 'user',
fullName: 'buster-381916.analytics.user'
});
});
it('should handle BigQuery project names in models array', () => {
const yml = `
models:
- name: user
database: buster-381916
schema: analytics
`;
const tables = extractTablesFromYml(yml);
expect(tables).toHaveLength(1);
expect(tables[0]).toMatchObject({
database: 'buster-381916',
schema: 'analytics',
table: 'user',
fullName: 'buster-381916.analytics.user'
});
});
});
describe('extractDatasetsFromYml', () => {
it('should extract datasets with dimensions and measures', () => {
const yml = `
name: users
database: prod
schema: public
dimensions:
- name: id
- name: name
- name: email
measures:
- name: count
`;
const datasets = extractDatasetsFromYml(yml);
expect(datasets).toHaveLength(1);
expect(datasets[0].table).toBe('users');
expect(datasets[0].allowedColumns.size).toBe(4);
expect(datasets[0].allowedColumns.has('id')).toBe(true);
expect(datasets[0].allowedColumns.has('name')).toBe(true);
expect(datasets[0].allowedColumns.has('email')).toBe(true);
expect(datasets[0].allowedColumns.has('count')).toBe(true);
});
it('should handle models array with dimensions', () => {
const yml = `
models:
- name: orders
schema: sales
dimensions:
- name: id
- name: status
`;
const datasets = extractDatasetsFromYml(yml);
expect(datasets).toHaveLength(1);
expect(datasets[0].table).toBe('orders');
expect(datasets[0].allowedColumns.size).toBe(2);
expect(datasets[0].allowedColumns.has('id')).toBe(true);
expect(datasets[0].allowedColumns.has('status')).toBe(true);
});
it('should handle datasets without column restrictions', () => {
const yml = `
name: products
schema: inventory
`;
const datasets = extractDatasetsFromYml(yml);
expect(datasets).toHaveLength(1);
expect(datasets[0].table).toBe('products');
expect(datasets[0].allowedColumns.size).toBe(0);
});
});

View File

@ -1,52 +0,0 @@
import { createPermissionErrorMessage, validateSqlPermissions } from './permission-validator';
export interface ExecuteWithPermissionResult<T = unknown> {
success: boolean;
data?: T;
error?: string;
}
/**
 * Wraps SQL execution with permission validation.
 * The supplied executor only runs after every table/column referenced by the
 * query passes the user's permission check; any failure is surfaced through
 * the result object rather than a thrown exception.
 *
 * @param sql - SQL text to validate before execution
 * @param userId - User whose permissions gate the execution
 * @param executeFn - Deferred executor invoked only when authorized
 * @param dataSourceSyntax - Optional SQL dialect hint for the validator
 */
export async function executeWithPermissionCheck<T>(
  sql: string,
  userId: string,
  executeFn: () => Promise<T>,
  dataSourceSyntax?: string
): Promise<ExecuteWithPermissionResult<T>> {
  // A user identity is mandatory — permissions are resolved per user.
  if (!userId) {
    return {
      success: false,
      error: 'User authentication required for SQL execution',
    };
  }

  // Validate table/column access before touching the data source.
  const validation = await validateSqlPermissions(sql, userId, dataSourceSyntax);
  if (!validation.isAuthorized) {
    return {
      success: false,
      error: createPermissionErrorMessage(
        validation.unauthorizedTables,
        validation.unauthorizedColumns
      ),
    };
  }

  // Authorized — run the executor, converting a throw into a failed result.
  try {
    return { success: true, data: await executeFn() };
  } catch (error) {
    const message = error instanceof Error ? error.message : 'SQL execution failed';
    return { success: false, error: message };
  }
}

View File

@ -1,3 +0,0 @@
export * from './sql-parser-helpers';
export * from './permission-validator';
export * from './execute-with-permission-check';

View File

@ -1,276 +0,0 @@
import { getPermissionedDatasets } from '@buster/access-controls';
import {
type ParsedDataset,
type ParsedTable,
checkQueryIsReadOnly,
extractColumnReferences,
extractDatasetsFromYml,
extractPhysicalTables,
extractTablesFromYml,
tablesMatch,
validateWildcardUsage,
} from './sql-parser-helpers.js';
export interface UnauthorizedColumn {
table: string;
column: string;
}
export interface PermissionValidationResult {
isAuthorized: boolean;
unauthorizedTables: string[];
unauthorizedColumns?: UnauthorizedColumn[];
error?: string;
}
/**
 * Validates SQL query against user's permissioned datasets
 * Checks that all tables and columns referenced in the query are accessible to the user
 *
 * Pipeline (all visible below):
 *  1. Reject anything that is not a read-only statement.
 *  2. Record a wildcard (SELECT *) violation but keep validating so the
 *     caller gets table/column feedback in the same pass.
 *  3. Extract physical tables and compare them against tables parsed from
 *     the user's permissioned dataset YML.
 *  4. Compare referenced columns against each dataset's allowed-column set
 *     (an empty set means "no column restrictions", i.e. allow all columns).
 *
 * Never throws: internal errors are converted into an unauthorized result
 * carrying a descriptive `error` message.
 *
 * @param sql - SQL text to validate
 * @param userId - User whose dataset permissions are loaded
 * @param dataSourceSyntax - Optional SQL dialect hint for the parser helpers
 */
export async function validateSqlPermissions(
sql: string,
userId: string,
dataSourceSyntax?: string
): Promise<PermissionValidationResult> {
try {
// First check if query is read-only
const readOnlyCheck = checkQueryIsReadOnly(sql, dataSourceSyntax);
if (!readOnlyCheck.isReadOnly) {
return {
isAuthorized: false,
unauthorizedTables: [],
error:
readOnlyCheck.error ||
'Only SELECT statements are allowed for read-only access. Please modify your query to use SELECT instead of write operations like INSERT, UPDATE, DELETE, or DDL statements.',
};
}
const wildcardCheck = validateWildcardUsage(sql, dataSourceSyntax);
// Store the wildcard error but continue to validate columns to provide comprehensive feedback
let wildcardError: string | undefined;
if (!wildcardCheck.isValid) {
wildcardError =
wildcardCheck.error ||
'SELECT * is not allowed on physical tables. Please explicitly list the column names you need (e.g., SELECT id, name, email FROM users). This helps prevent unintended data exposure and improves query performance.';
}
// Extract physical tables from SQL
const tablesInQuery = extractPhysicalTables(sql, dataSourceSyntax);
if (tablesInQuery.length === 0) {
// No tables referenced (might be a function call or constant select)
return { isAuthorized: true, unauthorizedTables: [] };
}
// Get user's permissioned datasets
// NOTE(review): page size is capped at 1000 — a user with more datasets than
// that could see false "unauthorized" results; confirm against the API.
const permissionedDatasets = await getPermissionedDatasets({
userId,
page: 0,
pageSize: 1000,
});
// Extract all allowed tables and datasets from permissions
const allowedTables: ParsedTable[] = [];
const allowedDatasets: ParsedDataset[] = [];
for (const dataset of permissionedDatasets.datasets) {
if (dataset.ymlContent) {
const tables = extractTablesFromYml(dataset.ymlContent);
allowedTables.push(...tables);
const datasetsWithColumns = extractDatasetsFromYml(dataset.ymlContent);
allowedDatasets.push(...datasetsWithColumns);
}
}
// Check each table in query against permissions
const unauthorizedTables: string[] = [];
for (const queryTable of tablesInQuery) {
let isAuthorized = false;
// Check if query table matches any allowed table
for (const allowedTable of allowedTables) {
const matches = tablesMatch(queryTable, allowedTable);
if (matches) {
isAuthorized = true;
break;
}
}
if (!isAuthorized) {
unauthorizedTables.push(queryTable.fullName);
}
}
// Continue to validate column-level permissions even if tables are unauthorized
// This allows us to report both unauthorized tables AND their columns
const columnReferences = extractColumnReferences(sql, dataSourceSyntax);
const unauthorizedColumns: UnauthorizedColumn[] = [];
for (const [tableName, columns] of columnReferences) {
// Find the matching allowed dataset for this table
let matchingDataset: ParsedDataset | undefined;
for (const dataset of allowedDatasets) {
// Check if table names match (case-insensitive)
const tableNameLower = tableName.toLowerCase();
const datasetFullNameLower = dataset.fullName.toLowerCase();
const datasetTableLower = dataset.table.toLowerCase();
// Handle different qualification levels - check both fullName and table
if (
tableNameLower === datasetFullNameLower ||
tableNameLower === datasetTableLower ||
tableNameLower.endsWith(`.${datasetTableLower}`) ||
datasetFullNameLower === tableNameLower
) {
matchingDataset = dataset;
break;
}
}
if (matchingDataset) {
// Found a matching dataset - validate columns if it has restrictions
if (matchingDataset.allowedColumns.size > 0) {
for (const column of columns) {
if (!matchingDataset.allowedColumns.has(column.toLowerCase())) {
unauthorizedColumns.push({
table: tableName,
column: column,
});
}
}
}
// If dataset has no column restrictions, it's backward compatibility mode (allow all columns)
} else {
// No matching dataset found - this table is completely unauthorized
// Check if this table was already marked as unauthorized
const isTableUnauthorized = unauthorizedTables.some((t) => {
const tLower = t.toLowerCase();
const tableNameLower = tableName.toLowerCase();
// Suffix comparison tolerates differing qualification (schema.table vs table).
return (
tLower === tableNameLower ||
tLower.endsWith(`.${tableNameLower.split('.').pop()}`) ||
tableNameLower.endsWith(`.${tLower.split('.').pop()}`)
);
});
if (isTableUnauthorized) {
// Table is unauthorized, so all its columns are also unauthorized
for (const column of columns) {
unauthorizedColumns.push({
table: tableName,
column: column,
});
}
}
}
}
// Authorized only when nothing was flagged in any phase (tables, columns, wildcard).
const result: PermissionValidationResult = {
isAuthorized:
unauthorizedTables.length === 0 && unauthorizedColumns.length === 0 && !wildcardError,
unauthorizedTables,
};
if (unauthorizedColumns.length > 0) {
result.unauthorizedColumns = unauthorizedColumns;
}
if (wildcardError) {
result.error = wildcardError;
}
return result;
} catch (error) {
const errorMessage = error instanceof Error ? error.message : String(error);
// Provide more specific guidance based on the error
if (errorMessage.includes('parse')) {
return {
isAuthorized: false,
unauthorizedTables: [],
error: `Failed to validate SQL permissions due to parsing error: ${errorMessage}. Please check your SQL syntax and ensure it's valid.`,
};
}
if (errorMessage.includes('permission')) {
return {
isAuthorized: false,
unauthorizedTables: [],
error: `Permission check failed: ${errorMessage}. Please ensure you have the necessary access rights for the requested tables and columns.`,
};
}
return {
isAuthorized: false,
unauthorizedTables: [],
error: `Permission validation failed: ${errorMessage}. Please verify your SQL query syntax and ensure you have access to the requested resources.`,
};
}
}
/**
 * Creates a detailed error message for unauthorized table or column access.
 * Returns an empty string when there is nothing to report.
 *
 * @param unauthorizedTables - Fully-qualified names of denied tables
 * @param unauthorizedColumns - Optional per-table column denials
 */
export function createPermissionErrorMessage(
  unauthorizedTables: string[],
  unauthorizedColumns?: UnauthorizedColumn[]
): string {
  const messages: string[] = [];

  // Table-level denials, with singular/plural phrasing.
  if (unauthorizedTables.length > 0) {
    const tableList = unauthorizedTables.join(', ');
    messages.push(
      unauthorizedTables.length === 1
        ? `You do not have access to table: ${tableList}. Please request access to this table or use a different table that you have permissions for.`
        : `You do not have access to the following tables: ${tableList}. Please request access to these tables or modify your query to use only authorized tables.`
    );
  }

  // Column-level denials, grouped per table for readability.
  if (unauthorizedColumns && unauthorizedColumns.length > 0) {
    const columnsByTable = new Map<string, string[]>();
    for (const { table, column } of unauthorizedColumns) {
      const existing = columnsByTable.get(table);
      if (existing) {
        existing.push(column);
      } else {
        columnsByTable.set(table, [column]);
      }
    }

    const columnMessages: string[] = [];
    for (const [table, columns] of columnsByTable) {
      const columnList = columns.join(', ');
      columnMessages.push(
        `Table '${table}': columns [${columnList}] are not available in your permitted dataset`
      );
    }

    messages.push(
      columnMessages.length === 1
        ? `Unauthorized column access - ${columnMessages[0]}. Please use only the columns that are available in your permitted datasets, or request access to additional columns.`
        : `Unauthorized column access:\n${columnMessages.map((m) => ` - ${m}`).join('\n')}\n\nPlease modify your query to use only the columns available in your permitted datasets, or request access to the additional columns you need.`
    );
  }

  if (messages.length === 0) {
    return '';
  }
  return `Insufficient permissions: ${messages.join('. ')}`;
}

File diff suppressed because it is too large Load Diff

View File

@ -117,7 +117,9 @@ export class BigQueryAdapter extends BaseAdapter {
resultRows = resultRows.slice(0, maxRows);
}
// BigQuery doesn't provide detailed field metadata in the same way as other databases
// BigQuery field metadata extraction
// TODO: Extract schema from job metadata when available
// For now, return empty fields array to maintain compatibility
const fields: FieldMetadata[] = [];
return {

View File

@ -169,8 +169,8 @@ describe('MySQLAdapter', () => {
rows: [{ id: 1, name: 'Test' }],
rowCount: 1,
fields: [
{ name: 'id', type: 'mysql_type_LONG', nullable: true, length: 0, precision: 0 },
{ name: 'name', type: 'mysql_type_VAR_STRING', nullable: true, length: 0, precision: 0 },
{ name: 'id', type: 'integer', nullable: true, length: 0, precision: 0 },
{ name: 'name', type: 'varchar', nullable: true, length: 0, precision: 0 },
],
hasMoreRows: false,
});

View File

@ -5,6 +5,7 @@ import { type Credentials, DataSourceType, type MySQLCredentials } from '../type
import type { QueryParameter } from '../types/query';
import { type AdapterQueryResult, BaseAdapter, type FieldMetadata } from './base';
import { normalizeRowValues } from './helpers/normalize-values';
import { mapMySQLType } from './type-mappings/mysql';
/**
* MySQL database adapter
@ -116,7 +117,7 @@ export class MySQLAdapter extends BaseAdapter {
const fieldMetadata: FieldMetadata[] = Array.isArray(fields)
? fields.map((field) => ({
name: field.name,
type: `mysql_type_${field.type}`, // MySQL field type
type: mapMySQLType(`mysql_type_${field.type}`), // Map type code to normalized type
nullable: typeof field.flags === 'number' ? (field.flags & 1) === 0 : true, // NOT_NULL flag is bit 0
length: typeof field.length === 'number' && field.length > 0 ? field.length : 0,
precision:

View File

@ -243,8 +243,8 @@ describe('PostgreSQLAdapter', () => {
rows: [{ id: 1, name: 'Test' }],
rowCount: 1,
fields: [
{ name: 'id', type: 'pg_type_23', nullable: true, length: 4 },
{ name: 'name', type: 'pg_type_25', nullable: true, length: 0 },
{ name: 'id', type: 'integer', nullable: true, length: 4 },
{ name: 'name', type: 'text', nullable: true, length: 0 },
],
hasMoreRows: false,
});

View File

@ -6,6 +6,7 @@ import { type Credentials, DataSourceType, type PostgreSQLCredentials } from '..
import type { QueryParameter } from '../types/query';
import { type AdapterQueryResult, BaseAdapter, type FieldMetadata } from './base';
import { normalizeRowValues } from './helpers/normalize-values';
import { mapPostgreSQLType } from './type-mappings/postgresql';
// Internal types for pg-cursor that aren't exported
interface CursorResult {
@ -104,7 +105,7 @@ export class PostgreSQLAdapter extends BaseAdapter {
const fields: FieldMetadata[] =
result.fields?.map((field) => ({
name: field.name,
type: `pg_type_${field.dataTypeID}`, // PostgreSQL type ID
type: mapPostgreSQLType(`pg_type_${field.dataTypeID}`), // Map OID to normalized type
nullable: true, // PostgreSQL doesn't provide this info directly
length: field.dataTypeSize > 0 ? field.dataTypeSize : 0,
})) || [];
@ -149,7 +150,7 @@ export class PostgreSQLAdapter extends BaseAdapter {
if (fields.length === 0 && cursor._result?.fields) {
fields = cursor._result.fields.map((field) => ({
name: field.name,
type: `pg_type_${field.dataTypeID}`,
type: mapPostgreSQLType(`pg_type_${field.dataTypeID}`), // Map OID to normalized type
nullable: true,
length: field.dataTypeSize > 0 ? field.dataTypeSize : 0,
}));

View File

@ -201,8 +201,8 @@ describe('RedshiftAdapter', () => {
rows: [{ id: 1, name: 'Test' }],
rowCount: 1,
fields: [
{ name: 'id', type: 'redshift_type_23', nullable: true, length: 4 },
{ name: 'name', type: 'redshift_type_25', nullable: true, length: 0 },
{ name: 'id', type: 'integer', nullable: true, length: 4 },
{ name: 'name', type: 'text', nullable: true, length: 0 },
],
hasMoreRows: false,
});

View File

@ -6,6 +6,7 @@ import { type Credentials, DataSourceType, type RedshiftCredentials } from '../t
import type { QueryParameter } from '../types/query';
import { type AdapterQueryResult, BaseAdapter, type FieldMetadata } from './base';
import { normalizeRowValues } from './helpers/normalize-values';
import { mapPostgreSQLType } from './type-mappings/postgresql';
// Internal types for pg-cursor that aren't exported
interface CursorResult {
@ -95,7 +96,7 @@ export class RedshiftAdapter extends BaseAdapter {
const fields: FieldMetadata[] =
result.fields?.map((field) => ({
name: field.name,
type: `redshift_type_${field.dataTypeID}`, // Redshift type ID
type: mapPostgreSQLType(`pg_type_${field.dataTypeID}`), // Map OID to normalized type (Redshift uses PostgreSQL OIDs)
nullable: true, // Redshift doesn't provide this info directly
length: field.dataTypeSize > 0 ? field.dataTypeSize : 0,
})) || [];
@ -140,7 +141,7 @@ export class RedshiftAdapter extends BaseAdapter {
if (fields.length === 0 && cursor._result?.fields) {
fields = cursor._result.fields.map((field) => ({
name: field.name,
type: `redshift_type_${field.dataTypeID}`,
type: mapPostgreSQLType(`pg_type_${field.dataTypeID}`), // Map OID to normalized type (Redshift uses PostgreSQL OIDs)
nullable: true,
length: field.dataTypeSize > 0 ? field.dataTypeSize : 0,
}));

View File

@ -0,0 +1,130 @@
/**
* Unified type mapping utilities for all database adapters
* Provides consistent type normalization across different database systems
*/
import { mapPostgreSQLType, getPostgreSQLSimpleType } from './postgresql';
import { mapMySQLType, getMySQLSimpleType } from './mysql';
import type { FieldMetadata } from '../base';
export * from './postgresql';
export * from './mysql';
/**
* Database type identifiers for routing to correct mapper
*/
export type DatabaseType =
| 'postgresql'
| 'redshift'
| 'mysql'
| 'sqlserver'
| 'snowflake'
| 'bigquery';
/**
* Simple type categories used in metadata
*/
export type SimpleType = 'number' | 'text' | 'date';
/**
 * Maps a database-specific type to a normalized type name.
 * @param dbType - The database type (postgresql, mysql, etc.)
 * @param typeValue - The type value (OID, code, or name)
 * @returns Normalized type name
 */
export function mapDatabaseType(dbType: DatabaseType, typeValue: string | number): string {
  // PostgreSQL-family engines share one OID mapper (Redshift reuses PostgreSQL OIDs).
  if (dbType === 'postgresql' || dbType === 'redshift') {
    return mapPostgreSQLType(typeValue);
  }
  if (dbType === 'mysql') {
    return mapMySQLType(typeValue);
  }
  // sqlserver / snowflake / bigquery (and any unknown engine) already report
  // readable type names — lowercase them; non-strings fall back to 'text'.
  return typeof typeValue === 'string' ? typeValue.toLowerCase() : 'text';
}
/**
 * Gets the simple type category for a normalized type.
 * @param dbType - The database type
 * @param normalizedType - The normalized type name
 * @returns Simple type category ('number' | 'text' | 'date')
 */
export function getSimpleType(dbType: DatabaseType, normalizedType: string): SimpleType {
  if (dbType === 'postgresql' || dbType === 'redshift') {
    return getPostgreSQLSimpleType(normalizedType);
  }
  if (dbType === 'mysql') {
    return getMySQLSimpleType(normalizedType);
  }
  // sqlserver / snowflake / bigquery and unknown engines use the generic heuristic.
  return getGenericSimpleType(normalizedType);
}
/**
 * Generic simple type detection for databases without specific mappings.
 * Categorizes by substring matching on the lowercased type name.
 * @param normalizedType - The normalized type name
 * @returns Simple type category
 */
export function getGenericSimpleType(normalizedType: string): SimpleType {
  const lowered = normalizedType.toLowerCase();

  // Numeric types: substring hints plus the exact 'money' type.
  const numericHints = ['int', 'float', 'double', 'decimal', 'numeric', 'number', 'real'];
  if (lowered === 'money' || numericHints.some((hint) => lowered.includes(hint))) {
    return 'number';
  }

  // Date/time types (covers date, time, timestamp, timestamptz, interval, …).
  const temporalHints = ['date', 'time', 'interval'];
  if (temporalHints.some((hint) => lowered.includes(hint))) {
    return 'date';
  }

  // Everything else is treated as text.
  return 'text';
}
/**
 * Normalizes field metadata with proper type mappings.
 * Produces new field objects; the input array and its elements are not mutated.
 * @param fields - Raw field metadata from database adapter
 * @param dbType - The database type
 * @returns Normalized field metadata
 */
export function normalizeFieldMetadata(
  fields: FieldMetadata[],
  dbType: DatabaseType
): FieldMetadata[] {
  return fields.map((field) => {
    const normalizedType = mapDatabaseType(dbType, field.type);
    return { ...field, type: normalizedType };
  });
}

View File

@ -0,0 +1,120 @@
/**
* MySQL type code to normalized type name mappings
* These type codes are from the MySQL protocol
* Reference: https://dev.mysql.com/doc/dev/mysql-server/latest/field__types_8h.html
*/
// MySQL protocol field-type codes as surfaced by the mysql2 driver.
export const MYSQL_TYPE_CODE_MAP: Record<number, string> = {
  // Numeric types
  0: 'decimal',
  1: 'tinyint',
  2: 'smallint',
  3: 'integer',
  4: 'float',
  5: 'double',
  8: 'bigint',
  9: 'mediumint',
  246: 'decimal', // NEWDECIMAL
  // Date and time types
  7: 'timestamp',
  10: 'date',
  11: 'time',
  12: 'datetime',
  13: 'year',
  // String types
  15: 'varchar', // VARCHAR
  16: 'bit',
  247: 'enum',
  248: 'set',
  249: 'tinyblob', // TINY_BLOB
  250: 'mediumblob', // MEDIUM_BLOB
  251: 'longblob', // LONG_BLOB
  252: 'blob', // BLOB
  253: 'varchar', // VAR_STRING
  254: 'char', // STRING
  255: 'geometry',
  // JSON type
  245: 'json',
};

// Protocol-level names reported under their SQL-level equivalents.
const TEXT_TYPE_ALIASES: Record<string, string> = {
  'var_string': 'varchar',
  'string': 'char',
  'tiny_blob': 'tinytext',
  'medium_blob': 'mediumtext',
  'long_blob': 'longtext',
  'blob': 'text',
  'long': 'integer', // LONG type is an integer type
};

/**
 * Maps MySQL type code or type string to a normalized type name.
 * Accepts a raw numeric protocol code, a "mysql_type_<code>" string, a
 * "mysql_type_<NAME>" string, or a plain type name.
 * @param mysqlType - MySQL type as numeric code or string like "mysql_type_253"
 * @returns Normalized type name ('text' for unknown codes)
 */
export function mapMySQLType(mysqlType: string | number): string {
  if (typeof mysqlType === 'number') {
    return MYSQL_TYPE_CODE_MAP[mysqlType] ?? 'text';
  }
  if (typeof mysqlType !== 'string') {
    return 'text';
  }
  // "mysql_type_<digits>" — decode the numeric protocol code.
  const codeMatch = /^mysql_type_(\d+)$/.exec(mysqlType);
  if (codeMatch?.[1]) {
    return MYSQL_TYPE_CODE_MAP[Number.parseInt(codeMatch[1], 10)] ?? 'text';
  }
  // Named form: lowercase, strip an optional prefix, translate protocol aliases;
  // otherwise assume it is already a usable type name.
  const lowered = mysqlType.toLowerCase();
  const alias = TEXT_TYPE_ALIASES[lowered.replace(/^mysql_type_/, '')];
  return alias ?? lowered;
}
/**
 * Determines the simple type category for metadata.
 * @param normalizedType - The normalized MySQL type name
 * @returns Simple type category: 'number', 'text', or 'date'
 */
export function getMySQLSimpleType(normalizedType: string): 'number' | 'text' | 'date' {
  const lowered = normalizedType.toLowerCase();

  // Numeric types: substring hints plus the exact 'bit' type.
  const numericHints = ['int', 'float', 'double', 'decimal', 'numeric'];
  if (lowered === 'bit' || numericHints.some((hint) => lowered.includes(hint))) {
    return 'number';
  }

  // Date/time types (date, datetime, time, timestamp) plus the exact 'year' type.
  if (lowered === 'year' || lowered.includes('date') || lowered.includes('time')) {
    return 'date';
  }

  // Strings, blobs, enums, json, geometry, … all report as text.
  return 'text';
}

View File

@ -0,0 +1,145 @@
/**
* PostgreSQL OID to normalized type name mappings
* These OIDs are stable across PostgreSQL versions
* Reference: https://github.com/postgres/postgres/blob/master/src/include/catalog/pg_type.dat
*/
// Common PostgreSQL type OIDs
export const POSTGRESQL_TYPE_OID_MAP: Record<number, string> = {
  // Boolean type
  16: 'boolean',
  // Numeric types
  20: 'bigint', // int8
  21: 'smallint', // int2
  23: 'integer', // int4
  700: 'float4', // real
  701: 'float8', // double precision
  1700: 'numeric', // decimal
  // String types
  18: 'char',
  19: 'name',
  25: 'text',
  1042: 'char', // bpchar
  1043: 'varchar',
  // Date/time types
  1082: 'date',
  1083: 'time',
  1114: 'timestamp', // timestamp without timezone
  1184: 'timestamptz', // timestamp with timezone
  1186: 'interval',
  1266: 'timetz', // time with timezone
  // UUID
  2950: 'uuid',
  // JSON types
  114: 'json',
  3802: 'jsonb',
  // Binary
  17: 'bytea',
  // Money
  790: 'money',
  // Network types
  869: 'inet',
  650: 'cidr',
  774: 'macaddr',
  829: 'macaddr8',
  // Geometric types
  600: 'point',
  601: 'lseg',
  602: 'path',
  603: 'box',
  604: 'polygon',
  628: 'line',
  718: 'circle',
  // Array types (common ones)
  1000: '_bool', // boolean array
  1001: '_bytea', // bytea array
  1002: '_char', // char array
  1003: '_name', // name array
  1005: '_int2', // smallint array
  1007: '_int4', // integer array
  1009: '_text', // text array
  1014: '_bpchar', // char array
  1015: '_varchar', // varchar array
  1016: '_int8', // bigint array
  1021: '_float4', // float4 array
  1022: '_float8', // float8 array
  1115: '_timestamp', // timestamp array
  1182: '_date', // date array
  1183: '_time', // time array
  1185: '_timestamptz', // timestamptz array
  1231: '_numeric', // numeric array
  2951: '_uuid', // uuid array
  3807: '_jsonb', // jsonb array
};

/**
 * Maps PostgreSQL type OID or type string to a normalized type name.
 * Accepts a raw OID number, a "pg_type_<oid>" string, or a plain type name.
 * @param pgType - PostgreSQL type as OID number or string like "pg_type_1043"
 * @returns Normalized type name ('text' for unknown OIDs)
 */
export function mapPostgreSQLType(pgType: string | number): string {
  if (typeof pgType === 'number') {
    return POSTGRESQL_TYPE_OID_MAP[pgType] ?? 'text';
  }
  if (typeof pgType !== 'string') {
    return 'text';
  }
  // "pg_type_<digits>" — decode the embedded OID.
  const oidMatch = /^pg_type_(\d+)$/.exec(pgType);
  if (oidMatch?.[1]) {
    return POSTGRESQL_TYPE_OID_MAP[Number.parseInt(oidMatch[1], 10)] ?? 'text';
  }
  // Already a readable type name — normalize the case only.
  return pgType.toLowerCase();
}
/**
* Determines the simple type category for metadata
* @param normalizedType - The normalized PostgreSQL type name
* @returns Simple type category: 'number', 'text', or 'date'
*/
/**
 * Buckets a normalized PostgreSQL type name into a coarse metadata category.
 *
 * @param normalizedType - The normalized PostgreSQL type name
 * @returns 'number' for numeric types, 'date' for temporal types, 'text' otherwise
 */
export function getPostgreSQLSimpleType(normalizedType: string): 'number' | 'text' | 'date' {
  const t = normalizedType.toLowerCase();
  // Numeric family: substring markers cover int*/float*/etc.; 'money' is an
  // exact match with no shared marker.
  const numericMarkers = ['int', 'float', 'numeric', 'decimal', 'real', 'double'];
  if (t === 'money' || numericMarkers.some((marker) => t.includes(marker))) {
    return 'number';
  }
  // Temporal family: covers date/time/timestamp(tz)/interval variants.
  const temporalMarkers = ['date', 'time', 'interval'];
  if (temporalMarkers.some((marker) => t.includes(marker))) {
    return 'date';
  }
  // Anything not matched above is reported as text.
  return 'text';
}

View File

@ -1,5 +1,6 @@
import type { ColumnMetaData, DataMetadata } from '@buster/server-shared/metrics';
import type { FieldMetadata } from '../adapters/base';
import { getGenericSimpleType } from '../adapters/type-mappings';
/**
* Creates DataMetadata from query results and optional column metadata from adapters
@ -34,27 +35,70 @@ export function createMetadataFromResults(
// Try to use adapter metadata if available
const adapterColumn = columns?.find((col) => col.name === columnName);
if (adapterColumn) {
// Map adapter types to our types (this is a simplified mapping)
const typeStr = adapterColumn.type.toLowerCase();
if (
typeStr.includes('int') ||
typeStr.includes('float') ||
typeStr.includes('numeric') ||
typeStr.includes('decimal') ||
typeStr.includes('number')
) {
simpleType = 'number';
columnType = typeStr.includes('int') ? 'int4' : 'float8';
} else if (typeStr.includes('date') || typeStr.includes('time')) {
simpleType = 'date';
columnType = typeStr.includes('timestamp') ? 'timestamp' : 'date';
} else if (typeStr.includes('bool')) {
// Booleans map to text in simple_type since 'boolean' isn't valid
simpleType = 'text';
columnType = 'bool';
} else {
simpleType = 'text';
columnType = 'text';
// Use the normalized type from the adapter
const normalizedType = adapterColumn.type.toLowerCase();
// Use our type mapping utility to determine simple type
simpleType = getGenericSimpleType(normalizedType);
// Map to allowed ColumnMetaData types
switch (normalizedType) {
case 'integer':
case 'int':
columnType = 'int4';
break;
case 'bigint':
columnType = 'int8';
break;
case 'smallint':
case 'tinyint':
columnType = 'int2';
break;
case 'double':
case 'double precision':
columnType = 'float8';
break;
case 'real':
case 'float':
columnType = 'float4';
break;
case 'boolean':
case 'bool':
columnType = 'bool';
simpleType = 'text'; // Booleans map to text in simple_type
break;
case 'varchar':
case 'char':
case 'string':
columnType = 'text';
break;
case 'timestamp':
case 'datetime':
columnType = 'timestamp';
break;
case 'timestamptz':
columnType = 'timestamptz';
break;
case 'date':
columnType = 'date';
break;
case 'time':
columnType = 'time';
break;
case 'json':
columnType = 'json';
break;
case 'jsonb':
columnType = 'jsonb';
break;
case 'decimal':
case 'numeric':
columnType = 'numeric';
break;
default:
// Default to text for unknown types
columnType = 'text';
break;
}
} else if (values.length > 0) {
// Fallback: infer from data
@ -94,10 +138,23 @@ export function createMetadataFromResults(
const uniqueValues = new Set(values);
if (simpleType === 'number' && values.length > 0) {
const numericValues = values.filter((v): v is number => typeof v === 'number');
// Try to convert values to numbers for proper comparison
const numericValues = values
.map((v) => {
if (typeof v === 'number') return v;
if (typeof v === 'string' && !isNaN(Number(v))) return Number(v);
return null;
})
.filter((v): v is number => v !== null);
if (numericValues.length > 0) {
minValue = Math.min(...numericValues);
maxValue = Math.max(...numericValues);
} else {
// Fallback to string comparison if no valid numbers found
const sortedValues = [...values].sort();
minValue = String(sortedValues[0]);
maxValue = String(sortedValues[sortedValues.length - 1]);
}
} else if (simpleType === 'date' && values.length > 0) {
const dateValues = values

View File

@ -100,6 +100,9 @@ export async function executeMetricQuery(
// Trim results to requested limit if we have more
const data = hasMoreRecords ? allResults.slice(0, maxRows) : allResults;
// Create metadata from original results and column information BEFORE conversion
const dataMetadata = createMetadataFromResults(data, result.columns);
// Convert data to match expected type (string | number | null)
const typedData = data.map((row) => {
const typedRow: Record<string, string | number | null> = {};
@ -118,9 +121,6 @@ export async function executeMetricQuery(
return typedRow;
});
// Create metadata from results and column information
const dataMetadata = createMetadataFromResults(typedData, result.columns);
// Validate and parse result metadata if available
const validatedMetadata = resultMetadataSchema.safeParse(result.metadata);
const parsedMetadata = validatedMetadata.success ? validatedMetadata.data : undefined;