mirror of https://github.com/buster-so/buster.git
bigquery parser
This commit is contained in:
parent
6ad4654200
commit
21713a2e3b
|
@ -10,6 +10,7 @@ import { BigQueryIntrospector } from '../introspection/bigquery';
|
|||
import { type BigQueryCredentials, type Credentials, DataSourceType } from '../types/credentials';
|
||||
import type { QueryParameter } from '../types/query';
|
||||
import { type AdapterQueryResult, BaseAdapter, type FieldMetadata } from './base';
|
||||
import { fixBigQueryTableReferences } from './helpers/bigquery-sql-fixer';
|
||||
import { normalizeRowValues } from './helpers/normalize-values';
|
||||
import { getBigQuerySimpleType, mapBigQueryType } from './type-mappings/bigquery';
|
||||
|
||||
|
@ -75,8 +76,11 @@ export class BigQueryAdapter extends BaseAdapter {
|
|||
}
|
||||
|
||||
try {
|
||||
// Fix SQL to ensure proper escaping of identifiers with special characters
|
||||
const fixedSql = fixBigQueryTableReferences(sql);
|
||||
|
||||
const options: Query = {
|
||||
query: sql,
|
||||
query: fixedSql,
|
||||
useLegacySql: false,
|
||||
};
|
||||
|
||||
|
@ -97,12 +101,12 @@ export class BigQueryAdapter extends BaseAdapter {
|
|||
// Handle parameterized queries - BigQuery uses named parameters
|
||||
if (params && params.length > 0) {
|
||||
// Convert positional parameters to named parameters
|
||||
let processedSql = sql;
|
||||
let processedSql = fixedSql;
|
||||
const namedParams: Record<string, QueryParameter> = {};
|
||||
|
||||
// Replace ? placeholders with @param0, @param1, etc.
|
||||
let paramIndex = 0;
|
||||
processedSql = sql.replace(/\?/g, () => {
|
||||
processedSql = fixedSql.replace(/\?/g, () => {
|
||||
const paramName = `param${paramIndex}`;
|
||||
const paramValue = params[paramIndex];
|
||||
if (paramValue !== undefined) {
|
||||
|
|
|
@ -0,0 +1,128 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { fixBigQueryTableReferences } from './bigquery-sql-fixer';
|
||||
|
||||
describe('fixBigQueryTableReferences', () => {
  /** One scenario: the SQL handed to the fixer and the SQL it must return. */
  interface Scenario {
    readonly title: string;
    readonly sql: string;
    /** Omitted when the fixer is expected to return the input untouched. */
    readonly expected?: string;
  }

  /** Builds a multi-line query that starts and ends with a newline. */
  const lines = (...rows: string[]): string => ['', ...rows, ''].join('\n');

  // Scenarios are listed in the order they should be reported.
  const scenarios: readonly Scenario[] = [
    {
      title: 'should add backticks to project IDs with hyphens',
      sql: 'SELECT * FROM buster-381916.analytics.user',
      expected: 'SELECT * FROM `buster-381916`.analytics.user',
    },
    {
      title: 'should handle table aliases correctly',
      sql: 'SELECT u.id FROM buster-381916.analytics.user u',
      expected: 'SELECT u.id FROM `buster-381916`.analytics.user u',
    },
    {
      title: 'should handle AS aliases correctly',
      sql: 'SELECT * FROM my-project.dataset.table AS t',
      expected: 'SELECT * FROM `my-project`.dataset.table AS t',
    },
    {
      title: 'should not double-escape already escaped identifiers',
      sql: 'SELECT * FROM `buster-381916`.analytics.user',
    },
    {
      title: 'should handle fully quoted table references',
      sql: 'SELECT * FROM `buster-381916.analytics.user`',
    },
    {
      title: 'should handle multiple table references in JOINs',
      sql: lines(
        'SELECT *',
        'FROM project-123.dataset1.table1 t1',
        'JOIN project-456.dataset2.table2 t2 ON t1.id = t2.id',
      ),
      expected: lines(
        'SELECT *',
        'FROM `project-123`.dataset1.table1 t1',
        'JOIN `project-456`.dataset2.table2 t2 ON t1.id = t2.id',
      ),
    },
    {
      title: 'should handle COUNT and other aggregate functions',
      sql: 'SELECT COUNT(DISTINCT u.user_id) as total_users FROM buster-381916.analytics.user u',
      expected:
        'SELECT COUNT(DISTINCT u.user_id) as total_users FROM `buster-381916`.analytics.user u',
    },
    {
      title: 'should leave tables without special characters unchanged',
      sql: 'SELECT * FROM myproject.dataset.table',
    },
    {
      title: 'should handle INSERT INTO statements',
      sql: 'INSERT INTO project-123.dataset.table VALUES (1, 2, 3)',
      expected: 'INSERT INTO `project-123`.dataset.table VALUES (1, 2, 3)',
    },
    {
      title: 'should handle UPDATE statements',
      sql: 'UPDATE project-123.dataset.table SET col = 1',
      expected: 'UPDATE `project-123`.dataset.table SET col = 1',
    },
    {
      title: 'should handle dataset names with special characters',
      sql: 'SELECT * FROM project.dataset-name.table',
      expected: 'SELECT * FROM project.`dataset-name`.table',
    },
    {
      title: 'should handle table names with special characters',
      sql: 'SELECT * FROM project.dataset.table-name',
      expected: 'SELECT * FROM project.dataset.`table-name`',
    },
    {
      title: 'should handle all three parts with special characters',
      sql: 'SELECT * FROM project-123.dataset-456.table-789',
      expected: 'SELECT * FROM `project-123`.`dataset-456`.`table-789`',
    },
    {
      title: 'should handle reserved keywords as identifiers',
      sql: 'SELECT * FROM project.dataset.select',
      expected: 'SELECT * FROM project.dataset.`select`',
    },
    {
      title: 'should handle complex queries with multiple references',
      sql: lines(
        'WITH user_stats AS (',
        'SELECT user_id, COUNT(*) as count',
        'FROM buster-381916.analytics.events',
        'GROUP BY user_id',
        ')',
        'SELECT u.*, s.count',
        'FROM buster-381916.analytics.user u',
        'JOIN user_stats s ON u.id = s.user_id',
      ),
      expected: lines(
        'WITH user_stats AS (',
        'SELECT user_id, COUNT(*) as count',
        'FROM `buster-381916`.analytics.events',
        'GROUP BY user_id',
        ')',
        'SELECT u.*, s.count',
        'FROM `buster-381916`.analytics.user u',
        'JOIN user_stats s ON u.id = s.user_id',
      ),
    },
    {
      // No special characters, so no change is needed.
      title: 'should handle table references with numbers',
      sql: 'SELECT * FROM project123.dataset456.table789',
    },
    {
      title: 'should escape identifiers that start with numbers',
      sql: 'SELECT * FROM project.dataset.123table',
      expected: 'SELECT * FROM project.dataset.`123table`',
    },
  ];

  for (const { title, sql, expected } of scenarios) {
    it(title, () => {
      expect(fixBigQueryTableReferences(sql)).toBe(expected ?? sql);
    });
  }
});
|
|
@ -0,0 +1,135 @@
|
|||
/**
|
||||
* Helper functions to fix BigQuery SQL syntax issues
|
||||
*/
|
||||
|
||||
/**
 * Reserved words that are backtick-quoted when they appear as the final
 * (table-name) part of a reference. Deliberately a short list: only words
 * that are clearly ambiguous in table position are included.
 */
const RESERVED_TABLE_NAMES: ReadonlySet<string> = new Set([
  'SELECT', 'FROM', 'WHERE', 'JOIN', 'GROUP', 'ORDER',
  'UNION', 'WITH', 'AS', 'ON', 'AND', 'OR', 'NOT',
  'NULL', 'TRUE', 'FALSE', 'CASE', 'WHEN', 'THEN',
]);

/**
 * Single-pass scanner over a SQL string. The first alternatives consume
 * string literals, quoted identifiers and comments as opaque tokens so that
 * keywords inside them are never seen; the last alternative captures
 * `<keyword><whitespace>` (group 1) and the table reference that follows
 * (group 2).
 */
const SQL_SCAN_PATTERN = new RegExp(
  [
    /'''[\s\S]*?'''/, // triple-quoted string
    /"""[\s\S]*?"""/, // triple-quoted string
    /'(?:[^'\\\n]|\\.)*'/, // single-quoted string
    /"(?:[^"\\\n]|\\.)*"/, // double-quoted string
    /`[^`\n]*`/, // quoted identifier that does not follow a keyword
    /--[^\n]*/, // line comment
    /#[^\n]*/, // line comment
    /\/\*[\s\S]*?\*\//, // block comment
    // A table reference is a run of quoted segments and unquoted characters.
    // `IS [NOT] DISTINCT FROM <expr>` is a comparison, not a table clause,
    // so a FROM directly preceded by DISTINCT is ignored.
    /(?<!\bDISTINCT\s+)\b((?:FROM|JOIN|INTO|UPDATE|TABLE)\s+)((?:`[^`\n]*`|[^\s,;()`'"])+)/,
  ]
    .map((piece) => piece.source)
    .join('|'),
  'gi',
);

/**
 * Wraps the parts of BigQuery table references that cannot be written bare
 * (hyphens or other special characters, a leading digit, or a reserved word
 * in table position) in backticks.
 *
 * Only references that directly follow FROM, JOIN, INTO, UPDATE or TABLE are
 * considered. Text inside string literals, comments and existing backtick
 * quotes is left exactly as written, and the right-hand side of
 * `IS [NOT] DISTINCT FROM` is not treated as a table.
 *
 * Known limitation: tables listed after a comma (`FROM a, b-c.d.e`) are not
 * detected, because they do not follow one of the keywords.
 *
 * @param sql The SQL text to rewrite.
 * @returns The SQL with unsafe reference parts quoted; everything else is
 *   returned unchanged.
 */
export function fixBigQueryTableReferences(sql: string): string {
  return sql.replace(
    SQL_SCAN_PATTERN,
    (token: string, keyword: string | undefined, tableRef: string | undefined): string =>
      // Literal / comment / quoted-identifier tokens have no capture groups.
      keyword === undefined || tableRef === undefined
        ? token
        : keyword + escapeTableReference(tableRef),
  );
}

/**
 * Quotes each dot-separated part of a single table reference when required.
 * Parts that are already backtick-quoted (including a fully quoted
 * `project.dataset.table`) are kept as written.
 *
 * @param tableRef The reference without keyword or alias, e.g. `my-proj.ds.tbl`.
 * @returns The reference with unsafe parts wrapped in backticks.
 */
function escapeTableReference(tableRef: string): string {
  return tableRef.replace(/`[^`]*`|[^`.]+/g, (part: string, offset: number): string => {
    if (part.startsWith('`')) {
      return part;
    }
    // Reserved words are only a problem in the last (table-name) position.
    const isTableName = offset + part.length === tableRef.length;
    return needsBackticks(part, isTableName) ? `\`${part}\`` : part;
  });
}

/**
 * Determines if an identifier needs backticks.
 *
 * @param identifier The bare identifier to check (no surrounding backticks).
 * @param isTableName Whether this is the table name (last part of the
 *   reference); reserved words are only checked in that position.
 * @returns `true` when the identifier contains a character other than
 *   `[A-Za-z0-9_]`, starts with a digit, or is a reserved word used as the
 *   table name.
 */
function needsBackticks(identifier: string, isTableName: boolean = false): boolean {
  // Hyphens and any other character outside [A-Za-z0-9_].
  if (/[^a-zA-Z0-9_]/.test(identifier)) {
    return true;
  }

  // Bare identifiers may not start with a digit.
  if (/^\d/.test(identifier)) {
    return true;
  }

  // Exact, case-insensitive match against the reserved-word list.
  return isTableName && RESERVED_TABLE_NAMES.has(identifier.toUpperCase());
}
|
Loading…
Reference in New Issue