diff --git a/apps/server/src/api/v2/index.ts b/apps/server/src/api/v2/index.ts index 83813dda0..1cedaba0c 100644 --- a/apps/server/src/api/v2/index.ts +++ b/apps/server/src/api/v2/index.ts @@ -24,6 +24,7 @@ import slackRoutes from './slack'; import sqlRoutes from './sql'; import supportRoutes from './support'; import titleRoutes from './title'; +import { tools } from './tools'; import userRoutes from './users'; const app = new Hono() @@ -44,6 +45,7 @@ const app = new Hono() .route('/support', supportRoutes) .route('/security', securityRoutes) .route('/shortcuts', shortcutsRoutes) + .route('/tools', tools) .route('/organizations', organizationRoutes) .route('/dictionaries', dictionariesRoutes) .route('/title', titleRoutes) diff --git a/apps/server/src/api/v2/tools/index.ts b/apps/server/src/api/v2/tools/index.ts new file mode 100644 index 000000000..4dec0c55c --- /dev/null +++ b/apps/server/src/api/v2/tools/index.ts @@ -0,0 +1,5 @@ +import { Hono } from 'hono'; +import metadata from './metadata'; +import sql from './sql'; + +export const tools = new Hono().route('/sql', sql).route('/metadata', metadata); diff --git a/apps/server/src/api/v2/tools/metadata/GET.ts b/apps/server/src/api/v2/tools/metadata/GET.ts new file mode 100644 index 000000000..1ada2c882 --- /dev/null +++ b/apps/server/src/api/v2/tools/metadata/GET.ts @@ -0,0 +1,41 @@ +import { getDatasetMetadata } from '@buster/database/queries'; +import type { GetMetadataRequest, GetMetadataResponse } from '@buster/server-shared'; +import type { ApiKeyContext } from '@buster/server-shared'; +import { HTTPException } from 'hono/http-exception'; + +/** + * Handler for retrieving dataset metadata via API key authentication + * + * This handler: + * 1. Validates API key has access to the organization + * 2. Queries for dataset matching database, schema, name, and organization + * 3. Returns the dataset's metadata column + * + * @param request - The metadata request containing database, schema, and name + * @param apiKeyContext - The authenticated API key context + * @returns The dataset metadata + */ +export async function getMetadataHandler( + request: GetMetadataRequest, + apiKeyContext: ApiKeyContext +): Promise { + const { organizationId } = apiKeyContext; + + // Get dataset metadata + const result = await getDatasetMetadata({ + database: request.database, + schema: request.schema, + name: request.name, + organizationId, + }); + + if (!result || !result.metadata) { + throw new HTTPException(404, { + message: `Dataset not found: ${request.database}.${request.schema}.${request.name}`, + }); + } + + return { + metadata: result.metadata, + }; +} diff --git a/apps/server/src/api/v2/tools/metadata/index.ts b/apps/server/src/api/v2/tools/metadata/index.ts new file mode 100644 index 000000000..eb02cd884 --- /dev/null +++ b/apps/server/src/api/v2/tools/metadata/index.ts @@ -0,0 +1,41 @@ +import { GetMetadataRequestSchema } from '@buster/server-shared'; +import { zValidator } from '@hono/zod-validator'; +import { Hono } from 'hono'; +import { HTTPException } from 'hono/http-exception'; +import { createApiKeyAuthMiddleware } from '../../../../middleware/api-key-auth'; +import { getMetadataHandler } from './GET'; + +const app = new Hono() + // Apply API key authentication middleware to all routes + .use('*', createApiKeyAuthMiddleware()) + + // GET /tools/metadata - Retrieve dataset metadata with API key auth + .get('/', zValidator('query', GetMetadataRequestSchema), async (c) => { + const request = c.req.valid('query'); + const apiKeyContext = c.get('apiKey'); + + if (!apiKeyContext) { + throw new HTTPException(401, { + message: 'API key authentication required', + }); + } + + const response = await getMetadataHandler(request, apiKeyContext); + + return c.json(response); + }) + + // Error handler for metadata tool routes + .onError((err, c) => { + console.error('Metadata tool API error:', err); + + // Let HTTPException responses pass through + if (err instanceof HTTPException) { + return err.getResponse(); + } + + // Default error response + return c.json({ error: 'Internal server error' }, 500); + }); + +export default app; diff --git a/apps/server/src/api/v2/tools/sql/POST.ts b/apps/server/src/api/v2/tools/sql/POST.ts new file mode 100644 index 000000000..f8f8c8ad5 --- /dev/null +++ b/apps/server/src/api/v2/tools/sql/POST.ts @@ -0,0 +1,124 @@ +import { createPermissionErrorMessage, validateSqlPermissions } from '@buster/access-controls'; +import { executeMetricQuery } from '@buster/data-source'; +import type { Credentials } from '@buster/data-source'; +import { getDataSourceById, getDataSourceCredentials } from '@buster/database/queries'; +import type { RunSqlRequest, RunSqlResponse } from '@buster/server-shared'; +import type { ApiKeyContext } from '@buster/server-shared'; +import { HTTPException } from 'hono/http-exception'; + +/** + * Handler for running SQL queries against data sources via API key authentication + * + * This handler: + * 1. Validates API key has access to the organization + * 2. Verifies data source belongs to API key's organization + * 3. Validates SQL permissions against user's permissioned datasets + * 4. Executes the query with retry logic and timeout handling + * 5. Returns the data with metadata and pagination info + * + * @param request - The SQL query request containing data_source_id and sql + * @param apiKeyContext - The authenticated API key context + * @returns The query results with metadata + */ +export async function runSqlHandler( + request: RunSqlRequest, + apiKeyContext: ApiKeyContext +): Promise { + const { organizationId, ownerId } = apiKeyContext; + + // Get data source details + const dataSource = await getDataSourceById(request.data_source_id); + + if (!dataSource) { + throw new HTTPException(404, { + message: 'Data source not found', + }); + } + + // Verify data source belongs to API key's organization + if (dataSource.organizationId !== organizationId) { + throw new HTTPException(403, { + message: 'You do not have permission to access this data source', + }); + } + + // Validate SQL against user's permissioned datasets + const permissionResult = await validateSqlPermissions(request.sql, ownerId, dataSource.type); + + if (!permissionResult.isAuthorized) { + const errorMessage = + permissionResult.error || + createPermissionErrorMessage( + permissionResult.unauthorizedTables, + permissionResult.unauthorizedColumns + ); + + throw new HTTPException(403, { + message: errorMessage, + }); + } + + // Get data source credentials from vault + let credentials: Credentials; + try { + const rawCredentials = await getDataSourceCredentials({ + dataSourceId: request.data_source_id, + }); + + // Validate credential type matches data source type + if (rawCredentials.type && rawCredentials.type !== dataSource.type) { + console.warn( + `Credential type mismatch: credentials have type '${rawCredentials.type}' but data source has type '${dataSource.type}'. Using data source type.` + ); + } + + // Use data source type as the source of truth + credentials = { + ...rawCredentials, + type: dataSource.type, + } as Credentials; + } catch (error) { + console.error('Failed to retrieve data source credentials:', error); + throw new HTTPException(500, { + message: 'Failed to access data source', + }); + } + + // Execute query using the shared utility with 5000 row limit + try { + // Request one extra row to detect if there are more records + const result = await executeMetricQuery(request.data_source_id, request.sql, credentials, { + maxRows: 5001, + timeout: 60000, // 60 seconds + retryDelays: [1000, 3000, 6000], // 1s, 3s, 6s + }); + + // Trim to 5000 rows and check if there are more records + const hasMore = result.data.length > 5000; + const trimmedData = result.data.slice(0, 5000); + + // Use the data source's hasMoreRecords if available, otherwise use our local check + // This ensures we have a single source of truth for pagination state + const hasMoreRecords = result.hasMoreRecords !== undefined ? result.hasMoreRecords : hasMore; + + const response: RunSqlResponse = { + data: trimmedData, + data_metadata: result.dataMetadata, + has_more_records: hasMoreRecords, + }; + + return response; + } catch (error) { + console.error('Query execution failed:', error); + + if (error instanceof Error) { + throw new HTTPException(500, { + message: `Query execution failed: ${error.message}`, + }); + } + + throw new HTTPException(500, { + message: 'Query execution failed', + }); + } +} diff --git a/apps/server/src/api/v2/tools/sql/index.ts b/apps/server/src/api/v2/tools/sql/index.ts new file mode 100644 index 000000000..5cb5c2e1a --- /dev/null +++ b/apps/server/src/api/v2/tools/sql/index.ts @@ -0,0 +1,41 @@ +import { RunSqlRequestSchema } from '@buster/server-shared'; +import { zValidator } from '@hono/zod-validator'; +import { Hono } from 'hono'; +import { HTTPException } from 'hono/http-exception'; +import { createApiKeyAuthMiddleware } from '../../../../middleware/api-key-auth'; +import { runSqlHandler } from './POST'; + +const app = new Hono() + // Apply API key authentication middleware to all routes + .use('*', createApiKeyAuthMiddleware()) + + // POST /tools/sql - Execute SQL query against data source with API key auth + .post('/', zValidator('json', RunSqlRequestSchema), async (c) => { + const request = c.req.valid('json'); + const apiKeyContext = c.get('apiKey'); + + if (!apiKeyContext) { + throw new HTTPException(401, { + message: 'API key authentication required', + }); + } + + const response = await runSqlHandler(request, apiKeyContext); + + return c.json(response); + }) + + // Error handler for SQL tool routes + .onError((err, c) => { + console.error('SQL tool API error:', err); + + // Let HTTPException responses pass through + if (err instanceof HTTPException) { + return err.getResponse(); + } + + // Default error response + return c.json({ error: 'Internal server error' }, 500); + }); + +export default app; diff --git a/packages/ai/src/agents/analytics-engineer-agent/create-analytics-engineer-toolset.ts b/packages/ai/src/agents/analytics-engineer-agent/create-analytics-engineer-toolset.ts index 120402bc8..1a843bccf 100644 --- a/packages/ai/src/agents/analytics-engineer-agent/create-analytics-engineer-toolset.ts +++ b/packages/ai/src/agents/analytics-engineer-agent/create-analytics-engineer-toolset.ts @@ -6,6 +6,8 @@ import { LS_TOOL_NAME, MULTI_EDIT_FILE_TOOL_NAME, READ_FILE_TOOL_NAME, + RETRIEVE_METADATA_TOOL_NAME, + RUN_SQL_TOOL_NAME, TODO_WRITE_TOOL_NAME, WRITE_FILE_TOOL_NAME, createBashTool, @@ -15,6 +17,8 @@ import { createLsTool, createMultiEditFileTool, createReadFileTool, + createRetrieveMetadataTool, + createRunSqlTool, createTaskTool, createTodoWriteTool, createWriteFileTool, @@ -60,6 +64,15 @@ export async function createAnalyticsEngineerToolset( workingDirectory: analyticsEngineerAgentOptions.folder_structure, todosList: analyticsEngineerAgentOptions.todosList, }); + const runSqlTool = createRunSqlTool({ + messageId: analyticsEngineerAgentOptions.messageId, + apiKey: process.env.BUSTER_API_KEY || '', + apiUrl: process.env.BUSTER_API_URL || 'http://localhost:3000', + }); + const retrieveMetadataTool = createRetrieveMetadataTool({ + apiKey: process.env.BUSTER_API_KEY || '', + apiUrl: process.env.BUSTER_API_URL || 'http://localhost:3000', + }); // Conditionally create task tool (only for main agent, not for subagents) const taskTool = !analyticsEngineerAgentOptions.isSubagent ? createTaskTool({ @@ -87,6 +100,8 @@ export async function createAnalyticsEngineerToolset( [MULTI_EDIT_FILE_TOOL_NAME]: multiEditFileTool, [LS_TOOL_NAME]: lsTool, [TODO_WRITE_TOOL_NAME]: todosTool, + [RUN_SQL_TOOL_NAME]: runSqlTool, + [RETRIEVE_METADATA_TOOL_NAME]: retrieveMetadataTool, ...(taskTool ? { taskTool } : {}), }; } diff --git a/packages/ai/src/tools/database-tools/retrieve-metadata/retrieve-metadata-execute.ts b/packages/ai/src/tools/database-tools/retrieve-metadata/retrieve-metadata-execute.ts new file mode 100644 index 000000000..0dd7d8bd4 --- /dev/null +++ b/packages/ai/src/tools/database-tools/retrieve-metadata/retrieve-metadata-execute.ts @@ -0,0 +1,83 @@ +import { wrapTraced } from 'braintrust'; +import { + RETRIEVE_METADATA_TOOL_NAME, + type RetrieveMetadataContext, + type RetrieveMetadataInput, + type RetrieveMetadataOutput, +} from './retrieve-metadata'; + +/** + * Retrieve dataset metadata via API endpoint + */ +async function executeApiRequest( + database: string, + schema: string, + name: string, + context: RetrieveMetadataContext +): Promise<{ + success: boolean; + data?: RetrieveMetadataOutput; + error?: string; +}> { + try { + // Build query string + const params = new URLSearchParams({ + database, + schema, + name, + }); + + const apiEndpoint = `${context.apiUrl}/api/v2/tools/metadata?${params.toString()}`; + + const response = await fetch(apiEndpoint, { + method: 'GET', + headers: { + Authorization: `Bearer ${context.apiKey}`, + }, + }); + + if (!response.ok) { + const errorData = (await response.json().catch(() => ({}))) as { error?: string }; + const errorMessage = errorData.error || `HTTP ${response.status}: ${response.statusText}`; + + return { + success: false, + error: errorMessage, + }; + } + + const result = (await response.json()) as RetrieveMetadataOutput; + + return { + success: true, + data: result, + }; + } catch (error) { + const errorMessage = error instanceof Error ? error.message : 'Metadata retrieval failed'; + + return { + success: false, + error: errorMessage, + }; + } +} + +// Factory function that creates the execute function with proper context typing +export function createRetrieveMetadataExecute(context: RetrieveMetadataContext) { + return wrapTraced( + async (input: RetrieveMetadataInput): Promise => { + const { database, schema, name } = input; + + // Execute API request + const result = await executeApiRequest(database, schema, name, context); + + if (result.success && result.data) { + return result.data; + } + + // Throw error if retrieval failed + throw new Error(result.error || 'Metadata retrieval failed'); + }, + { name: RETRIEVE_METADATA_TOOL_NAME } + ); +} diff --git a/packages/ai/src/tools/database-tools/retrieve-metadata/retrieve-metadata.ts b/packages/ai/src/tools/database-tools/retrieve-metadata/retrieve-metadata.ts new file mode 100644 index 000000000..954a26fbc --- /dev/null +++ b/packages/ai/src/tools/database-tools/retrieve-metadata/retrieve-metadata.ts @@ -0,0 +1,39 @@ +import { tool } from 'ai'; +import { z } from 'zod'; +import { createRetrieveMetadataExecute } from './retrieve-metadata-execute'; + +export const RETRIEVE_METADATA_TOOL_NAME = 'retrieveMetadata'; + +export const RetrieveMetadataInputSchema = z.object({ + database: z.string().min(1).describe('Database name where the dataset resides'), + schema: z.string().min(1).describe('Schema name where the dataset resides'), + name: z.string().min(1).describe('Dataset/table name'), +}); + +const RetrieveMetadataContextSchema = z.object({ + apiKey: z.string().describe('API key for authentication'), + apiUrl: z.string().describe('Base URL of the API server'), +}); + +export type RetrieveMetadataInput = z.infer; +export type RetrieveMetadataContext = z.infer; + +const RetrieveMetadataOutputSchema = z.object({ + metadata: z + .record(z.unknown()) + .describe('Dataset metadata containing column profiles and statistics'), +}); + +export type RetrieveMetadataOutput = z.infer; + +// Factory function to create the retrieve-metadata tool +export function createRetrieveMetadataTool(context: RetrieveMetadataContext) { + return tool({ + description: `Use this to retrieve metadata about a dataset from the data warehouse. + This tool fetches detailed metadata including column profiles, statistics, distributions, and other analytical information about tables/datasets. + The metadata can help understand data structure, quality, and characteristics before querying.`, + inputSchema: RetrieveMetadataInputSchema, + outputSchema: RetrieveMetadataOutputSchema, + execute: createRetrieveMetadataExecute(context), + }); +} diff --git a/packages/ai/src/tools/database-tools/run-sql/run-sql-execute.ts b/packages/ai/src/tools/database-tools/run-sql/run-sql-execute.ts new file mode 100644 index 000000000..b81fab6c4 --- /dev/null +++ b/packages/ai/src/tools/database-tools/run-sql/run-sql-execute.ts @@ -0,0 +1,138 @@ +import { wrapTraced } from 'braintrust'; +import { + RUN_SQL_TOOL_NAME, + type RunSqlContext, + type RunSqlInput, + type RunSqlOutput, +} from './run-sql'; + +/** + * Execute SQL query via API endpoint + */ +async function executeApiRequest( + dataSourceId: string, + sql: string, + context: RunSqlContext +): Promise<{ + success: boolean; + data?: RunSqlOutput; + error?: string; +}> { + const MAX_RETRIES = 3; + const RETRY_DELAYS = [1000, 3000, 6000]; // 1s, 3s, 6s + + if (!sql.trim()) { + return { success: false, error: 'SQL query cannot be empty' }; + } + + // Attempt execution with retries + for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { + try { + const apiEndpoint = `${context.apiUrl}/api/v2/tools/sql`; + + const response = await fetch(apiEndpoint, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${context.apiKey}`, + }, + body: JSON.stringify({ + data_source_id: dataSourceId, + sql, + }), + }); + + if (!response.ok) { + const errorData = (await response.json().catch(() => ({}))) as { error?: string }; + const errorMessage = errorData.error || `HTTP ${response.status}: ${response.statusText}`; + + // Check if error is timeout-related + const isTimeout = + errorMessage.toLowerCase().includes('timeout') || + errorMessage.toLowerCase().includes('timed out'); + + if (isTimeout && attempt < MAX_RETRIES) { + // Wait before retry + const delay = RETRY_DELAYS[attempt] || 6000; + console.warn( + `[run-sql] Query timeout on attempt ${attempt + 1}/${MAX_RETRIES + 1}. Retrying in ${delay}ms...`, + { + sql: `${sql.substring(0, 100)}...`, + attempt: attempt + 1, + nextDelay: delay, + } + ); + await new Promise((resolve) => setTimeout(resolve, delay)); + continue; // Retry + } + + // Not a timeout or no more retries + return { + success: false, + error: errorMessage, + }; + } + + const result = (await response.json()) as RunSqlOutput; + + return { + success: true, + data: result, + }; + } catch (error) { + const errorMessage = error instanceof Error ? error.message : 'SQL execution failed'; + const isTimeout = + errorMessage.toLowerCase().includes('timeout') || + errorMessage.toLowerCase().includes('timed out') || + errorMessage.toLowerCase().includes('fetch failed'); + + if (isTimeout && attempt < MAX_RETRIES) { + // Wait before retry + const delay = RETRY_DELAYS[attempt] || 6000; + console.warn( + `[run-sql] Query timeout (exception) on attempt ${attempt + 1}/${MAX_RETRIES + 1}. Retrying in ${delay}ms...`, + { + sql: `${sql.substring(0, 100)}...`, + attempt: attempt + 1, + nextDelay: delay, + error: errorMessage, + } + ); + await new Promise((resolve) => setTimeout(resolve, delay)); + continue; // Retry + } + + // Not a timeout or no more retries + return { + success: false, + error: errorMessage, + }; + } + } + + // Should not reach here, but just in case + return { + success: false, + error: 'Max retries exceeded for SQL execution', + }; +} + +// Factory function that creates the execute function with proper context typing +export function createRunSqlExecute(context: RunSqlContext) { + return wrapTraced( + async (input: RunSqlInput): Promise => { + const { data_source_id, sql } = input; + + // Execute SQL via API + const result = await executeApiRequest(data_source_id, sql, context); + + if (result.success && result.data) { + return result.data; + } + + // Throw error if execution failed + throw new Error(result.error || 'Query execution failed'); + }, + { name: RUN_SQL_TOOL_NAME } + ); +} diff --git a/packages/ai/src/tools/database-tools/run-sql/run-sql.ts b/packages/ai/src/tools/database-tools/run-sql/run-sql.ts new file mode 100644 index 000000000..ed93089c0 --- /dev/null +++ b/packages/ai/src/tools/database-tools/run-sql/run-sql.ts @@ -0,0 +1,49 @@ +import { tool } from 'ai'; +import { z } from 'zod'; +import { createRunSqlExecute } from './run-sql-execute'; + +export const RUN_SQL_TOOL_NAME = 'runSql'; + +export const RunSqlInputSchema = z.object({ + data_source_id: z.string().uuid().describe('UUID of the data source to execute SQL against'), + sql: z + .string() + .min(1) + .describe( + `SQL query to execute. + YOU MUST USE THE . syntax/qualifier for all table names. + NEVER use SELECT * on physical tables - for security purposes you must explicitly select the columns you intend to use. + NEVER query system tables or use 'SHOW' statements as these will fail to execute. + Queries without these requirements will fail to execute.` + ), +}); + +const RunSqlContextSchema = z.object({ + apiKey: z.string().describe('API key for authentication'), + apiUrl: z.string().describe('Base URL of the API server'), + messageId: z.string().describe('Message ID for database updates'), +}); + +export type RunSqlInput = z.infer; +export type RunSqlContext = z.infer; + +const RunSqlOutputSchema = z.object({ + data: z.array(z.record(z.unknown())).describe('Query results'), + data_metadata: z.record(z.unknown()).describe('Metadata about the data columns'), + has_more_records: z.boolean().describe('Whether there are more records available'), +}); + +export type RunSqlOutput = z.infer; + +// Factory function to create the run-sql tool +export function createRunSqlTool(context: RunSqlContext) { + return tool({ + description: `Use this to run SQL queries against a data source via API. + This tool executes queries remotely and returns results with metadata. + You must use the . syntax/qualifier for all table names. + Results are limited to 5000 rows for performance.`, + inputSchema: RunSqlInputSchema, + outputSchema: RunSqlOutputSchema, + execute: createRunSqlExecute(context), + }); +} diff --git a/packages/ai/src/tools/index.ts b/packages/ai/src/tools/index.ts index c43cb11ed..a2c0aaffb 100644 --- a/packages/ai/src/tools/index.ts +++ b/packages/ai/src/tools/index.ts @@ -46,6 +46,11 @@ export { createExecuteSqlTool, EXECUTE_SQL_TOOL_NAME, } from './database-tools/execute-sql/execute-sql'; +export { createRunSqlTool, RUN_SQL_TOOL_NAME } from './database-tools/run-sql/run-sql'; +export { + createRetrieveMetadataTool, + RETRIEVE_METADATA_TOOL_NAME, +} from './database-tools/retrieve-metadata/retrieve-metadata'; export { executeSqlDocsAgent } from './database-tools/super-execute-sql/super-execute-sql'; // File tools diff --git a/packages/ai/src/tools/planning-thinking-tools/todo-write-tool/todo-write-tool.ts b/packages/ai/src/tools/planning-thinking-tools/todo-write-tool/todo-write-tool.ts index a0a548474..9dddb22e5 100644 --- a/packages/ai/src/tools/planning-thinking-tools/todo-write-tool/todo-write-tool.ts +++ b/packages/ai/src/tools/planning-thinking-tools/todo-write-tool/todo-write-tool.ts @@ -20,7 +20,10 @@ const TodoWriteToolInputSchema = z.object({ const TodoWriteToolContextSchema = z.object({ chatId: z.string().describe('The chat/conversation ID to associate todos with'), workingDirectory: z.string().describe('The working directory for the chat'), - todosList: z.array(TodoItemSchema).default([]).describe('In-memory array of todo items to manipulate'), + todosList: z + .array(TodoItemSchema) + .default([]) + .describe('In-memory array of todo items to manipulate'), }); export type TodoWriteToolInput = z.infer; diff --git a/packages/database/src/queries/datasets/get-dataset-metadata.ts b/packages/database/src/queries/datasets/get-dataset-metadata.ts new file mode 100644 index 000000000..c59f1e9e1 --- /dev/null +++ b/packages/database/src/queries/datasets/get-dataset-metadata.ts @@ -0,0 +1,42 @@ +import { and, eq, isNull } from 'drizzle-orm'; +import { z } from 'zod'; +import { db } from '../../connection'; +import { datasets } from '../../schema'; + +/** + * Parameters for getting dataset metadata + */ +const GetDatasetMetadataParamsSchema = z.object({ + database: z.string().describe('Database name'), + schema: z.string().describe('Schema name'), + name: z.string().describe('Dataset/table name'), + organizationId: z.string().uuid().describe('Organization UUID'), +}); + +type GetDatasetMetadataParams = z.infer; + +/** + * Get dataset metadata by database, schema, name, and organization + * Returns only the metadata column from the matching dataset + */ +export async function getDatasetMetadata(params: GetDatasetMetadataParams) { + const validated = GetDatasetMetadataParamsSchema.parse(params); + + const conditions = [ + eq(datasets.databaseName, validated.database), + eq(datasets.schema, validated.schema), + eq(datasets.name, validated.name), + eq(datasets.organizationId, validated.organizationId), + isNull(datasets.deletedAt), + ]; + + const result = await db + .select({ + metadata: datasets.metadata, + }) + .from(datasets) + .where(and(...conditions)) + .limit(1); + + return result[0] || null; +} diff --git a/packages/database/src/queries/datasets/index.ts b/packages/database/src/queries/datasets/index.ts index 7de34f312..e4f9063c9 100644 --- a/packages/database/src/queries/datasets/index.ts +++ b/packages/database/src/queries/datasets/index.ts @@ -7,3 +7,4 @@ export { getDatasetsWithYml, getDatasetsWithYmlByOrganization } from './get-data export { getDatasetById, type Dataset } from './get-dataset-by-id'; export { getDataSourceWithDetails } from './get-data-source-with-details'; export { updateDatasetMetadata } from './update-dataset-metadata'; +export { getDatasetMetadata } from './get-dataset-metadata'; diff --git a/packages/server-shared/src/index.ts b/packages/server-shared/src/index.ts index 2b434bd91..3891a679c 100644 --- a/packages/server-shared/src/index.ts +++ b/packages/server-shared/src/index.ts @@ -18,6 +18,7 @@ export * from './dictionary'; export * from './docs'; export * from './github'; export * from './message'; +export * from './metadata'; export * from './metrics'; export * from './organization'; export * from './public-chat'; diff --git a/packages/server-shared/src/metadata/index.ts b/packages/server-shared/src/metadata/index.ts new file mode 100644 index 000000000..4b1792ddb --- /dev/null +++ b/packages/server-shared/src/metadata/index.ts @@ -0,0 +1,16 @@ +import type { DatasetMetadata } from '@buster/database/schema-types'; +import { z } from 'zod'; + +// Request schema for getting dataset metadata +export const GetMetadataRequestSchema = z.object({ + database: z.string().min(1, 'Database name cannot be empty'), + schema: z.string().min(1, 'Schema name cannot be empty'), + name: z.string().min(1, 'Dataset name cannot be empty'), +}); + +export type GetMetadataRequest = z.infer; + +// Response type for metadata retrieval +export interface GetMetadataResponse { + metadata: DatasetMetadata; +}