mirror of https://github.com/buster-so/buster.git
1599 lines
54 KiB
TypeScript
1599 lines
54 KiB
TypeScript
import { randomUUID } from 'node:crypto';
|
|
import type { DataSource } from '@buster/data-source';
|
|
import { assetPermissions, db, metricFiles } from '@buster/database';
|
|
import type { RuntimeContext } from '@mastra/core/runtime-context';
|
|
import { createTool } from '@mastra/core/tools';
|
|
import { wrapTraced } from 'braintrust';
|
|
import { inArray } from 'drizzle-orm';
|
|
import * as yaml from 'yaml';
|
|
import { z } from 'zod';
|
|
import { getWorkflowDataSourceManager } from '../../utils/data-source-manager';
|
|
import { createPermissionErrorMessage, validateSqlPermissions } from '../../utils/sql-permissions';
|
|
import type { AnalystRuntimeContext } from '../../workflows/analyst-workflow';
|
|
import { validateAndAdjustBarLineAxes } from './bar-line-axis-validator';
|
|
import { trackFileAssociations } from './file-tracking-helper';
|
|
import { createInitialMetricVersionHistory, validateMetricYml } from './version-history-helpers';
|
|
import type { MetricYml } from './version-history-types';
|
|
|
|
// TypeScript types matching Rust DataMetadata structure

/** Coarse value category of a column; selects how min/max are computed below. */
enum SimpleType {
  Number = 'number',
  String = 'string',
  Date = 'date',
  Boolean = 'boolean',
  Other = 'other',
}

/**
 * Database-style column type names.
 * `createDataMetadata` only ever assigns Int4, Float8, Bool, Timestamp, Varchar or Other.
 */
enum ColumnType {
  Int2 = 'int2',
  Int4 = 'int4',
  Int8 = 'int8',
  Float4 = 'float4',
  Float8 = 'float8',
  Varchar = 'varchar',
  Text = 'text',
  Bool = 'bool',
  Date = 'date',
  Timestamp = 'timestamp',
  Timestamptz = 'timestamptz',
  Other = 'other',
}

/** Per-column summary produced by `createDataMetadata`. */
interface ColumnMetaData {
  /** Lower-cased column name. */
  name: string;
  /** Smallest value (number, Date or string depending on `simple_type`), or null. */
  min_value: unknown;
  /** Largest value (number, Date or string depending on `simple_type`), or null. */
  max_value: unknown;
  /** Number of distinct non-null values. */
  unique_values: number;
  simple_type: SimpleType;
  type: ColumnType;
}

/** Summary of a whole result set. */
interface DataMetadata {
  column_count: number;
  row_count: number;
  column_metadata: ColumnMetaData[];
}

/**
 * True when `text` is a non-blank string that `Number()` can parse.
 * Blank strings are rejected explicitly because `Number('')` and `Number('  ')` are 0.
 */
function isNumericLikeString(text: string): boolean {
  return text.trim() !== '' && !Number.isNaN(Number(text));
}

/** Infers the column/simple type pair from a single sample value. */
function inferMetadataColumnTypes(sample: unknown): {
  columnType: ColumnType;
  simpleType: SimpleType;
} {
  if (typeof sample === 'number') {
    return {
      columnType: Number.isInteger(sample) ? ColumnType.Int4 : ColumnType.Float8,
      simpleType: SimpleType.Number,
    };
  }
  if (typeof sample === 'boolean') {
    return { columnType: ColumnType.Bool, simpleType: SimpleType.Boolean };
  }
  if (sample instanceof Date) {
    return { columnType: ColumnType.Timestamp, simpleType: SimpleType.Date };
  }
  if (typeof sample === 'string') {
    // Numeric strings are checked first so that e.g. "2024" is a number, not a date.
    if (isNumericLikeString(sample)) {
      return {
        columnType: Number.isInteger(Number(sample)) ? ColumnType.Int4 : ColumnType.Float8,
        simpleType: SimpleType.Number,
      };
    }
    // Require a date-ish separator so loosely parseable text is not treated as a date.
    const looksLikeDate =
      !Number.isNaN(Date.parse(sample)) &&
      (sample.includes('-') || sample.includes('/') || sample.includes(':'));
    if (looksLikeDate) {
      return { columnType: ColumnType.Timestamp, simpleType: SimpleType.Date };
    }
    return { columnType: ColumnType.Varchar, simpleType: SimpleType.String };
  }
  return { columnType: ColumnType.Other, simpleType: SimpleType.Other };
}

/**
 * Computes min/max for a column in a single pass.
 * A loop is used instead of `Math.min(...values)` because spreading a very large
 * array into a call can exceed the engine's argument limit and throw a RangeError.
 * Values that cannot be interpreted as the column's simple type (including NaN
 * numbers and invalid dates) are skipped.
 */
function computeMetadataRange(
  values: unknown[],
  simpleType: SimpleType
): { minValue: unknown; maxValue: unknown } {
  if (simpleType === SimpleType.Number || simpleType === SimpleType.Date) {
    const asDate = simpleType === SimpleType.Date;
    let lowest: number | null = null;
    let highest: number | null = null;

    for (const value of values) {
      let numeric: number;
      if (asDate) {
        if (value instanceof Date) numeric = value.getTime();
        else if (typeof value === 'string') numeric = new Date(value).getTime();
        else continue;
      } else if (typeof value === 'number') {
        numeric = value;
      } else if (typeof value === 'string' && isNumericLikeString(value)) {
        numeric = Number(value);
      } else {
        continue;
      }

      if (Number.isNaN(numeric)) continue;
      if (lowest === null || numeric < lowest) lowest = numeric;
      if (highest === null || numeric > highest) highest = numeric;
    }

    if (lowest === null || highest === null) {
      return { minValue: null, maxValue: null };
    }
    return asDate
      ? { minValue: new Date(lowest), maxValue: new Date(highest) }
      : { minValue: lowest, maxValue: highest };
  }

  if (simpleType === SimpleType.String) {
    // `<` / `>` on strings compare UTF-16 code units, the same order as the default sort().
    let lowest: string | null = null;
    let highest: string | null = null;
    for (const value of values) {
      if (typeof value !== 'string') continue;
      if (lowest === null || value < lowest) lowest = value;
      if (highest === null || value > highest) highest = value;
    }
    return { minValue: lowest, maxValue: highest };
  }

  // Booleans and unclassified values have no meaningful range.
  return { minValue: null, maxValue: null };
}

/**
 * Counts distinct values. Dates are compared by timestamp, because a Set compares
 * objects by reference and would otherwise count every Date instance as unique.
 */
function countMetadataUniqueValues(values: unknown[]): number {
  const distinct = new Set<unknown>();
  const distinctTimestamps = new Set<number>();
  for (const value of values) {
    if (value instanceof Date) distinctTimestamps.add(value.getTime());
    else distinct.add(value);
  }
  return distinct.size + distinctTimestamps.size;
}

/**
 * Analyzes query results to create DataMetadata structure.
 *
 * Column names are taken from the keys of the first row. For each column the
 * type is inferred from the first non-null value only, then min/max and the
 * distinct-value count are computed over all non-null values.
 *
 * @param results Rows returned by a query, keyed by column name.
 * @returns Column count, row count and one `ColumnMetaData` entry per column
 *          (with the column name lower-cased). Empty input yields zero counts.
 */
function createDataMetadata(results: Record<string, unknown>[]): DataMetadata {
  if (results.length === 0) {
    return {
      column_count: 0,
      row_count: 0,
      column_metadata: [],
    };
  }

  const columnNames = Object.keys(results[0] ?? {});

  const columnMetadata = columnNames.map((columnName): ColumnMetaData => {
    const values = results
      .map((row) => row[columnName])
      .filter((v) => v !== null && v !== undefined);

    // Determine column type based on the first non-null value
    // (an all-null column yields `undefined` here and is classified as Other).
    const { columnType, simpleType } = inferMetadataColumnTypes(values[0]);
    const { minValue, maxValue } = computeMetadataRange(values, simpleType);

    return {
      name: columnName.toLowerCase(),
      min_value: minValue,
      max_value: maxValue,
      unique_values: countMetadataUniqueValues(values),
      simple_type: simpleType,
      type: columnType,
    };
  });

  return {
    column_count: columnNames.length,
    row_count: results.length,
    column_metadata: columnMetadata,
  };
}
|
|
|
|
|
|
/**
 * Ensures timeFrame values are properly quoted in YAML content.
 *
 * Finds each line of the form `timeFrame: value` and wraps the value in double
 * quotes unless it is already wrapped in matching single or double quotes.
 * This keeps values such as `2024` from being parsed as a number.
 *
 * - Only keys at the start of a line (optionally indented) are considered, so
 *   the text "timeFrame:" inside another value is left alone.
 * - Whitespace around the colon is matched on the same line only; a
 *   `timeFrame:` key with no value is left untouched instead of swallowing the
 *   following line.
 * - Backslashes and double quotes inside the value are escaped so the result
 *   is a valid YAML double-quoted scalar.
 *
 * @param ymlContent Raw YAML text.
 * @returns The YAML text with unquoted timeFrame values wrapped in double quotes.
 */
function ensureTimeFrameQuoted(ymlContent: string): string {
  // Captures: (indent + timeFrame + ':' + same-line whitespace) and (value until end of line).
  // `[ \t]*` is used instead of `\s*` so the match can never cross a line break.
  const timeFrameRegex = /^([ \t]*timeFrame[ \t]*:[ \t]*)([^\r\n]+)/gm;

  return ymlContent.replace(timeFrameRegex, (match: string, prefix: string, value: string) => {
    const trimmedValue = value.trim();

    // Nothing but whitespace after the colon: leave the line as is.
    if (trimmedValue === '') {
      return match;
    }

    // Already quoted = at least two characters, starting and ending with the same quote type.
    const isAlreadyQuoted =
      trimmedValue.length >= 2 &&
      ((trimmedValue.startsWith('"') && trimmedValue.endsWith('"')) ||
        (trimmedValue.startsWith("'") && trimmedValue.endsWith("'")));

    if (isAlreadyQuoted) {
      return match;
    }

    // Escape characters that are special inside a YAML double-quoted scalar.
    const escapedValue = trimmedValue.replace(/\\/g, '\\\\').replace(/"/g, '\\"');
    return `${prefix}"${escapedValue}"`;
  });
}
|
|
|
|
// Core interfaces matching Rust structs

/** One metric to create: an entry of the tool's `files` input array. */
interface MetricFileParams {
  /** Name/title of the metric as supplied by the caller. */
  name: string;
  /** Raw YAML text describing the metric. */
  yml_content: string;
}
|
|
|
|
// Zod schema for validating result metadata from DataSource.
// The whole object is optional, and so is each of its fields.
const resultMetadataSchema = z
  .object({
    totalRowCount: z.number().optional(),
    limited: z.boolean().optional(),
    maxRows: z.number().optional(),
  })
  .optional();

/** Static type inferred from `resultMetadataSchema` (may be `undefined`). */
type ResultMetadata = z.infer<typeof resultMetadataSchema>;
|
|
|
|
/**
 * Summary of a query execution.
 * NOTE(review): the producer is not visible in this section; field meanings
 * (e.g. the unit of `executionTime`) are inferred from names — confirm there.
 */
interface QueryMetadata {
  rowCount: number;
  totalRowCount: number;
  executionTime: number;
  limited: boolean;
  /** Optional row cap. */
  maxRows?: number;
}
|
|
|
|
/**
 * Outcome of a validation step: a success flag plus optional message, result
 * rows, query metadata and error text.
 * NOTE(review): the code that fills this in is outside this section.
 */
interface ValidationResult {
  success: boolean;
  message?: string;
  results?: Record<string, unknown>[];
  metadata?: QueryMetadata;
  error?: string;
}
|
|
|
|
/**
 * Result of processing a single metric file.
 * `createMetricFiles` treats an entry as successful only when `success` is true
 * and `metricFile`, `metricYml`, `message` and `results` are all present.
 */
interface MetricFileResult {
  success: boolean;
  metricFile?: FileWithId;
  metricYml?: MetricYml;
  message?: string;
  results?: Record<string, unknown>[];
  error?: string;
}
|
|
|
|
/** Input of `createMetricFiles`; mirrors the tool's `inputSchema`. */
interface CreateMetricFilesParams {
  /** Metrics to create in this call. */
  files: MetricFileParams[];
}
|
|
|
|
/** A metric that could not be created, reported in the tool output's `failed_files`. */
interface FailedFileCreation {
  /** Name of the metric that failed. */
  name: string;
  /** Error text describing the failure. */
  error: string;
}
|
|
|
|
/** A created file as reported in the tool output's `files` array. */
interface FileWithId {
  id: string;
  name: string;
  file_type: string;
  result_message?: string;
  results?: Record<string, unknown>[];
  /** Creation time as a string (format not visible in this section). */
  created_at: string;
  /** Last-update time as a string (format not visible in this section). */
  updated_at: string;
  version_number: number;
}
|
|
|
|
/** Output of `createMetricFiles`; mirrors the tool's `outputSchema`. */
interface CreateMetricFilesOutput {
  /** Human-readable summary of the call. */
  message: string;
  /** Elapsed time in milliseconds (computed as `Date.now() - startTime`). */
  duration: number;
  /** Files that were created. */
  files: FileWithId[];
  /** Files that could not be created. */
  failed_files: FailedFileCreation[];
}
|
|
|
|
// Tool implementation with complete schema included

/**
 * Tool definition (id `create-metrics-file`).
 *
 * - `description` is the prompt shown to the model; it embeds the complete
 *   metric YAML schema specification.
 * - `inputSchema` requires a non-empty `files` array of `{ name, yml_content }`.
 * - `outputSchema` describes the summary message, duration, created files and
 *   failed files.
 * - `execute` delegates to `createMetricFiles` with the tool context and the
 *   runtime context.
 *
 * The embedded schema is kept self-consistent: every `$ref` value is quoted
 * (an unquoted `#` starts a YAML comment), the comparison examples avoid `:`
 * (which the same rules forbid in `timeFrame`), and `description` is listed as
 * required in line with the field rules.
 */
export const createMetrics = createTool({
  id: 'create-metrics-file',
  description: `Creates metric configuration files with YAML content following the metric schema specification. Before using this tool, carefully consider the appropriate visualization type (bar, line, scatter, pie, combo, metric, table) and its specific configuration requirements. Each visualization has unique axis settings, formatting options, and data structure needs that must be thoroughly planned to create effective metrics. **This tool supports creating multiple metrics in a single call; prefer using bulk creation over creating metrics one by one.**

Only utilize the required/default fields unless the user specifically requests that optional fields be added.

## COMPLETE METRIC YAML SCHEMA SPECIFICATION

\`\`\`
# METRIC CONFIGURATION - YML STRUCTURE
# -------------------------------------
# REQUIRED Top-Level Fields: \`name\`, \`description\`, \`timeFrame\`, \`sql\`, \`chartConfig\`
#
# --- FIELD DETAILS & RULES ---
# \`name\`: Human-readable title (e.g., Total Sales).
#   - RULE: CANNOT contain underscores (\`_\`). Use spaces instead.
# \`description\`: Detailed explanation of the metric.
# \`timeFrame\`: Human-readable time period covered by the query, similar to a filter in a BI tool. MUST BE A VALID STRING.
#   - If doing 2024 as an example, you must do "2024" can't parse as a number.
#   - For queries with fixed date filters, use specific date ranges, e.g., "January 1, 2020 - December 31, 2020", "2024", "Q2 2024", "June 1, 2025".
#   - For queries with relative date filters or no date filter, use relative terms, e.g., "Today", "Yesterday", "Last 7 days", "Last 30 days", "Last Quarter", "Last 12 Months", "Year to Date", "All time", etc.
#   - For comparisons, use "Comparison - [Period 1] vs [Period 2]", with each period formatted according to whether it is fixed or relative, e.g., "Comparison - Last 30 days vs Previous 30 days" or "Comparison - June 1, 2025 - June 30, 2025 vs July 1, 2025 - July 31, 2025".
#   Rules:
#     - Must accurately reflect the date/time filter used in the \`sql\` field. Do not misrepresent the time range.
#     - Use full month names for dates, e.g., "January", not "Jan".
#     - Follow general quoting rules. CANNOT contain ':'.
#   Note: Respond only with the time period, without explanation or additional copy.
# \`sql\`: The SQL query for the metric.
#   - RULE: MUST use the pipe \`|\` block scalar style to preserve formatting and newlines.
#   - NOTE: Remember to use fully qualified names: DATABASE_NAME.SCHEMA_NAME.TABLE_NAME for tables and table_alias.column for columns. This applies to all table and column references, including those within Common Table Expressions (CTEs) and when selecting from CTEs.
#   - Example:
#     sql: |
#       SELECT ...
# \`chartConfig\`: Visualization settings.
#   - RULE: Must contain \`selectedChartType\` (bar, line, scatter, pie, combo, metric, table).
#   - RULE: Must contain \`columnLabelFormats\` defining format for ALL columns in the SQL result.
#   - RULE: Must contain ONE chart-specific config block based on \`selectedChartType\`:
#     - \`barAndLineAxis\` (for type: bar, line)
#     - \`scatterAxis\` (for type: scatter)
#     - \`pieChartAxis\` (for type: pie)
#     - \`comboChartAxis\` (for type: combo)
#     - \`metricColumnId\` (for type: metric)
#     - \`tableConfig\` (for type: table) - [Optional, if needed beyond basic columns]
#
# --- GENERAL YAML RULES ---
# 1. Use standard YAML syntax (indentation, colons for key-value, \`-\` for arrays).
# 2. Quoting: Generally avoid quotes for simple strings. Use double quotes (\`"..."\`) ONLY if a string contains special characters (like :, {, }, [, ], ,, &, *, #, ?, |, -, <, >, =, !, %, @, \`) or needs to preserve leading/trailing whitespace.
# 3. Metric name, timeframe, or description CANNOT contain \`:\`
# -------------------------------------

# --- FORMAL SCHEMA --- (Used for validation, reflects rules above)
type: object
name: Metric Configuration Schema
description: Metric definition with SQL query and visualization settings

properties:
  # NAME
  name:
    required: true
    type: string
    description: Human-readable title (e.g., Total Sales). NO underscores. Follow quoting rules. Should not contain \`:\`

  # DESCRIPTION
  description:
    required: true
    type: string
    description: |
      A natural language description of the metric, essentially rephrasing the 'name' field as a question or statement.
      Example: If name is "Total Sales", description could be "What are the total sales?".
      RULE: Should NOT describe the chart type, axes, or any visualization aspects.
      RULE: Follow general quoting rules.
      RULE: Should not contain ':'.

  # TIME FRAME
  timeFrame:
    required: true
    type: string
    description: |
      Human-readable time period covered by the SQL query, similar to a filter in a BI tool.
      RULE: Must accurately reflect the date/time filter used in the \`sql\` field. Do not misrepresent the time range.
      Examples:
      - Fixed Dates: "January 1, 2020 - December 31, 2020", "2024", "Q2 2024", "June 1, 2025"
      - Relative Dates: "Today", "Yesterday", "Last 7 days", "Last 30 days", "Last Quarter", "Last 12 Months", "Year to Date", "All time"
      - Comparisons: Use the format "Comparison - [Period 1] vs [Period 2]". Examples:
        - "Comparison - Last 30 days vs Previous 30 days"
        - "Comparison - June 1, 2025 - June 30, 2025 vs July 1, 2025 - July 31, 2025"
      RULE: Use full month names for dates, e.g., "January", not "Jan".
      RULE: Follow general quoting rules. CANNOT contain ':'.

  # SQL QUERY
  sql:
    required: true
    type: string
    description: |
      SQL query using YAML pipe syntax (|).
      The SQL query should be formatted with proper indentation using the YAML pipe (|) syntax.
      This ensures the multi-line SQL is properly parsed while preserving whitespace and newlines.
      IMPORTANT: Remember to use fully qualified names: DATABASE_NAME.SCHEMA_NAME.TABLE_NAME for tables and table_alias.column for columns. This rule is critical for all table and column references, including those within Common Table Expressions (CTEs) and when selecting from CTEs.
      Example:
        sql: |
          SELECT column1, column2
          FROM my_table
          WHERE condition;

  # CHART CONFIGURATION
  chartConfig:
    required: true
    description: Visualization settings (must include selectedChartType, columnLabelFormats, and ONE chart-specific block)
    allOf: # Base requirements for ALL chart types
      - \$ref: '#/definitions/base_chart_config'
    oneOf: # Specific block required based on type
      - \$ref: '#/definitions/bar_line_chart_config'
      - \$ref: '#/definitions/scatter_chart_config'
      - \$ref: '#/definitions/pie_chart_config'
      - \$ref: '#/definitions/combo_chart_config'
      - \$ref: '#/definitions/metric_chart_config'
      - \$ref: '#/definitions/table_chart_config'

required:
  - name
  - description
  - timeFrame
  - sql
  - chartConfig

definitions:
  # BASE CHART CONFIG (common parts used by ALL chart types)
  base_chart_config:
    type: object
    properties:
      selectedChartType:
        type: string
        description: Chart type (bar, line, scatter, pie, combo, metric, table)
        enum: [bar, line, scatter, pie, combo, metric, table]
      columnLabelFormats:
        type: object
        description: REQUIRED formatting for ALL columns returned by the SQL query.
        additionalProperties:
          \$ref: '#/definitions/column_label_format'
      # Optional base properties below
      columnSettings:
        type: object
        description: |-
          Visual settings applied per column.
          Keys MUST be LOWERCASE column names from the SQL query results.
          Example: \`total_sales: { showDataLabels: true }\`
        additionalProperties:
          \$ref: '#/definitions/column_settings'
      colors:
        type: array
        items:
          type: string
        description: |
          Default color palette.
          RULE: Hex color codes (e.g., #FF0000) MUST be enclosed in quotes (e.g., "#FF0000" or '#FF0000') because '#' signifies a comment otherwise. Double quotes are preferred for consistency.
          Use this parameter when the user asks about customizing chart colors, unless specified otherwise.
      showLegend:
        type: boolean
      gridLines:
        type: boolean
      showLegendHeadline:
        oneOf:
          - type: boolean
          - type: string
      goalLines:
        type: array
        items:
          \$ref: '#/definitions/goal_line'
      trendlines:
        type: array
        items:
          \$ref: '#/definitions/trendline'
      disableTooltip:
        type: boolean
      # Axis Configurations
      # RULE: By default, only add \`xAxisConfig\` and ONLY set its \`xAxisTimeInterval\` property
      #       when visualizing date/time data on the X-axis (e.g., line, bar, combo charts).
      #       Do NOT add other \`xAxisConfig\` properties, \`yAxisConfig\`, or \`y2AxisConfig\`
      #       unless the user explicitly asks for specific axis modifications.
      xAxisConfig:
        description: Controls X-axis properties. For date/time axes, MUST contain \`xAxisTimeInterval\` (day, week, month, quarter, year). Other properties control label visibility, title, rotation, and zoom. Only add when needed (dates) or requested by user.
        \$ref: '#/definitions/x_axis_config'
      yAxisConfig:
        description: Controls Y-axis properties. Only add if the user explicitly requests Y-axis modifications (e.g., hiding labels, changing title). Properties control label visibility, title, rotation, and zoom.
        \$ref: '#/definitions/y_axis_config'
      y2AxisConfig:
        description: Controls secondary Y-axis (Y2) properties, primarily for combo charts. Only add if the user explicitly requests Y2-axis modifications. Properties control label visibility, title, rotation, and zoom.
        \$ref: '#/definitions/y2_axis_config'
      categoryAxisStyleConfig:
        description: Optional style configuration for the category axis (color/grouping).
        \$ref: '#/definitions/category_axis_style_config'
    required:
      - selectedChartType
      - columnLabelFormats

  # AXIS CONFIGURATIONS
  x_axis_config:
    type: object
    properties:
      xAxisTimeInterval:
        type: string
        enum: [day, week, month, quarter, year, 'null']
        description: REQUIRED time interval for grouping date/time values on the X-axis (e.g., for line/combo charts). MUST be set if the X-axis represents time. Default: null.
      xAxisShowAxisLabel:
        type: boolean
        description: Show X-axis labels. Default: true.
      xAxisShowAxisTitle:
        type: boolean
        description: Show X-axis title. Default: true.
      xAxisAxisTitle:
        type: [string, 'null']
        description: X-axis title. Default: null (auto-generates from column names).
      xAxisLabelRotation:
        type: string # Representing numbers or 'auto'
        enum: ["0", "45", "90", auto]
        description: Label rotation. Default: auto.
      xAxisDataZoom:
        type: boolean
        description: Enable data zoom on X-axis. Default: false (User only).
    additionalProperties: false
    required:
      - xAxisTimeInterval

  y_axis_config:
    type: object
    properties:
      yAxisShowAxisLabel:
        type: boolean
        description: Show Y-axis labels. Default: true.
      yAxisShowAxisTitle:
        type: boolean
        description: Show Y-axis title. Default: true.
      yAxisAxisTitle:
        type: [string, 'null']
        description: Y-axis title. Default: null (uses first plotted column name).
      yAxisStartAxisAtZero:
        type: [boolean, 'null']
        description: Start Y-axis at zero. Default: true.
      yAxisScaleType:
        type: string
        enum: [log, linear]
        description: Scale type for Y-axis. Default: linear.
    additionalProperties: false

  y2_axis_config:
    type: object
    description: Secondary Y-axis configuration (for combo charts).
    properties:
      y2AxisShowAxisLabel:
        type: boolean
        description: Show Y2-axis labels. Default: true.
      y2AxisShowAxisTitle:
        type: boolean
        description: Show Y2-axis title. Default: true.
      y2AxisAxisTitle:
        type: [string, 'null']
        description: Y2-axis title. Default: null (uses first plotted column name).
      y2AxisStartAxisAtZero:
        type: [boolean, 'null']
        description: Start Y2-axis at zero. Default: true.
      y2AxisScaleType:
        type: string
        enum: [log, linear]
        description: Scale type for Y2-axis. Default: linear.
    additionalProperties: false

  category_axis_style_config:
    type: object
    description: Style configuration for the category axis (color/grouping).
    properties:
      categoryAxisTitle:
        type: [string, 'null']
        description: Title for the category axis.
    additionalProperties: false

  # COLUMN FORMATTING
  column_label_format:
    type: object
    properties:
      columnType:
        type: string
        description: number, string, date
        enum: [number, string, date]
      style:
        type: string
        enum:
          - currency # Note: The "$" sign is automatically prepended.
          - percent # Note: "%" sign is appended. For percentage values:
            # - If the value comes directly from a database column, use multiplier: 1
            # - If the value is calculated in your SQL query and not already multiplied by 100, use multiplier: 100
          - number
          - date # Note: For date columns, consider setting xAxisTimeInterval in xAxisConfig to control date grouping (day, week, month, quarter, year)
          - string
      multiplier:
        type: number
        description: Value to multiply the number by before display. Default value is 1. For percentages, the multiplier depends on how the data is sourced: if the value comes directly from a database column, use multiplier: 1; if the value is calculated in your SQL query and not already multiplied by 100, use multiplier: 100.
      displayName:
        type: string
        description: Custom display name for the column
      numberSeparatorStyle:
        type: string
        description: Style for number separators. Your option is ',' or a null value. Not null wrapped in quotes, a null value.
      minimumFractionDigits:
        type: integer
        description: Minimum number of fraction digits to display
      maximumFractionDigits:
        type: integer
        description: Maximum number of fraction digits to display
      prefix:
        type: string
      suffix:
        type: string
      replaceMissingDataWith:
        type: number
        description: Value to display when data is missing, needs to be set to 0. Should only be set on number columns. All others should be set to null.
      compactNumbers:
        type: boolean
        description: Whether to display numbers in compact form (e.g., 1K, 1M)
      currency:
        type: string
        description: Currency code for currency formatting (e.g., USD, EUR)
      dateFormat:
        type: string
        description: |
          Format string for date display (must be compatible with Day.js format strings).
          RULE: Choose format based on xAxisTimeInterval:
            - year: 'YYYY' (e.g., 2025)
            - quarter: '[Q]Q YYYY' (e.g., Q1 2025)
            - month: 'MMM YYYY' (e.g., Jan 2025) or 'MMMM' (e.g., January) if context is clear.
            - week/day: 'MMM D, YYYY' (e.g., Jan 25, 2025) or 'MMM D' (e.g., Jan 25) if context is clear.
      useRelativeTime:
        type: boolean
        description: Whether to display dates as relative time (e.g., 2 days ago)
      isUtc:
        type: boolean
        description: Whether to interpret dates as UTC
      convertNumberTo:
        type: string
        description: Optional. Convert numeric values to time units or date parts. This is a necessity for time series data when numbers are passed instead of the date.
        enum:
          - day_of_week
          - month_of_year
          - quarter

    required:
      - columnType
      - style
      - replaceMissingDataWith
      - numberSeparatorStyle

  # COLUMN VISUAL SETTINGS
  column_settings:
    type: object
    description: Optional visual settings per LOWERCASE column name.
    properties:
      showDataLabels:
        type: boolean
      columnVisualization:
        type: string
        enum:
          - bar
          - line
          - dot
      lineWidth:
        type: number
      lineStyle:
        type: string
        enum:
          - area
          - line
      lineType:
        type: string
        enum:
          - normal
          - smooth
          - step

  # CHART-SPECIFIC CONFIGURATIONS
  bar_line_chart_config:
    allOf:
      - \$ref: '#/definitions/base_chart_config'
      - type: object
        properties:
          selectedChartType:
            enum:
              - bar
              - line
          barAndLineAxis:
            type: object
            properties:
              x:
                type: array
                items:
                  type: string
              y:
                type: array
                items:
                  type: string
                description: LOWERCASE column name from SQL for Y-axis.
              category:
                type: array
                items:
                  type: string
                description: LOWERCASE column name from SQL for category grouping.
            required:
              - x
              - y
          barLayout:
            type: string
            enum:
              - horizontal
              - vertical
          barGroupType:
            type: string
            enum:
              - stack
              - group
              - percentage-stack
        required:
          - selectedChartType
          - barAndLineAxis

  scatter_chart_config:
    allOf:
      - \$ref: '#/definitions/base_chart_config'
      - type: object
        properties:
          selectedChartType:
            enum:
              - scatter
          scatterAxis:
            type: object
            properties:
              x:
                type: array
                items:
                  type: string
              y:
                type: array
                items:
                  type: string
              category:
                type: array
                items:
                  type: string
              size:
                type: array
                items:
                  type: string
            required:
              - x
              - y
        required:
          - selectedChartType
          - scatterAxis

  pie_chart_config:
    allOf:
      - \$ref: '#/definitions/base_chart_config'
      - type: object
        properties:
          selectedChartType:
            enum:
              - pie
          pieChartAxis:
            type: object
            properties:
              x:
                type: array
                items:
                  type: string
              y:
                type: array
                items:
                  type: string
            required:
              - x
              - y
        required:
          - selectedChartType
          - pieChartAxis

  combo_chart_config:
    allOf:
      - \$ref: '#/definitions/base_chart_config'
      - type: object
        properties:
          selectedChartType:
            enum:
              - combo
          comboChartAxis:
            type: object
            properties:
              x:
                type: array
                items:
                  type: string
              y:
                type: array
                items:
                  type: string
              y2:
                type: array
                items:
                  type: string
            required:
              - x
              - y
              - y2
        required:
          - selectedChartType
          - comboChartAxis

  metric_chart_config:
    allOf:
      - \$ref: '#/definitions/base_chart_config'
      - type: object
        properties:
          selectedChartType:
            enum:
              - metric
          metricColumnId:
            type: string
            description: LOWERCASE column name from SQL for the main metric value.
          metricValueAggregate:
            type: string
            enum:
              - sum
              - average
              - median
              - max
              - min
              - count
              - first
            description: Aggregate function for metric value
          metricHeader:
            oneOf:
              - type: string
                description: Simple string title for the metric header
              - type: object
                properties:
                  columnId:
                    type: string
                    description: Which column to use for the header
                  useValue:
                    type: boolean
                    description: Whether to display the key or the value in the chart
                  aggregate:
                    type: string
                    enum:
                      - sum
                      - average
                      - median
                      - max
                      - min
                      - count
                      - first
                    description: Optional aggregation method, defaults to sum
                required:
                  - columnId
                  - useValue
                description: Configuration for a derived metric header
          metricSubHeader:
            oneOf:
              - type: string
                description: Simple string title for the metric sub-header
              - type: object
                properties:
                  columnId:
                    type: string
                    description: Which column to use for the sub-header
                  useValue:
                    type: boolean
                    description: Whether to display the key or the value in the chart
                  aggregate:
                    type: string
                    enum:
                      - sum
                      - average
                      - median
                      - max
                      - min
                      - count
                      - first
                    description: Optional aggregation method, defaults to sum
                required:
                  - columnId
                  - useValue
                description: Configuration for a derived metric sub-header
        required:
          - selectedChartType
          - metricColumnId

  table_chart_config:
    allOf:
      - \$ref: '#/definitions/base_chart_config'
      - type: object
        properties:
          selectedChartType:
            enum:
              - table
          tableColumnOrder:
            type: array
            items:
              type: string
        required:
          - selectedChartType
          # No additional required fields for table chart

  # HELPER OBJECTS
  goal_line:
    type: object
    properties:
      show:
        type: boolean
      value:
        type: number
      goalLineLabel:
        type: string

  trendline:
    type: object
    properties:
      type:
        type: string
        enum:
          - average
          - linear_regression
          - min
          - max
          - median
      columnId:
        type: string
    required:
      - type
      - columnId
\`\`\`

**CRITICAL:** This is the complete schema specification. Follow it exactly - every property, enum value, and requirement listed above must be respected. Pay special attention to:

1. **Required properties** for each chart type
2. **Enum values** for each field (e.g., selectedChartType, columnType, style)
3. **Column name casing** (must be lowercase in axis configurations)
4. **Complete columnLabelFormats** for every SQL result column
5. **Proper YAML syntax** with pipe (|) for SQL blocks
6. **Chart-specific axis configurations** (barAndLineAxis, scatterAxis, etc.)
7. **Date formatting rules** that match xAxisTimeInterval settings`,
  inputSchema: z.object({
    files: z
      .array(
        z.object({
          name: z
            .string()
            .describe(
              "The natural language name/title for the metric, exactly matching the 'name' field within the YML content. This name will identify the metric in the UI. Do not include file extensions or use file path characters."
            ),
          yml_content: z
            .string()
            .describe(
              "The YAML content for a single metric, adhering to the comprehensive metric schema. Multiple metrics can be created in one call by providing multiple entries in the 'files' array. **Prefer creating metrics in bulk.**"
            ),
        })
      )
      .min(1)
      .describe(
        'List of file parameters to create. The files will contain YAML content that adheres to the metric schema specification.'
      ),
  }),
  outputSchema: z.object({
    message: z.string(),
    duration: z.number(),
    files: z.array(
      z.object({
        id: z.string(),
        name: z.string(),
        file_type: z.string(),
        result_message: z.string().optional(),
        results: z.array(z.record(z.any())).optional(),
        created_at: z.string(),
        updated_at: z.string(),
        version_number: z.number(),
      })
    ),
    failed_files: z.array(
      z.object({
        name: z.string(),
        error: z.string(),
      })
    ),
  }),
  // The casts bridge the tool framework's context types to this module's own
  // parameter types; `createMetricFiles` is defined further down in the file.
  execute: async ({ context, runtimeContext }) => {
    return await createMetricFiles(
      context as CreateMetricFilesParams,
      runtimeContext as RuntimeContext<AnalystRuntimeContext>
    );
  },
});
|
|
|
|
/**
 * Validates, persists and reports on a batch of metric YAML files.
 *
 * Flow:
 *  1. Read the data source / user / organization identifiers from the runtime context and
 *     return early (with empty `files` / `failed_files`) when any of them is missing.
 *  2. Run every file through `processMetricFile` concurrently.
 *  3. Insert the files that passed, plus an owner `assetPermissions` row for each, inside a
 *     single transaction.
 *  4. Re-read the inserted ids; any id that cannot be found is reported as failed and is
 *     NOT reported as created.
 *  5. Track file associations for the created files when a `messageId` is available.
 *
 * @param params - Tool input containing the `files` array (`name` + `yml_content`).
 * @param runtimeContext - Workflow runtime context providing ids and timing information.
 * @returns Summary message, elapsed duration in ms, created files and failed files.
 */
const createMetricFiles = wrapTraced(
  async (
    params: CreateMetricFilesParams,
    runtimeContext: RuntimeContext<AnalystRuntimeContext>
  ): Promise<CreateMetricFilesOutput> => {
    const startTime = Date.now();
    const { files } = params;

    const createdFiles: FileWithId[] = [];
    const failedFiles: FailedFileCreation[] = [];

    // Extract context values
    const dataSourceId = runtimeContext?.get('dataSourceId') as string;
    const dataSourceSyntax = (runtimeContext?.get('dataSourceSyntax') || 'generic') as string;
    const userId = runtimeContext?.get('userId') as string;
    const organizationId = runtimeContext?.get('organizationId') as string;
    const workflowStartTime = runtimeContext?.get('workflowStartTime') as number | undefined;
    const messageId = runtimeContext?.get('messageId') as string | undefined;

    // Generate a unique workflow ID using start time and data source
    const workflowId = workflowStartTime
      ? `workflow-${workflowStartTime}-${dataSourceId}`
      : `workflow-${Date.now()}-${dataSourceId}`;

    if (!dataSourceId) {
      return {
        message: 'Unable to identify the data source. Please refresh and try again.',
        duration: Date.now() - startTime,
        files: [],
        failed_files: [],
      };
    }
    if (!userId) {
      return {
        message: 'Unable to verify your identity. Please log in again.',
        duration: Date.now() - startTime,
        files: [],
        failed_files: [],
      };
    }
    if (!organizationId) {
      return {
        message: 'Unable to access your organization. Please check your permissions.',
        duration: Date.now() - startTime,
        files: [],
        failed_files: [],
      };
    }

    // Process files concurrently
    const processResults = await Promise.allSettled(
      files.map(async (file) => {
        const result = await processMetricFile(
          file.name,
          file.yml_content,
          dataSourceId,
          dataSourceSyntax,
          userId,
          organizationId,
          workflowId
        );
        return { fileName: file.name, result };
      })
    );

    const successfulProcessing: Array<{
      fileName: string;
      metricFile: FileWithId;
      metricYml: MetricYml;
      message: string;
      results: Record<string, unknown>[];
    }> = [];

    // Separate successful from failed processing.
    // allSettled keeps input order, so the index maps a rejection back to its file name.
    processResults.forEach((processResult, index) => {
      if (processResult.status === 'fulfilled') {
        const { fileName, result } = processResult.value;
        if (
          result.success &&
          result.metricFile &&
          result.metricYml &&
          result.message &&
          result.results
        ) {
          successfulProcessing.push({
            fileName,
            metricFile: result.metricFile,
            metricYml: result.metricYml,
            message: result.message,
            results: result.results,
          });
        } else {
          failedFiles.push({
            name: fileName,
            error: result.error || 'Unknown error',
          });
        }
      } else {
        failedFiles.push({
          name: files[index]?.name ?? 'unknown',
          error: processResult.reason?.message || 'Processing failed',
        });
      }
    });

    // Database operations
    if (successfulProcessing.length > 0) {
      try {
        await db.transaction(async (tx: typeof db) => {
          // Insert metric files
          const metricRecords = successfulProcessing.map((sp) => ({
            id: sp.metricFile.id,
            name: sp.metricFile.name,
            fileName: sp.fileName,
            content: sp.metricYml,
            verification: 'notRequested' as const,
            evaluationObj: null,
            evaluationSummary: null,
            evaluationScore: null,
            organizationId,
            createdBy: userId,
            createdAt: sp.metricFile.created_at,
            updatedAt: sp.metricFile.updated_at,
            deletedAt: null,
            publiclyAccessible: false,
            publiclyEnabledBy: null,
            publicExpiryDate: null,
            versionHistory: createInitialMetricVersionHistory(
              sp.metricYml,
              sp.metricFile.created_at
            ),
            dataMetadata: sp.results ? createDataMetadata(sp.results) : null,
            publicPassword: null,
            dataSourceId,
          }));
          await tx.insert(metricFiles).values(metricRecords);

          // Insert asset permissions
          const assetPermissionRecords = metricRecords.map((record) => ({
            identityId: userId,
            identityType: 'user' as const,
            assetId: record.id,
            assetType: 'metric_file' as const,
            role: 'owner' as const,
            createdAt: new Date().toISOString(),
            updatedAt: new Date().toISOString(),
            deletedAt: null,
            createdBy: userId,
            updatedBy: userId,
          }));
          await tx.insert(assetPermissions).values(assetPermissionRecords);
        });

        // Critical save verification - ensure records were actually saved.
        // Ids that cannot be read back are collected here so they are excluded from the
        // "created" output instead of being reported as both created and failed.
        const unverifiedIds = new Set<string>();
        try {
          const savedMetricIds = successfulProcessing.map((sp) => sp.metricFile.id);
          const verificationResult = await db
            .select({ id: metricFiles.id })
            .from(metricFiles)
            .where(inArray(metricFiles.id, savedMetricIds))
            .limit(savedMetricIds.length);

          if (verificationResult.length !== savedMetricIds.length) {
            console.error('[Critical Save Verification] Mismatch in saved records:', {
              expected: savedMetricIds.length,
              actual: verificationResult.length,
              messageId,
              workflowId,
            });

            // Mark files as failed if verification doesn't match
            const savedIds = new Set(verificationResult.map((r) => r.id));
            for (const sp of successfulProcessing) {
              if (!savedIds.has(sp.metricFile.id)) {
                unverifiedIds.add(sp.metricFile.id);
                failedFiles.push({
                  name: sp.metricFile.name,
                  error: 'Critical save verification failed - record not found after save',
                });
              }
            }
          }
        } catch (verifyError) {
          console.error('[Critical Save Verification] Error during verification:', verifyError);
          // Don't fail the entire operation, but log the issue
        }

        // Prepare successful files output (verified records only)
        for (const sp of successfulProcessing) {
          if (unverifiedIds.has(sp.metricFile.id)) {
            continue;
          }
          createdFiles.push({
            id: sp.metricFile.id,
            name: sp.metricFile.name,
            file_type: sp.metricFile.file_type,
            result_message: sp.metricFile.result_message || '',
            results: sp.metricFile.results || [],
            created_at: sp.metricFile.created_at,
            updated_at: sp.metricFile.updated_at,
            version_number: sp.metricFile.version_number,
          });
        }
      } catch (error) {
        // Add all successful processing to failed if database operation fails
        for (const sp of successfulProcessing) {
          failedFiles.push({
            name: sp.metricFile.name,
            error: `Failed to save to database: ${error instanceof Error ? error.message : 'Unknown error'}`,
          });
        }
      }
    }

    const duration = Date.now() - startTime;

    const message = generateResultMessage(createdFiles, failedFiles);

    // Track file associations if we have a messageId and created files
    if (messageId && createdFiles.length > 0) {
      await trackFileAssociations({
        messageId,
        files: createdFiles.map((file) => ({
          id: file.id,
          version: file.version_number,
        })),
      });
    }

    return {
      message,
      duration,
      files: createdFiles,
      failed_files: failedFiles,
    };
  },
  { name: 'create-metrics-file' }
);
|
|
|
|
/**
 * Parses, validates and dry-runs a single metric YAML definition.
 *
 * Steps: quote `timeFrame` values, parse the YAML, validate it against the metric schema,
 * validate/adjust bar-line axes, execute the SQL through `validateSql`, and build the
 * in-memory file record. Nothing is written to the database here.
 *
 * Never throws: every failure is converted into `{ success: false, error }`.
 *
 * @param _fileName - Unused; kept for signature compatibility with callers.
 * @param ymlContent - Raw YAML text of the metric.
 * @param dataSourceId - Data source the SQL is executed against.
 * @param dataSourceDialect - SQL dialect forwarded to the permission check.
 * @param userId - User whose permissions are checked for the SQL.
 * @param _organizationId - Unused; kept for signature compatibility with callers.
 * @param workflowId - Key used to look up the workflow's data source manager.
 * @returns On success the file record, validated YAML, result message and sample rows.
 */
async function processMetricFile(
  _fileName: string,
  ymlContent: string,
  dataSourceId: string,
  dataSourceDialect: string,
  userId: string,
  _organizationId: string,
  workflowId: string
): Promise<MetricFileResult> {
  try {
    // Ensure timeFrame values are properly quoted before parsing
    const fixedYmlContent = ensureTimeFrameQuoted(ymlContent);

    // Parse and validate YAML
    const parsedYml = yaml.parse(fixedYmlContent);
    const metricYml = validateMetricYml(parsedYml);

    // Validate and adjust bar/line chart axes
    const axisValidation = validateAndAdjustBarLineAxes(metricYml);
    if (!axisValidation.isValid) {
      return {
        success: false,
        error: axisValidation.error || 'Invalid bar/line chart axis configuration',
      };
    }

    // Use adjusted YAML if axes were swapped
    const finalMetricYml =
      axisValidation.shouldSwapAxes && axisValidation.adjustedYml
        ? axisValidation.adjustedYml
        : metricYml;

    // Generate a fresh random (v4) UUID for the new metric
    const metricId = randomUUID();

    // Validate SQL by running it
    const sqlValidationResult = await validateSql(
      finalMetricYml.sql,
      dataSourceId,
      workflowId,
      userId,
      dataSourceDialect
    );

    if (!sqlValidationResult.success) {
      return {
        success: false,
        error: `The SQL query has an issue: ${sqlValidationResult.error}. Please check your query syntax.`,
      };
    }

    // Create metric file object
    const now = new Date().toISOString();
    const metricFile: FileWithId = {
      id: metricId,
      name: finalMetricYml.name,
      file_type: 'metric',
      result_message: sqlValidationResult.message || '',
      results: sqlValidationResult.results || [],
      created_at: now,
      updated_at: now,
      version_number: 1,
    };

    return {
      success: true,
      metricFile,
      metricYml: finalMetricYml,
      message: sqlValidationResult.message || '',
      results: sqlValidationResult.results || [],
    };
  } catch (error) {
    let errorMessage = 'Unknown error';

    if (error instanceof z.ZodError) {
      // Return the actual Zod validation errors for better debugging
      const issues = error.issues
        .map((issue) => {
          const path = issue.path.length > 0 ? ` at path '${issue.path.join('.')}'` : '';
          return `${issue.message}${path}`;
        })
        .join('; ');
      errorMessage = `The metric configuration is invalid: ${issues}`;
    } else if (error instanceof Error) {
      // The yaml library identifies parse failures through `error.name`; the message holds
      // only the human-readable description. The message check is kept as a fallback.
      const isYamlParseError =
        error.name === 'YAMLParseError' || error.message.includes('YAMLParseError');
      if (isYamlParseError) {
        errorMessage = 'The YAML format is incorrect. Please check the syntax and indentation.';
      } else {
        errorMessage = error.message;
      }
    }

    return {
      success: false,
      error: errorMessage,
    };
  }
}
|
|
|
|
/**
 * Dry-runs a metric's SQL against the workflow's data source.
 *
 * Order of checks: non-empty query, presence of `select` and `from` (case-insensitive
 * substring checks), table permissions for the user, data source availability, then
 * execution. Execution is attempted up to `MAX_RETRIES + 1` times; only failures whose
 * message mentions a timeout are retried, with a pause taken from `RETRY_DELAYS`.
 *
 * Never throws: every failure is returned as `{ success: false, error }`.
 *
 * @param sqlQuery - SQL text to validate.
 * @param dataSourceId - Data source to execute against.
 * @param workflowId - Key used to obtain the workflow's data source manager.
 * @param userId - User whose table permissions are checked.
 * @param dataSourceSyntax - Optional SQL dialect forwarded to the permission check.
 * @returns On success a message, up to 25 sample rows and query metadata.
 */
async function validateSql(
  sqlQuery: string,
  dataSourceId: string,
  workflowId: string,
  userId: string,
  dataSourceSyntax?: string
): Promise<ValidationResult> {
  // Shared by both failure paths of the retry loop below.
  const mentionsTimeout = (text: string): boolean => {
    const lowered = text.toLowerCase();
    return lowered.includes('timeout') || lowered.includes('timed out');
  };
  const pause = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));

  try {
    if (!sqlQuery.trim()) {
      return { success: false, error: 'SQL query cannot be empty' };
    }

    // Basic SQL validation
    const loweredSql = sqlQuery.toLowerCase();
    if (!loweredSql.includes('select')) {
      return { success: false, error: 'SQL query must contain SELECT statement' };
    }
    if (!loweredSql.includes('from')) {
      return { success: false, error: 'SQL query must contain FROM clause' };
    }

    // Validate permissions before attempting to get data source
    const permissionCheck = await validateSqlPermissions(sqlQuery, userId, dataSourceSyntax);
    if (!permissionCheck.isAuthorized) {
      return {
        success: false,
        error: createPermissionErrorMessage(permissionCheck.unauthorizedTables),
      };
    }

    // Get data source from workflow manager (reuses existing connections)
    const manager = getWorkflowDataSourceManager(workflowId);
    let dataSource: DataSource;
    try {
      dataSource = await manager.getDataSource(dataSourceId);
    } catch (_error) {
      return {
        success: false,
        error: `Unable to connect to your data source. Please check that it's properly configured and accessible.`,
      };
    }

    // Retry configuration for SQL validation
    const MAX_RETRIES = 3;
    const TIMEOUT_MS = 120000; // 120 seconds (2 minutes) per attempt for Snowflake queue handling
    const RETRY_DELAYS = [1000, 3000, 6000]; // 1s, 3s, 6s

    // Attempt execution with retries
    for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
      const retriesRemain = attempt < MAX_RETRIES;

      try {
        // Execute with a row limit and timeout; maxRows limits results at the adapter
        // level without modifying the SQL text (preserves Snowflake caching).
        const execution = await dataSource.execute({
          sql: sqlQuery,
          options: {
            maxRows: 1000, // Additional safety limit at adapter level
            timeout: TIMEOUT_MS,
          },
        });

        if (!execution.success) {
          const failureText = execution.error?.message || 'Query execution failed';

          // Not a timeout or no more retries
          if (!(retriesRemain && mentionsTimeout(failureText))) {
            return {
              success: false,
              error: failureText,
            };
          }

          // Wait before retry
          const delay = RETRY_DELAYS[attempt] || 6000;
          console.warn(
            `[create-metrics] SQL validation timeout on attempt ${attempt + 1}/${MAX_RETRIES + 1}. Retrying in ${delay}ms...`,
            {
              sqlPreview: `${sqlQuery.substring(0, 100)}...`,
              attempt: attempt + 1,
              nextDelay: delay,
            }
          );
          await pause(delay);
          continue; // Retry
        }

        const fetchedRows = execution.rows || [];
        // Truncate results to 25 records for display in validation
        const sampleRows = fetchedRows.slice(0, 25);

        // Validate metadata with Zod schema for runtime safety
        const metadataCheck = resultMetadataSchema.safeParse(execution.metadata);
        const parsedMetadata: ResultMetadata = metadataCheck.success
          ? metadataCheck.data
          : undefined;

        const metadata: QueryMetadata = {
          rowCount: sampleRows.length,
          totalRowCount: parsedMetadata?.totalRowCount ?? fetchedRows.length,
          executionTime: execution.executionTime || 100,
          limited: parsedMetadata?.limited ?? false,
          maxRows: parsedMetadata?.maxRows ?? 5000,
        };

        const message: string =
          fetchedRows.length === 0
            ? 'Query executed successfully but returned no records'
            : execution.metadata?.limited
              ? `Query validated successfully. Results were limited to ${execution.metadata.maxRows} rows for memory protection (query may return more rows when executed)${sampleRows.length < fetchedRows.length ? ` - showing first 25 of ${fetchedRows.length} fetched` : ''}`
              : `Query validated successfully and returned ${fetchedRows.length} records${fetchedRows.length > 25 ? ' (showing sample of first 25)' : ''}`;

        return {
          success: true,
          message,
          results: sampleRows,
          metadata,
        };
      } catch (error) {
        const thrownText = error instanceof Error ? error.message : 'SQL validation failed';

        // Not a timeout or no more retries
        if (!(retriesRemain && mentionsTimeout(thrownText))) {
          return {
            success: false,
            error: thrownText,
          };
        }

        // Wait before retry
        const delay = RETRY_DELAYS[attempt] || 6000;
        console.warn(
          `[create-metrics] SQL validation timeout (exception) on attempt ${attempt + 1}/${MAX_RETRIES + 1}. Retrying in ${delay}ms...`,
          {
            sqlPreview: `${sqlQuery.substring(0, 100)}...`,
            attempt: attempt + 1,
            nextDelay: delay,
            error: thrownText,
          }
        );
        await pause(delay);
        // Falls through to the next loop iteration (retry)
      }
    }

    // Should not reach here, but just in case
    return {
      success: false,
      error: 'Max retries exceeded for SQL validation',
    };
    // Note: We don't close the data source here anymore - it's managed by the workflow manager
  } catch (error) {
    return {
      success: false,
      error: error instanceof Error ? error.message : 'SQL validation failed',
    };
  }
}
|
|
|
|
/**
 * Builds the human/LLM-readable summary for a create-metrics run.
 *
 * - No failures: a single success sentence.
 * - Exactly one failure: optional success sentence, a space, then the failure details.
 * - Several failures: optional success sentence followed by a newline-separated list.
 *
 * @param createdFiles - Files that were created; only the count is used.
 * @param failedFiles - Failures; each contributes its `name` and `error` to the message.
 * @returns The formatted summary message.
 */
function generateResultMessage(
  createdFiles: FileWithId[],
  failedFiles: FailedFileCreation[]
): string {
  if (failedFiles.length === 0) {
    return `Successfully created ${createdFiles.length} metric files.`;
  }

  // Ends with a trailing space when non-empty so it can be prefixed directly.
  const successMsg =
    createdFiles.length > 0 ? `Successfully created ${createdFiles.length} metric files. ` : '';

  const failures = failedFiles.map(
    (failure) =>
      `Failed to create '${failure.name}': ${failure.error}.\n\nPlease recreate the metric from scratch rather than attempting to modify. This error could be due to:\n- Using a dataset that doesn't exist (please reevaluate the available datasets in the chat conversation)\n- Invalid configuration in the metric file\n- Special characters in the metric name or SQL query\n- Syntax errors in the SQL query`
  );

  if (failures.length === 1) {
    // Do not trim successMsg here: trimming glued the two sentences together
    // ("...metric files.Failed to create...").
    return `${successMsg}${failures[0]}.`;
  }

  return `${successMsg}Failed to create ${failures.length} metric files:\n${failures.join('\n')}`;
}
|
|
|
|
/**
 * Decodes JSON string escape sequences in a raw (still-escaped) string fragment.
 *
 * Works in a single left-to-right pass so that an escaped backslash followed by `n`
 * stays a literal backslash + `n` instead of turning into a newline. Unknown or
 * incomplete escapes (e.g. a truncated `\u00`) are left untouched.
 */
function decodeJsonStringFragment(raw: string): string {
  return raw.replace(/\\(u[0-9a-fA-F]{4}|[\s\S])/g, (match: string, body: string) => {
    switch (body.charAt(0)) {
      case '"':
        return '"';
      case '\\':
        return '\\';
      case '/':
        return '/';
      case 'n':
        return '\n';
      case 'r':
        return '\r';
      case 't':
        return '\t';
      case 'b':
        return '\b';
      case 'f':
        return '\f';
      case 'u':
        return body.length === 5 ? String.fromCharCode(Number.parseInt(body.slice(1), 16)) : match;
      default:
        return match;
    }
  });
}

/**
 * Optimistic parsing function for streaming create-metrics-file tool arguments.
 * Extracts the files array as it's being built incrementally.
 *
 * Resolution order:
 *  1. The text is complete JSON: return its `files` value (or `undefined`).
 *  2. The text after `"files": [` becomes valid JSON once a closing `]` is appended.
 *  3. Regex extraction of complete `{ "name", "yml_content" }` objects; if none are
 *     complete, the first partially streamed object is returned instead.
 *  4. Only the opening of the `files` array was seen: `{ files: [] }`.
 *  5. Nothing recognizable yet: `null`.
 *
 * @param accumulatedText - The tool-argument JSON text received so far.
 * @returns The files recovered so far, or `null` when the `files` field has not started.
 * @throws Error if the input is not a string, or on any non-`SyntaxError` failure.
 */
export function parseStreamingArgs(
  accumulatedText: string
): Partial<{ files: Array<{ name: string; yml_content: string }> }> | null {
  // Validate input type
  if (typeof accumulatedText !== 'string') {
    throw new Error(`parseStreamingArgs expects string input, got ${typeof accumulatedText}`);
  }

  try {
    // First try to parse as complete JSON
    const parsed = JSON.parse(accumulatedText);
    return {
      files: parsed.files || undefined,
    };
  } catch (error) {
    // Only catch JSON parse errors - let other errors bubble up
    if (error instanceof SyntaxError) {
      // If JSON is incomplete, try to extract and reconstruct the files array
      const filesMatch = accumulatedText.match(/"files"\s*:\s*\[(.*)/s);
      if (filesMatch && filesMatch[1] !== undefined) {
        const arrayContent = filesMatch[1];

        try {
          // Try to parse the array content by adding closing bracket
          const testArray = `[${arrayContent}]`;
          const parsed = JSON.parse(testArray);
          return { files: parsed };
        } catch {
          // If that fails, try to extract file objects (both complete and incomplete)
          const files: Array<{ name: string; yml_content: string }> = [];

          // First, try to match complete file objects
          const completeFileMatches = arrayContent.matchAll(
            /\{\s*"name"\s*:\s*"([^"]*?)"\s*,\s*"yml_content"\s*:\s*"((?:[^"\\]|\\.)*)"\s*\}/g
          );

          for (const match of completeFileMatches) {
            if (match[1] !== undefined && match[2] !== undefined) {
              files.push({
                name: match[1],
                // Decode escapes, then ensure timeFrame is properly quoted
                yml_content: ensureTimeFrameQuoted(decodeJsonStringFragment(match[2])),
              });
            }
          }

          // If no complete files found, try to extract partial file objects
          if (files.length === 0) {
            // Try to match incomplete file objects that have at least name and partial yml_content
            const incompleteFileMatch = arrayContent.match(
              /\{\s*"name"\s*:\s*"([^"]*?)"\s*,\s*"yml_content"\s*:\s*"((?:[^"\\]|\\.)*)/
            );

            if (
              incompleteFileMatch &&
              incompleteFileMatch[1] !== undefined &&
              incompleteFileMatch[2] !== undefined
            ) {
              files.push({
                name: incompleteFileMatch[1],
                // Decode escapes, then ensure timeFrame is properly quoted
                yml_content: ensureTimeFrameQuoted(
                  decodeJsonStringFragment(incompleteFileMatch[2])
                ),
              });
            }
          }

          return { files };
        }
      }

      // Check if we at least have the start of the files field
      const partialMatch = accumulatedText.match(/"files"\s*:\s*\[/);
      if (partialMatch) {
        return { files: [] };
      }

      return null;
    }

    // Unexpected error - re-throw with context
    throw new Error(
      `Unexpected error in parseStreamingArgs: ${error instanceof Error ? error.message : 'Unknown error'}`
    );
  }
}
|