buster/packages/ai/src/tools/visualization-tools/create-metrics-file-tool.ts

1599 lines
54 KiB
TypeScript

import { randomUUID } from 'node:crypto';
import type { DataSource } from '@buster/data-source';
import { assetPermissions, db, metricFiles } from '@buster/database';
import type { RuntimeContext } from '@mastra/core/runtime-context';
import { createTool } from '@mastra/core/tools';
import { wrapTraced } from 'braintrust';
import { inArray } from 'drizzle-orm';
import * as yaml from 'yaml';
import { z } from 'zod';
import { getWorkflowDataSourceManager } from '../../utils/data-source-manager';
import { createPermissionErrorMessage, validateSqlPermissions } from '../../utils/sql-permissions';
import type { AnalystRuntimeContext } from '../../workflows/analyst-workflow';
import { validateAndAdjustBarLineAxes } from './bar-line-axis-validator';
import { trackFileAssociations } from './file-tracking-helper';
import { createInitialMetricVersionHistory, validateMetricYml } from './version-history-helpers';
import type { MetricYml } from './version-history-types';
// TypeScript types matching Rust DataMetadata structure

/** Coarse classification of a result column, derived from its first non-null value. */
enum SimpleType {
  Number = 'number',
  String = 'string',
  Date = 'date',
  Boolean = 'boolean',
  Other = 'other',
}

/** Fine-grained column type tag stored next to the SimpleType of a column. */
enum ColumnType {
  Int2 = 'int2',
  Int4 = 'int4',
  Int8 = 'int8',
  Float4 = 'float4',
  Float8 = 'float8',
  Varchar = 'varchar',
  Text = 'text',
  Bool = 'bool',
  Date = 'date',
  Timestamp = 'timestamp',
  Timestamptz = 'timestamptz',
  Other = 'other',
}

/** Summary statistics for a single column of a query result. */
interface ColumnMetaData {
  /** Column name, lowercased. */
  name: string;
  /** Smallest observed value (number, Date or string); null when none could be computed. */
  min_value: unknown;
  /** Largest observed value (number, Date or string); null when none could be computed. */
  max_value: unknown;
  /** Count of distinct non-null values. */
  unique_values: number;
  simple_type: SimpleType;
  type: ColumnType;
}

/** Summary of a whole query result: dimensions plus per-column metadata. */
interface DataMetadata {
  column_count: number;
  row_count: number;
  column_metadata: ColumnMetaData[];
}

/**
 * Analyzes query results to create DataMetadata structure.
 *
 * Column names are taken from the first row. The type of each column is inferred from its
 * first non-null value; min/max are then computed over every value that can be interpreted
 * as that type.
 *
 * @param results - Query result rows keyed by column name.
 * @returns Row/column counts and one ColumnMetaData entry per column of the first row.
 */
function createDataMetadata(results: Record<string, unknown>[]): DataMetadata {
  if (!results.length) {
    return {
      column_count: 0,
      row_count: 0,
      column_metadata: [],
    };
  }

  // Number('') and Number('   ') evaluate to 0, so blank strings must be rejected
  // explicitly or an empty text cell would be classified (and aggregated) as the number 0.
  const parseNumeric = (value: unknown): number | null => {
    if (typeof value === 'number') return value;
    if (typeof value === 'string' && value.trim() !== '') {
      const parsed = Number(value);
      return Number.isNaN(parsed) ? null : parsed;
    }
    return null;
  };

  const parseTimestamp = (value: unknown): number | null => {
    if (value instanceof Date) return value.getTime();
    if (typeof value === 'string') {
      const time = new Date(value).getTime();
      return Number.isNaN(time) ? null : time;
    }
    return null;
  };

  // Pairwise folds instead of Math.min(...values): spreading a very large array into a
  // call exceeds the engine's argument limit and throws a RangeError.
  const smallest = (numbers: number[]): number => numbers.reduce((a, b) => Math.min(a, b));
  const largest = (numbers: number[]): number => numbers.reduce((a, b) => Math.max(a, b));

  const columnNames = Object.keys(results[0] || {});
  const columnMetadata: ColumnMetaData[] = [];

  for (const columnName of columnNames) {
    const values = results
      .map((row) => row[columnName])
      .filter((v) => v !== null && v !== undefined);

    // Determine column type based on the first non-null value
    let columnType = ColumnType.Other;
    let simpleType = SimpleType.Other;
    const firstValue = values[0];
    if (typeof firstValue === 'number') {
      columnType = Number.isInteger(firstValue) ? ColumnType.Int4 : ColumnType.Float8;
      simpleType = SimpleType.Number;
    } else if (typeof firstValue === 'boolean') {
      columnType = ColumnType.Bool;
      simpleType = SimpleType.Boolean;
    } else if (firstValue instanceof Date) {
      columnType = ColumnType.Timestamp;
      simpleType = SimpleType.Date;
    } else if (typeof firstValue === 'string') {
      // Check if it's a numeric string first
      const numeric = parseNumeric(firstValue);
      if (numeric !== null) {
        columnType = Number.isInteger(numeric) ? ColumnType.Int4 : ColumnType.Float8;
        simpleType = SimpleType.Number;
      } else if (
        !Number.isNaN(Date.parse(firstValue)) &&
        // Additional check to avoid parsing simple numbers as dates
        (firstValue.includes('-') || firstValue.includes('/') || firstValue.includes(':'))
      ) {
        columnType = ColumnType.Timestamp;
        simpleType = SimpleType.Date;
      } else {
        columnType = ColumnType.Varchar;
        simpleType = SimpleType.String;
      }
    }

    // Calculate min/max values
    let minValue: unknown = null;
    let maxValue: unknown = null;
    if (simpleType === SimpleType.Number) {
      const numbers: number[] = [];
      for (const value of values) {
        const parsed = parseNumeric(value);
        if (parsed !== null) numbers.push(parsed);
      }
      if (numbers.length > 0) {
        minValue = smallest(numbers);
        maxValue = largest(numbers);
      }
    } else if (simpleType === SimpleType.Date) {
      const timestamps: number[] = [];
      for (const value of values) {
        const time = parseTimestamp(value);
        if (time !== null) timestamps.push(time);
      }
      if (timestamps.length > 0) {
        minValue = new Date(smallest(timestamps));
        maxValue = new Date(largest(timestamps));
      }
    } else if (simpleType === SimpleType.String) {
      // Single pass with code-unit comparison: same ordering as the default Array#sort,
      // without sorting (and mutating) the array twice.
      let lowest: string | undefined;
      let highest: string | undefined;
      for (const value of values) {
        if (typeof value !== 'string') continue;
        if (lowest === undefined || value < lowest) lowest = value;
        if (highest === undefined || value > highest) highest = value;
      }
      if (lowest !== undefined && highest !== undefined) {
        minValue = lowest;
        maxValue = highest;
      }
    }

    // Calculate unique values count. A Set compares objects by identity, so Date instances
    // are keyed by their timestamp; otherwise every row would count as a distinct date.
    const distinctTimestamps = new Set<number>();
    const distinctValues = new Set<unknown>();
    for (const value of values) {
      if (value instanceof Date) {
        distinctTimestamps.add(value.getTime());
      } else {
        distinctValues.add(value);
      }
    }

    columnMetadata.push({
      name: columnName.toLowerCase(),
      min_value: minValue,
      max_value: maxValue,
      unique_values: distinctTimestamps.size + distinctValues.size,
      simple_type: simpleType,
      type: columnType,
    });
  }

  return {
    column_count: columnNames.length,
    row_count: results.length,
    column_metadata: columnMetadata,
  };
}
/**
 * Ensures timeFrame values are properly quoted in YAML content.
 *
 * Finds every line of the form `timeFrame: value` and wraps the value in double quotes
 * unless it is already quoted, so values such as `2024` or `Comparison: a vs b` are parsed
 * as strings.
 *
 * Lines are left untouched when:
 * - the value is empty (the match never extends onto the following line),
 * - the value is already wrapped in matching single or double quotes,
 * - the value is a block scalar header (`|`, `>`, `|-`, `>+`, ...), because quoting it would
 *   detach the indented lines that follow.
 *
 * @param ymlContent - Raw YAML text of a metric definition.
 * @returns The YAML text with unquoted timeFrame values wrapped in double quotes.
 */
function ensureTimeFrameQuoted(ymlContent: string): string {
  // Anchored to the start of a line (m flag) so "timeFrame:" appearing inside another value
  // is not rewritten. Only horizontal whitespace is allowed around the colon, otherwise an
  // empty "timeFrame:" would swallow the newline and quote the next line instead.
  const timeFrameRegex = /^([ \t]*timeFrame[ \t]*:[ \t]*)([^\r\n]+)/gm;
  // Block scalar header: indicator, optional chomping/indentation markers, optional comment
  const blockScalarHeader = /^[|>][-+1-9]{0,2}(\s+#.*)?$/;

  return ymlContent.replace(timeFrameRegex, (match: string, prefix: string, value: string) => {
    // Trim whitespace from the value
    const trimmedValue = value.trim();
    if (trimmedValue === '' || blockScalarHeader.test(trimmedValue)) {
      return match;
    }

    // Check if value is already properly quoted (starts and ends with same quote type).
    // The length check stops a lone quote character from counting as a quoted value.
    const isAlreadyQuoted =
      trimmedValue.length >= 2 &&
      ((trimmedValue.startsWith('"') && trimmedValue.endsWith('"')) ||
        (trimmedValue.startsWith("'") && trimmedValue.endsWith("'")));
    if (isAlreadyQuoted) {
      // Already quoted, return as is
      return match;
    }

    // Backslashes and double quotes are special inside a YAML double-quoted scalar, so they
    // are escaped to keep the text literal and the document valid.
    const escapedValue = trimmedValue.replace(/\\/g, '\\\\').replace(/"/g, '\\"');
    return `${prefix}"${escapedValue}"`;
  });
}
// Core interfaces matching Rust structs
/** One metric to create: the name given by the caller plus the raw YAML definition. */
interface MetricFileParams {
// Metric name/title as supplied in the tool input.
name: string;
// Raw YAML text of the metric definition (parsed later by processMetricFile).
yml_content: string;
}
// Zod schema for validating result metadata from DataSource
// Every field is optional, and the object itself may be undefined.
const resultMetadataSchema = z
.object({
totalRowCount: z.number().optional(),
limited: z.boolean().optional(),
maxRows: z.number().optional(),
})
.optional();
// Static type inferred from the schema above.
type ResultMetadata = z.infer<typeof resultMetadataSchema>;
/**
 * Execution details attached to a validation result.
 * NOTE(review): field meanings below are read from the names only; the code that fills
 * this structure is outside the visible part of the file - confirm there.
 */
interface QueryMetadata {
// presumably the number of rows actually returned
rowCount: number;
// presumably the number of rows the query matched before any limit was applied
totalRowCount: number;
// presumably a duration; unit not visible here - TODO confirm
executionTime: number;
// presumably true when the returned rows were capped
limited: boolean;
maxRows?: number;
}
/**
 * Outcome of a validation step: a success flag plus optional message, result rows,
 * query metadata and error text.
 * NOTE(review): the producer of this type is not in the visible part of the file.
 */
interface ValidationResult {
success: boolean;
message?: string;
results?: Record<string, unknown>[];
metadata?: QueryMetadata;
error?: string;
}
/**
 * Result of processing a single metric file (the return type of processMetricFile).
 * createMetricFiles only treats a result as successful when success, metricFile, metricYml,
 * message and results are all present; otherwise `error` is reported for the file.
 */
interface MetricFileResult {
success: boolean;
metricFile?: FileWithId;
metricYml?: MetricYml;
message?: string;
results?: Record<string, unknown>[];
error?: string;
}
/** Input of createMetricFiles: the list of metric files to create. */
interface CreateMetricFilesParams {
files: MetricFileParams[];
}
/** Entry of the `failed_files` output list: which file failed and why. */
interface FailedFileCreation {
name: string;
error: string;
}
/**
 * A created metric file as reported in the tool output (`files` list).
 * The id, name and timestamps are also used when the record is inserted into metricFiles.
 */
interface FileWithId {
id: string;
name: string;
file_type: string;
result_message?: string;
results?: Record<string, unknown>[];
// Timestamps are strings; presumably ISO-8601 - confirm where they are generated.
created_at: string;
updated_at: string;
version_number: number;
}
/** Output of createMetricFiles; mirrors the tool's outputSchema. */
interface CreateMetricFilesOutput {
// Summary text for the caller.
message: string;
// Elapsed time computed as a Date.now() difference, i.e. milliseconds.
duration: number;
// Files that were created.
files: FileWithId[];
// Files that could not be created, each with an error description.
failed_files: FailedFileCreation[];
}
/**
 * Tool definition for bulk creation of metric files.
 *
 * The description embeds the complete metric YAML schema, so it is runtime text that is
 * sent to the model: every character inside the template literal matters, including the
 * escaped backticks and `$` signs.
 *
 * Input: `files`, a non-empty array of `{ name, yml_content }`.
 * Output: summary message, duration, created files and failed files (see outputSchema).
 * Execution is delegated to createMetricFiles.
 */
export const createMetrics = createTool({
  id: 'create-metrics-file',
  description: `Creates metric configuration files with YAML content following the metric schema specification. Before using this tool, carefully consider the appropriate visualization type (bar, line, scatter, pie, combo, metric, table) and its specific configuration requirements. Each visualization has unique axis settings, formatting options, and data structure needs that must be thoroughly planned to create effective metrics. **This tool supports creating multiple metrics in a single call; prefer using bulk creation over creating metrics one by one.**
Only utilize the required/default fields unless the user specifically requests that optional fields be added.
## COMPLETE METRIC YAML SCHEMA SPECIFICATION
\`\`\`
# METRIC CONFIGURATION - YML STRUCTURE
# -------------------------------------
# REQUIRED Top-Level Fields: \`name\`, \`description\`, \`timeFrame\`, \`sql\`, \`chartConfig\`
#
# --- FIELD DETAILS & RULES ---
# \`name\`: Human-readable title (e.g., Total Sales).
# - RULE: CANNOT contain underscores (\`_\`). Use spaces instead.
# \`description\`: Detailed explanation of the metric.
# \`timeFrame\`: Human-readable time period covered by the query, similar to a filter in a BI tool. MUST BE A VALID STRING.
# - If doing 2024 as an example, you must do "2024" can't parse as a number.
# - For queries with fixed date filters, use specific date ranges, e.g., "January 1, 2020 - December 31, 2020", "2024", "Q2 2024", "June 1, 2025".
# - For queries with relative date filters or no date filter, use relative terms, e.g., "Today", "Yesterday", "Last 7 days", "Last 30 days", "Last Quarter", "Last 12 Months", "Year to Date", "All time", etc.
# - For comparisons, use "Comparison - [Period 1] vs [Period 2]", with each period formatted according to whether it is fixed or relative, e.g., "Comparison - Last 30 days vs Previous 30 days" or "Comparison - June 1, 2025 - June 30, 2025 vs July 1, 2025 - July 31, 2025".
# Rules:
# - Must accurately reflect the date/time filter used in the \`sql\` field. Do not misrepresent the time range.
# - Use full month names for dates, e.g., "January", not "Jan".
# - Follow general quoting rules. CANNOT contain ':'.
# Note: Respond only with the time period, without explanation or additional copy.
# \`sql\`: The SQL query for the metric.
# - RULE: MUST use the pipe \`|\` block scalar style to preserve formatting and newlines.
# - NOTE: Remember to use fully qualified names: DATABASE_NAME.SCHEMA_NAME.TABLE_NAME for tables and table_alias.column for columns. This applies to all table and column references, including those within Common Table Expressions (CTEs) and when selecting from CTEs.
# - Example:
# sql: |
# SELECT ...
# \`chartConfig\`: Visualization settings.
# - RULE: Must contain \`selectedChartType\` (bar, line, scatter, pie, combo, metric, table).
# - RULE: Must contain \`columnLabelFormats\` defining format for ALL columns in the SQL result.
# - RULE: Must contain ONE chart-specific config block based on \`selectedChartType\`:
# - \`barAndLineAxis\` (for type: bar, line)
# - \`scatterAxis\` (for type: scatter)
# - \`pieChartAxis\` (for type: pie)
# - \`comboChartAxis\` (for type: combo)
# - \`metricColumnId\` (for type: metric)
# - \`tableConfig\` (for type: table) - [Optional, if needed beyond basic columns]
#
# --- GENERAL YAML RULES ---
# 1. Use standard YAML syntax (indentation, colons for key-value, \`-\` for arrays).
# 2. Quoting: Generally avoid quotes for simple strings. Use double quotes (\`"..."\`) ONLY if a string contains special characters (like :, {, }, [, ], ,, &, *, #, ?, |, -, <, >, =, !, %, @, \`) or needs to preserve leading/trailing whitespace.
# 3. Metric name, timeframe, or description CANNOT contain \`:\`
# -------------------------------------
# --- FORMAL SCHEMA --- (Used for validation, reflects rules above)
type: object
name: Metric Configuration Schema
description: Metric definition with SQL query and visualization settings
properties:
# NAME
name:
required: true
type: string
description: Human-readable title (e.g., Total Sales). NO underscores. Follow quoting rules. Should not contain \`:\`
# DESCRIPTION
description:
required: true
type: string
description: |
A natural language description of the metric, essentially rephrasing the 'name' field as a question or statement.
Example: If name is "Total Sales", description could be "What are the total sales?".
RULE: Should NOT describe the chart type, axes, or any visualization aspects.
RULE: Follow general quoting rules.
RULE: Should not contain ':'.
# TIME FRAME
timeFrame:
required: true
type: string
description: |
Human-readable time period covered by the SQL query, similar to a filter in a BI tool.
RULE: Must accurately reflect the date/time filter used in the \`sql\` field. Do not misrepresent the time range.
Examples:
- Fixed Dates: "January 1, 2020 - December 31, 2020", "2024", "Q2 2024", "June 1, 2025"
- Relative Dates: "Today", "Yesterday", "Last 7 days", "Last 30 days", "Last Quarter", "Last 12 Months", "Year to Date", "All time"
- Comparisons: Use the format "Comparison - [Period 1] vs [Period 2]". Examples:
- "Comparison - Last 30 days vs Previous 30 days"
- "Comparison - June 1, 2025 - June 30, 2025 vs July 1, 2025 - July 31, 2025"
RULE: Use full month names for dates, e.g., "January", not "Jan".
RULE: Follow general quoting rules. CANNOT contain ':'.
# SQL QUERY
sql:
required: true
type: string
description: |
SQL query using YAML pipe syntax (|).
The SQL query should be formatted with proper indentation using the YAML pipe (|) syntax.
This ensures the multi-line SQL is properly parsed while preserving whitespace and newlines.
IMPORTANT: Remember to use fully qualified names: DATABASE_NAME.SCHEMA_NAME.TABLE_NAME for tables and table_alias.column for columns. This rule is critical for all table and column references, including those within Common Table Expressions (CTEs) and when selecting from CTEs.
Example:
sql: |
SELECT column1, column2
FROM my_table
WHERE condition;
# CHART CONFIGURATION
chartConfig:
required: true
description: Visualization settings (must include selectedChartType, columnLabelFormats, and ONE chart-specific block)
allOf: # Base requirements for ALL chart types
- \$ref: '#/definitions/base_chart_config'
oneOf: # Specific block required based on type
- \$ref: '#/definitions/bar_line_chart_config'
- \$ref: '#/definitions/scatter_chart_config'
- \$ref: '#/definitions/pie_chart_config'
- \$ref: '#/definitions/combo_chart_config'
- \$ref: '#/definitions/metric_chart_config'
- \$ref: '#/definitions/table_chart_config'
required:
- name
- description
- timeFrame
- sql
- chartConfig
definitions:
# BASE CHART CONFIG (common parts used by ALL chart types)
base_chart_config:
type: object
properties:
selectedChartType:
type: string
description: Chart type (bar, line, scatter, pie, combo, metric, table)
enum: [bar, line, scatter, pie, combo, metric, table]
columnLabelFormats:
type: object
description: REQUIRED formatting for ALL columns returned by the SQL query.
additionalProperties:
\$ref: '#/definitions/column_label_format'
# Optional base properties below
columnSettings:
type: object
description: |-
Visual settings applied per column.
Keys MUST be LOWERCASE column names from the SQL query results.
Example: \`total_sales: { showDataLabels: true }\`
additionalProperties:
\$ref: '#/definitions/column_settings'
colors:
type: array
items:
type: string
description: |
Default color palette.
RULE: Hex color codes (e.g., #FF0000) MUST be enclosed in quotes (e.g., "#FF0000" or '#FF0000') because '#' signifies a comment otherwise. Double quotes are preferred for consistency.
Use this parameter when the user asks about customizing chart colors, unless specified otherwise.
showLegend:
type: boolean
gridLines:
type: boolean
showLegendHeadline:
oneOf:
- type: boolean
- type: string
goalLines:
type: array
items:
\$ref: '#/definitions/goal_line'
trendlines:
type: array
items:
\$ref: '#/definitions/trendline'
disableTooltip:
type: boolean
# Axis Configurations
# RULE: By default, only add \`xAxisConfig\` and ONLY set its \`xAxisTimeInterval\` property
# when visualizing date/time data on the X-axis (e.g., line, bar, combo charts).
# Do NOT add other \`xAxisConfig\` properties, \`yAxisConfig\`, or \`y2AxisConfig\`
# unless the user explicitly asks for specific axis modifications.
xAxisConfig:
description: Controls X-axis properties. For date/time axes, MUST contain \`xAxisTimeInterval\` (day, week, month, quarter, year). Other properties control label visibility, title, rotation, and zoom. Only add when needed (dates) or requested by user.
\$ref: '#/definitions/x_axis_config'
yAxisConfig:
description: Controls Y-axis properties. Only add if the user explicitly requests Y-axis modifications (e.g., hiding labels, changing title). Properties control label visibility, title, rotation, and zoom.
\$ref: '#/definitions/y_axis_config'
y2AxisConfig:
description: Controls secondary Y-axis (Y2) properties, primarily for combo charts. Only add if the user explicitly requests Y2-axis modifications. Properties control label visibility, title, rotation, and zoom.
\$ref: '#/definitions/y2_axis_config'
categoryAxisStyleConfig:
description: Optional style configuration for the category axis (color/grouping).
\$ref: '#/definitions/category_axis_style_config'
required:
- selectedChartType
- columnLabelFormats
# AXIS CONFIGURATIONS
x_axis_config:
type: object
properties:
xAxisTimeInterval:
type: string
enum: [day, week, month, quarter, year, 'null']
description: REQUIRED time interval for grouping date/time values on the X-axis (e.g., for line/combo charts). MUST be set if the X-axis represents time. Default: null.
xAxisShowAxisLabel:
type: boolean
description: Show X-axis labels. Default: true.
xAxisShowAxisTitle:
type: boolean
description: Show X-axis title. Default: true.
xAxisAxisTitle:
type: [string, 'null']
description: X-axis title. Default: null (auto-generates from column names).
xAxisLabelRotation:
type: string # Representing numbers or 'auto'
enum: ["0", "45", "90", auto]
description: Label rotation. Default: auto.
xAxisDataZoom:
type: boolean
description: Enable data zoom on X-axis. Default: false (User only).
additionalProperties: false
required:
- xAxisTimeInterval
y_axis_config:
type: object
properties:
yAxisShowAxisLabel:
type: boolean
description: Show Y-axis labels. Default: true.
yAxisShowAxisTitle:
type: boolean
description: Show Y-axis title. Default: true.
yAxisAxisTitle:
type: [string, 'null']
description: Y-axis title. Default: null (uses first plotted column name).
yAxisStartAxisAtZero:
type: [boolean, 'null']
description: Start Y-axis at zero. Default: true.
yAxisScaleType:
type: string
enum: [log, linear]
description: Scale type for Y-axis. Default: linear.
additionalProperties: false
y2_axis_config:
type: object
description: Secondary Y-axis configuration (for combo charts).
properties:
y2AxisShowAxisLabel:
type: boolean
description: Show Y2-axis labels. Default: true.
y2AxisShowAxisTitle:
type: boolean
description: Show Y2-axis title. Default: true.
y2AxisAxisTitle:
type: [string, 'null']
description: Y2-axis title. Default: null (uses first plotted column name).
y2AxisStartAxisAtZero:
type: [boolean, 'null']
description: Start Y2-axis at zero. Default: true.
y2AxisScaleType:
type: string
enum: [log, linear]
description: Scale type for Y2-axis. Default: linear.
additionalProperties: false
category_axis_style_config:
type: object
description: Style configuration for the category axis (color/grouping).
properties:
categoryAxisTitle:
type: [string, 'null']
description: Title for the category axis.
additionalProperties: false
# COLUMN FORMATTING
column_label_format:
type: object
properties:
columnType:
type: string
description: number, string, date
enum: [number, string, date]
style:
type: string
enum:
- currency # Note: The "$" sign is automatically prepended.
- percent # Note: "%" sign is appended. For percentage values:
# - If the value comes directly from a database column, use multiplier: 1
# - If the value is calculated in your SQL query and not already multiplied by 100, use multiplier: 100
- number
- date # Note: For date columns, consider setting xAxisTimeInterval in xAxisConfig to control date grouping (day, week, month, quarter, year)
- string
multiplier:
type: number
description: Value to multiply the number by before display. Default value is 1. For percentages, the multiplier depends on how the data is sourced: if the value comes directly from a database column, use multiplier: 1; if the value is calculated in your SQL query and not already multiplied by 100, use multiplier: 100.
displayName:
type: string
description: Custom display name for the column
numberSeparatorStyle:
type: string
description: Style for number separators. Your option is ',' or a null value. Not null wrapped in quotes, a null value.
minimumFractionDigits:
type: integer
description: Minimum number of fraction digits to display
maximumFractionDigits:
type: integer
description: Maximum number of fraction digits to display
prefix:
type: string
suffix:
type: string
replaceMissingDataWith:
type: number
description: Value to display when data is missing, needs to be set to 0. Should only be set on number columns. All others should be set to null.
compactNumbers:
type: boolean
description: Whether to display numbers in compact form (e.g., 1K, 1M)
currency:
type: string
description: Currency code for currency formatting (e.g., USD, EUR)
dateFormat:
type: string
description: |
Format string for date display (must be compatible with Day.js format strings).
RULE: Choose format based on xAxisTimeInterval:
- year: 'YYYY' (e.g., 2025)
- quarter: '[Q]Q YYYY' (e.g., Q1 2025)
- month: 'MMM YYYY' (e.g., Jan 2025) or 'MMMM' (e.g., January) if context is clear.
- week/day: 'MMM D, YYYY' (e.g., Jan 25, 2025) or 'MMM D' (e.g., Jan 25) if context is clear.
useRelativeTime:
type: boolean
description: Whether to display dates as relative time (e.g., 2 days ago)
isUtc:
type: boolean
description: Whether to interpret dates as UTC
convertNumberTo:
type: string
description: Optional. Convert numeric values to time units or date parts. This is a necessity for time series data when numbers are passed instead of the date.
enum:
- day_of_week
- month_of_year
- quarter
required:
- columnType
- style
- replaceMissingDataWith
- numberSeparatorStyle
# COLUMN VISUAL SETTINGS
column_settings:
type: object
description: Optional visual settings per LOWERCASE column name.
properties:
showDataLabels:
type: boolean
columnVisualization:
type: string
enum:
- bar
- line
- dot
lineWidth:
type: number
lineStyle:
type: string
enum:
- area
- line
lineType:
type: string
enum:
- normal
- smooth
- step
# CHART-SPECIFIC CONFIGURATIONS
bar_line_chart_config:
allOf:
- \$ref: '#/definitions/base_chart_config'
- type: object
properties:
selectedChartType:
enum:
- bar
- line
barAndLineAxis:
type: object
properties:
x:
type: array
items:
type: string
y:
type: array
items:
type: string
description: LOWERCASE column name from SQL for Y-axis.
category:
type: array
items:
type: string
description: LOWERCASE column name from SQL for category grouping.
required:
- x
- y
barLayout:
type: string
enum:
- horizontal
- vertical
barGroupType:
type: string
enum:
- stack
- group
- percentage-stack
required:
- selectedChartType
- barAndLineAxis
scatter_chart_config:
allOf:
- \$ref: '#/definitions/base_chart_config'
- type: object
properties:
selectedChartType:
enum:
- scatter
scatterAxis:
type: object
properties:
x:
type: array
items:
type: string
y:
type: array
items:
type: string
category:
type: array
items:
type: string
size:
type: array
items:
type: string
required:
- x
- y
required:
- selectedChartType
- scatterAxis
pie_chart_config:
allOf:
- \$ref: '#/definitions/base_chart_config'
- type: object
properties:
selectedChartType:
enum:
- pie
pieChartAxis:
type: object
properties:
x:
type: array
items:
type: string
y:
type: array
items:
type: string
required:
- x
- y
required:
- selectedChartType
- pieChartAxis
combo_chart_config:
allOf:
- \$ref: '#/definitions/base_chart_config'
- type: object
properties:
selectedChartType:
enum:
- combo
comboChartAxis:
type: object
properties:
x:
type: array
items:
type: string
y:
type: array
items:
type: string
y2:
type: array
items:
type: string
required:
- x
- y
- y2
required:
- selectedChartType
- comboChartAxis
metric_chart_config:
allOf:
- \$ref: '#/definitions/base_chart_config'
- type: object
properties:
selectedChartType:
enum:
- metric
metricColumnId:
type: string
description: LOWERCASE column name from SQL for the main metric value.
metricValueAggregate:
type: string
enum:
- sum
- average
- median
- max
- min
- count
- first
description: Aggregate function for metric value
metricHeader:
oneOf:
- type: string
description: Simple string title for the metric header
- type: object
properties:
columnId:
type: string
description: Which column to use for the header
useValue:
type: boolean
description: Whether to display the key or the value in the chart
aggregate:
type: string
enum:
- sum
- average
- median
- max
- min
- count
- first
description: Optional aggregation method, defaults to sum
required:
- columnId
- useValue
description: Configuration for a derived metric header
metricSubHeader:
oneOf:
- type: string
description: Simple string title for the metric sub-header
- type: object
properties:
columnId:
type: string
description: Which column to use for the sub-header
useValue:
type: boolean
description: Whether to display the key or the value in the chart
aggregate:
type: string
enum:
- sum
- average
- median
- max
- min
- count
- first
description: Optional aggregation method, defaults to sum
required:
- columnId
- useValue
description: Configuration for a derived metric sub-header
required:
- selectedChartType
- metricColumnId
table_chart_config:
allOf:
- \$ref: '#/definitions/base_chart_config'
- type: object
properties:
selectedChartType:
enum:
- table
tableColumnOrder:
type: array
items:
type: string
required:
- selectedChartType
# No additional required fields for table chart
# HELPER OBJECTS
goal_line:
type: object
properties:
show:
type: boolean
value:
type: number
goalLineLabel:
type: string
trendline:
type: object
properties:
type:
type: string
enum:
- average
- linear_regression
- min
- max
- median
columnId:
type: string
required:
- type
- columnId
\`\`\`
**CRITICAL:** This is the complete schema specification. Follow it exactly - every property, enum value, and requirement listed above must be respected. Pay special attention to:
1. **Required properties** for each chart type
2. **Enum values** for each field (e.g., selectedChartType, columnType, style)
3. **Column name casing** (must be lowercase in axis configurations)
4. **Complete columnLabelFormats** for every SQL result column
5. **Proper YAML syntax** with pipe (|) for SQL blocks
6. **Chart-specific axis configurations** (barAndLineAxis, scatterAxis, etc.)
7. **Date formatting rules** that match xAxisTimeInterval settings`,
  inputSchema: z.object({
    files: z
      .array(
        z.object({
          name: z
            .string()
            .describe(
              "The natural language name/title for the metric, exactly matching the 'name' field within the YML content. This name will identify the metric in the UI. Do not include file extensions or use file path characters."
            ),
          yml_content: z
            .string()
            .describe(
              "The YAML content for a single metric, adhering to the comprehensive metric schema. Multiple metrics can be created in one call by providing multiple entries in the 'files' array. **Prefer creating metrics in bulk.**"
            ),
        })
      )
      .min(1)
      .describe(
        'List of file parameters to create. The files will contain YAML content that adheres to the metric schema specification.'
      ),
  }),
  outputSchema: z.object({
    message: z.string(),
    duration: z.number(),
    files: z.array(
      z.object({
        id: z.string(),
        name: z.string(),
        file_type: z.string(),
        result_message: z.string().optional(),
        results: z.array(z.record(z.any())).optional(),
        created_at: z.string(),
        updated_at: z.string(),
        version_number: z.number(),
      })
    ),
    failed_files: z.array(
      z.object({
        name: z.string(),
        error: z.string(),
      })
    ),
  }),
  // Thin adapter: the casts bridge the tool framework's generic context types to the
  // concrete parameter types expected by createMetricFiles.
  execute: async ({ context, runtimeContext }) => {
    return await createMetricFiles(
      context as CreateMetricFilesParams,
      runtimeContext as RuntimeContext<AnalystRuntimeContext>
    );
  },
});
/**
 * Processes a batch of metric YAML files, stores the valid ones and reports the outcome of
 * every file.
 *
 * Steps:
 * 1. Read dataSourceId, userId and organizationId from the runtime context; if any is
 *    missing, return early with an explanatory message and empty file lists.
 * 2. Run processMetricFile for all files concurrently.
 * 3. Insert the successfully processed metrics and an owner permission per metric in a
 *    single transaction.
 * 4. Re-read the inserted ids; a metric that cannot be found is reported as failed and is
 *    not listed as created.
 * 5. When a messageId is present, record the created files via trackFileAssociations.
 *
 * @param params - The files to create.
 * @param runtimeContext - Workflow context providing data source, user and organization ids.
 * @returns Summary message, elapsed milliseconds, created files and failed files.
 */
const createMetricFiles = wrapTraced(
  async (
    params: CreateMetricFilesParams,
    runtimeContext: RuntimeContext<AnalystRuntimeContext>
  ): Promise<CreateMetricFilesOutput> => {
    const startTime = Date.now();
    const { files } = params;
    const createdFiles: FileWithId[] = [];
    const failedFiles: FailedFileCreation[] = [];

    // Extract context values
    const dataSourceId = runtimeContext?.get('dataSourceId') as string;
    const dataSourceSyntax = (runtimeContext?.get('dataSourceSyntax') || 'generic') as string;
    const userId = runtimeContext?.get('userId') as string;
    const organizationId = runtimeContext?.get('organizationId') as string;
    const workflowStartTime = runtimeContext?.get('workflowStartTime') as number | undefined;
    const messageId = runtimeContext?.get('messageId') as string | undefined;

    // Generate a unique workflow ID using start time and data source
    const workflowId = workflowStartTime
      ? `workflow-${workflowStartTime}-${dataSourceId}`
      : `workflow-${Date.now()}-${dataSourceId}`;

    if (!dataSourceId) {
      return {
        message: 'Unable to identify the data source. Please refresh and try again.',
        duration: Date.now() - startTime,
        files: [],
        failed_files: [],
      };
    }
    if (!userId) {
      return {
        message: 'Unable to verify your identity. Please log in again.',
        duration: Date.now() - startTime,
        files: [],
        failed_files: [],
      };
    }
    if (!organizationId) {
      return {
        message: 'Unable to access your organization. Please check your permissions.',
        duration: Date.now() - startTime,
        files: [],
        failed_files: [],
      };
    }

    // Process files concurrently
    const processResults = await Promise.allSettled(
      files.map(async (file) => {
        const result = await processMetricFile(
          file.name,
          file.yml_content,
          dataSourceId,
          dataSourceSyntax,
          userId,
          organizationId,
          workflowId
        );
        return { fileName: file.name, result };
      })
    );

    const successfulProcessing: Array<{
      fileName: string;
      metricFile: FileWithId;
      metricYml: MetricYml;
      message: string;
      results: Record<string, unknown>[];
    }> = [];

    // Separate successful from failed processing.
    // Promise.allSettled keeps the input order, so the index identifies the submitted file
    // even when its promise rejected.
    for (const [index, processResult] of processResults.entries()) {
      if (processResult.status === 'fulfilled') {
        const { fileName, result } = processResult.value;
        if (
          result.success &&
          result.metricFile &&
          result.metricYml &&
          result.message &&
          result.results
        ) {
          successfulProcessing.push({
            fileName,
            metricFile: result.metricFile,
            metricYml: result.metricYml,
            message: result.message,
            results: result.results,
          });
        } else {
          failedFiles.push({
            name: fileName,
            error: result.error || 'Unknown error',
          });
        }
      } else {
        failedFiles.push({
          name: files[index]?.name ?? 'unknown',
          error: processResult.reason?.message || 'Processing failed',
        });
      }
    }

    // Database operations
    if (successfulProcessing.length > 0) {
      try {
        await db.transaction(async (tx: typeof db) => {
          // Insert metric files
          const metricRecords = successfulProcessing.map((sp) => ({
            id: sp.metricFile.id,
            name: sp.metricFile.name,
            fileName: sp.fileName,
            content: sp.metricYml,
            verification: 'notRequested' as const,
            evaluationObj: null,
            evaluationSummary: null,
            evaluationScore: null,
            organizationId,
            createdBy: userId,
            createdAt: sp.metricFile.created_at,
            updatedAt: sp.metricFile.updated_at,
            deletedAt: null,
            publiclyAccessible: false,
            publiclyEnabledBy: null,
            publicExpiryDate: null,
            versionHistory: createInitialMetricVersionHistory(
              sp.metricYml,
              sp.metricFile.created_at
            ),
            dataMetadata: sp.results ? createDataMetadata(sp.results) : null,
            publicPassword: null,
            dataSourceId,
          }));
          await tx.insert(metricFiles).values(metricRecords);

          // Insert asset permissions. One timestamp for the whole batch so createdAt and
          // updatedAt of a new permission are always identical.
          const permissionTimestamp = new Date().toISOString();
          const assetPermissionRecords = metricRecords.map((record) => ({
            identityId: userId,
            identityType: 'user' as const,
            assetId: record.id,
            assetType: 'metric_file' as const,
            role: 'owner' as const,
            createdAt: permissionTimestamp,
            updatedAt: permissionTimestamp,
            deletedAt: null,
            createdBy: userId,
            updatedBy: userId,
          }));
          await tx.insert(assetPermissions).values(assetPermissionRecords);
        });

        // Critical save verification - ensure records were actually saved
        const unverifiedIds = new Set<string>();
        try {
          const savedMetricIds = successfulProcessing.map((sp) => sp.metricFile.id);
          const verificationResult = await db
            .select({ id: metricFiles.id })
            .from(metricFiles)
            .where(inArray(metricFiles.id, savedMetricIds))
            .limit(savedMetricIds.length);
          if (verificationResult.length !== savedMetricIds.length) {
            console.error('[Critical Save Verification] Mismatch in saved records:', {
              expected: savedMetricIds.length,
              actual: verificationResult.length,
              messageId,
              workflowId,
            });
            // Mark files as failed if verification doesn't match
            const savedIds = new Set(verificationResult.map((r) => r.id));
            for (const sp of successfulProcessing) {
              if (!savedIds.has(sp.metricFile.id)) {
                unverifiedIds.add(sp.metricFile.id);
                failedFiles.push({
                  name: sp.metricFile.name,
                  error: 'Critical save verification failed - record not found after save',
                });
              }
            }
          }
        } catch (verifyError) {
          console.error('[Critical Save Verification] Error during verification:', verifyError);
          // Don't fail the entire operation, but log the issue
        }

        // Prepare successful files output. A file that failed verification was already
        // reported in failedFiles and must not be reported as created as well.
        for (const sp of successfulProcessing) {
          if (unverifiedIds.has(sp.metricFile.id)) {
            continue;
          }
          createdFiles.push({
            id: sp.metricFile.id,
            name: sp.metricFile.name,
            file_type: sp.metricFile.file_type,
            result_message: sp.metricFile.result_message || '',
            results: sp.metricFile.results || [],
            created_at: sp.metricFile.created_at,
            updated_at: sp.metricFile.updated_at,
            version_number: sp.metricFile.version_number,
          });
        }
      } catch (error) {
        // Add all successful processing to failed if database operation fails
        for (const sp of successfulProcessing) {
          failedFiles.push({
            name: sp.metricFile.name,
            error: `Failed to save to database: ${error instanceof Error ? error.message : 'Unknown error'}`,
          });
        }
      }
    }

    const duration = Date.now() - startTime;
    const message = generateResultMessage(createdFiles, failedFiles);

    // Track file associations if we have a messageId and created files
    if (messageId && createdFiles.length > 0) {
      await trackFileAssociations({
        messageId,
        files: createdFiles.map((file) => ({
          id: file.id,
          version: file.version_number,
        })),
      });
    }

    return {
      message,
      duration,
      files: createdFiles,
      failed_files: failedFiles,
    };
  },
  { name: 'create-metrics-file' }
);
/**
 * Validates one metric definition and builds the in-memory objects needed to persist it.
 *
 * Steps, in order:
 * 1. Normalise `timeFrame` quoting and parse the YAML.
 * 2. Validate the parsed document with `validateMetricYml`.
 * 3. Validate (and possibly swap) bar/line chart axes.
 * 4. Execute the SQL through `validateSql` to confirm it runs.
 *
 * This function never throws: every failure is converted into
 * `{ success: false, error }` so the caller can report it per file.
 *
 * @param _fileName - Unused; kept for signature compatibility.
 * @param ymlContent - Raw YAML text of the metric definition.
 * @param dataSourceId - Data source the SQL is validated against.
 * @param dataSourceDialect - SQL dialect forwarded to the permission check.
 * @param userId - User whose permissions are checked.
 * @param _organizationId - Unused; kept for signature compatibility.
 * @param workflowId - Workflow whose data source manager is reused.
 * @returns On success, the metric file, the final (possibly axis-adjusted) YAML,
 *          the validation message and the sample rows; otherwise an error message.
 */
async function processMetricFile(
  _fileName: string,
  ymlContent: string,
  dataSourceId: string,
  dataSourceDialect: string,
  userId: string,
  _organizationId: string,
  workflowId: string
): Promise<MetricFileResult> {
  try {
    // Ensure timeFrame values are properly quoted before parsing
    const fixedYmlContent = ensureTimeFrameQuoted(ymlContent);

    // Parse and validate YAML
    const parsedYml = yaml.parse(fixedYmlContent);
    const metricYml = validateMetricYml(parsedYml);

    // Validate and adjust bar/line chart axes
    const axisValidation = validateAndAdjustBarLineAxes(metricYml);
    if (!axisValidation.isValid) {
      return {
        success: false,
        error: axisValidation.error || 'Invalid bar/line chart axis configuration',
      };
    }

    // Use adjusted YAML if axes were swapped
    const finalMetricYml =
      axisValidation.shouldSwapAxes && axisValidation.adjustedYml
        ? axisValidation.adjustedYml
        : metricYml;

    // Random UUID: the id is NOT derived from the file content, so re-processing
    // the same YAML yields a different id each time.
    const metricId = randomUUID();

    // Validate SQL by running it
    const sqlValidationResult = await validateSql(
      finalMetricYml.sql,
      dataSourceId,
      workflowId,
      userId,
      dataSourceDialect
    );
    if (!sqlValidationResult.success) {
      return {
        success: false,
        error: `The SQL query has an issue: ${sqlValidationResult.error}. Please check your query syntax.`,
      };
    }

    // Create metric file object; created/updated share one timestamp.
    const now = new Date().toISOString();
    const metricFile: FileWithId = {
      id: metricId,
      name: finalMetricYml.name,
      file_type: 'metric',
      result_message: sqlValidationResult.message || '',
      results: sqlValidationResult.results || [],
      created_at: now,
      updated_at: now,
      version_number: 1,
    };

    return {
      success: true,
      metricFile,
      metricYml: finalMetricYml,
      message: sqlValidationResult.message || '',
      results: sqlValidationResult.results || [],
    };
  } catch (error) {
    let errorMessage = 'Unknown error';

    if (error instanceof z.ZodError) {
      // Return the actual Zod validation errors for better debugging
      const issues = error.issues
        .map((issue) => {
          const path = issue.path.length > 0 ? ` at path '${issue.path.join('.')}'` : '';
          return `${issue.message}${path}`;
        })
        .join('; ');
      errorMessage = `The metric configuration is invalid: ${issues}`;
    } else if (error instanceof Error) {
      // The yaml package identifies parse failures through `error.name`; the message
      // itself only carries the description and position. The message check is kept
      // as a fallback for errors that were re-wrapped with the class name in the text.
      const isYamlParseError =
        error.name === 'YAMLParseError' || error.message.includes('YAMLParseError');
      if (isYamlParseError) {
        // Keep the parser detail so the line/column information is not lost.
        errorMessage = `The YAML format is incorrect. Please check the syntax and indentation. Details: ${error.message}`;
      } else {
        errorMessage = error.message;
      }
    }

    return {
      success: false,
      error: errorMessage,
    };
  }
}
/**
 * Validates a metric's SQL by running it against the workflow's data source.
 *
 * Checks performed before execution: the query is non-empty, contains the
 * substrings `select` and `from` (case-insensitive), and passes
 * `validateSqlPermissions` for the given user.
 *
 * Execution is attempted up to `MAX_RETRIES + 1` times. Only failures whose
 * message mentions "timeout" / "timed out" are retried, whether they arrive as
 * an unsuccessful result or as a thrown exception; anything else is returned
 * immediately. The data source is not closed here: it is obtained from (and
 * left with) the workflow data source manager.
 *
 * @param sqlQuery - SQL text to validate; it is executed unmodified.
 * @param dataSourceId - Data source to execute against.
 * @param workflowId - Workflow whose data source manager supplies the connection.
 * @param userId - User whose table permissions are checked.
 * @param dataSourceSyntax - Optional SQL dialect forwarded to the permission check.
 * @returns `{ success: true, message, results, metadata }` with at most 25 sample
 *          rows, or `{ success: false, error }`. Never throws.
 */
async function validateSql(
  sqlQuery: string,
  dataSourceId: string,
  workflowId: string,
  userId: string,
  dataSourceSyntax?: string
): Promise<ValidationResult> {
  // Retry configuration for SQL validation
  const MAX_RETRIES = 3;
  const TIMEOUT_MS = 120000; // 120 seconds (2 minutes) per attempt for Snowflake queue handling
  const RETRY_DELAYS = [1000, 3000, 6000]; // 1s, 3s, 6s
  // Row cap requested from the adapter. Also used as the fallback when the adapter
  // does not report `maxRows`, so the reported limit always matches the real one.
  const MAX_ROWS = 1000;

  try {
    if (!sqlQuery.trim()) {
      return { success: false, error: 'SQL query cannot be empty' };
    }

    // Basic SQL validation (substring checks only, not a parser)
    const loweredSql = sqlQuery.toLowerCase();
    if (!loweredSql.includes('select')) {
      return { success: false, error: 'SQL query must contain SELECT statement' };
    }
    if (!loweredSql.includes('from')) {
      return { success: false, error: 'SQL query must contain FROM clause' };
    }

    // Validate permissions before attempting to get data source
    const permissionResult = await validateSqlPermissions(sqlQuery, userId, dataSourceSyntax);
    if (!permissionResult.isAuthorized) {
      return {
        success: false,
        error: createPermissionErrorMessage(permissionResult.unauthorizedTables),
      };
    }

    // Get data source from workflow manager (reuses existing connections)
    const manager = getWorkflowDataSourceManager(workflowId);
    let dataSource: DataSource;
    try {
      dataSource = await manager.getDataSource(dataSourceId);
    } catch (_error) {
      return {
        success: false,
        error: `Unable to connect to your data source. Please check that it's properly configured and accessible.`,
      };
    }

    // Attempt execution with retries
    for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
      let failureMessage: string;
      let failedByException = false;

      try {
        // Use maxRows to limit results without modifying the SQL query (preserves Snowflake caching)
        const result = await dataSource.execute({
          sql: sqlQuery,
          options: {
            maxRows: MAX_ROWS, // Additional safety limit at adapter level
            timeout: TIMEOUT_MS,
          },
        });

        if (result.success) {
          const allResults = result.rows || [];
          // Truncate results to 25 records for display in validation
          const results = allResults.slice(0, 25);

          // Validate metadata with Zod schema for runtime safety
          const validatedMetadata = resultMetadataSchema.safeParse(result.metadata);
          const parsedMetadata: ResultMetadata = validatedMetadata.success
            ? validatedMetadata.data
            : undefined;

          const metadata: QueryMetadata = {
            rowCount: results.length,
            totalRowCount: parsedMetadata?.totalRowCount ?? allResults.length,
            executionTime: result.executionTime || 100,
            limited: parsedMetadata?.limited ?? false,
            maxRows: parsedMetadata?.maxRows ?? MAX_ROWS,
          };

          let message: string;
          if (allResults.length === 0) {
            message = 'Query executed successfully but returned no records';
          } else if (result.metadata?.limited) {
            // Fall back to the requested cap so the text never reads "undefined rows".
            message = `Query validated successfully. Results were limited to ${result.metadata.maxRows ?? MAX_ROWS} rows for memory protection (query may return more rows when executed)${results.length < allResults.length ? ` - showing first 25 of ${allResults.length} fetched` : ''}`;
          } else {
            message = `Query validated successfully and returned ${allResults.length} records${allResults.length > 25 ? ' (showing sample of first 25)' : ''}`;
          }

          return {
            success: true,
            message,
            results,
            metadata,
          };
        }

        failureMessage = result.error?.message || 'Query execution failed';
      } catch (error) {
        failureMessage = error instanceof Error ? error.message : 'SQL validation failed';
        failedByException = true;
      }

      // Check if error is timeout-related
      const loweredFailure = failureMessage.toLowerCase();
      const isTimeout = loweredFailure.includes('timeout') || loweredFailure.includes('timed out');

      // Not a timeout or no more retries
      if (!isTimeout || attempt >= MAX_RETRIES) {
        return {
          success: false,
          error: failureMessage,
        };
      }

      // Wait before retry
      const delay = RETRY_DELAYS[attempt] ?? 6000;
      const logContext = {
        sqlPreview: `${sqlQuery.substring(0, 100)}...`,
        attempt: attempt + 1,
        nextDelay: delay,
      };
      if (failedByException) {
        console.warn(
          `[create-metrics] SQL validation timeout (exception) on attempt ${attempt + 1}/${MAX_RETRIES + 1}. Retrying in ${delay}ms...`,
          { ...logContext, error: failureMessage }
        );
      } else {
        console.warn(
          `[create-metrics] SQL validation timeout on attempt ${attempt + 1}/${MAX_RETRIES + 1}. Retrying in ${delay}ms...`,
          logContext
        );
      }
      await new Promise((resolve) => setTimeout(resolve, delay));
    }

    // Should not reach here, but just in case
    return {
      success: false,
      error: 'Max retries exceeded for SQL validation',
    };
  } catch (error) {
    return {
      success: false,
      error: error instanceof Error ? error.message : 'SQL validation failed',
    };
  }
}
/**
 * Builds the summary message returned by the tool.
 *
 * - No failures: a single success sentence.
 * - Exactly one failure: optional success sentence, then that failure's
 *   explanation, terminated with a period.
 * - Several failures: optional success sentence, a count header, then one
 *   explanation per line.
 *
 * @param createdFiles - Files that were created; only the count is used.
 * @param failedFiles - Files that failed, each with a name and an error text.
 * @returns The message text.
 */
function generateResultMessage(
  createdFiles: FileWithId[],
  failedFiles: FailedFileCreation[]
): string {
  if (failedFiles.length === 0) {
    return `Successfully created ${createdFiles.length} metric files.`;
  }

  // Ends with a space so whatever follows is separated from the success sentence.
  const successMsg =
    createdFiles.length > 0 ? `Successfully created ${createdFiles.length} metric files. ` : '';

  const failures = failedFiles.map(
    (failure) =>
      `Failed to create '${failure.name}': ${failure.error}.\n\nPlease recreate the metric from scratch rather than attempting to modify. This error could be due to:\n- Using a dataset that doesn't exist (please reevaluate the available datasets in the chat conversation)\n- Invalid configuration in the metric file\n- Special characters in the metric name or SQL query\n- Syntax errors in the SQL query`
  );

  if (failures.length === 1) {
    // Do not trim successMsg here: trimming glued the two sentences together
    // ("...metric files.Failed to create...").
    return `${successMsg}${failures[0]}.`;
  }

  return `${successMsg}Failed to create ${failures.length} metric files:\n${failures.join('\n')}`;
}
/**
 * Decodes the escape sequences of a (possibly truncated) JSON string body.
 *
 * Works in a single left-to-right pass so an escaped backslash is consumed
 * together with its escape character; chained replacements would misread the
 * text `\\n` (escaped backslash followed by `n`) as a newline escape.
 * Unrecognised sequences are left untouched.
 */
function decodeJsonStringFragment(raw: string): string {
  return raw.replace(/\\(u[0-9a-fA-F]{4}|.)/g, (sequence: string, escape: string) => {
    if (escape.length === 5) {
      // \uXXXX
      return String.fromCharCode(Number.parseInt(escape.slice(1), 16));
    }
    switch (escape) {
      case 'n':
        return '\n';
      case 'r':
        return '\r';
      case 't':
        return '\t';
      case 'b':
        return '\b';
      case 'f':
        return '\f';
      case '"':
      case '\\':
      case '/':
        return escape;
      default:
        return sequence;
    }
  });
}

/**
 * Optimistic parsing function for streaming create-metrics-file tool arguments.
 * Extracts the files array as it's being built incrementally.
 *
 * Strategy, from strictest to most lenient:
 * 1. Parse the whole text as JSON.
 * 2. Parse everything after `"files": [` as an array by appending `]`.
 * 3. Regex-extract complete `{ name, yml_content }` objects.
 * 4. If none are complete, regex-extract the first partial object.
 *
 * Regex-extracted `yml_content` is JSON-unescaped and passed through
 * `ensureTimeFrameQuoted`; content obtained through `JSON.parse` is returned as parsed.
 *
 * @param accumulatedText - Raw JSON argument text received so far.
 * @returns The files found so far (possibly empty), `{ files: undefined }` when
 *          complete JSON has no `files`, or `null` when the `files` array has not started.
 * @throws Error when the input is not a string, or when parsing fails with
 *         something other than a `SyntaxError`.
 */
export function parseStreamingArgs(
  accumulatedText: string
): Partial<{ files: Array<{ name: string; yml_content: string }> }> | null {
  // Validate input type
  if (typeof accumulatedText !== 'string') {
    throw new Error(`parseStreamingArgs expects string input, got ${typeof accumulatedText}`);
  }

  try {
    // First try to parse as complete JSON
    const parsed = JSON.parse(accumulatedText);
    return {
      // Optional chaining: a complete `null` payload is valid JSON and has no files.
      files: parsed?.files || undefined,
    };
  } catch (error) {
    // Only JSON parse errors trigger the lenient path - let other errors bubble up
    if (!(error instanceof SyntaxError)) {
      throw new Error(
        `Unexpected error in parseStreamingArgs: ${error instanceof Error ? error.message : 'Unknown error'}`
      );
    }
  }

  // JSON is incomplete: locate the start of the files array. The capture group
  // matches even when nothing follows the bracket, so a bare `"files": [`
  // produces an empty array below.
  const filesMatch = accumulatedText.match(/"files"\s*:\s*\[(.*)/s);
  if (!filesMatch || filesMatch[1] === undefined) {
    return null;
  }
  const arrayContent = filesMatch[1];

  try {
    // Try to parse the array content by adding closing bracket
    return { files: JSON.parse(`[${arrayContent}]`) };
  } catch {
    // Fall through to regex extraction of file objects
  }

  const files: Array<{ name: string; yml_content: string }> = [];

  // First, try to match complete file objects
  const completeFileMatches = arrayContent.matchAll(
    /\{\s*"name"\s*:\s*"([^"]*?)"\s*,\s*"yml_content"\s*:\s*"((?:[^"\\]|\\.)*)"\s*\}/g
  );
  for (const match of completeFileMatches) {
    if (match[1] !== undefined && match[2] !== undefined) {
      files.push({
        name: match[1],
        // Ensure timeFrame is properly quoted
        yml_content: ensureTimeFrameQuoted(decodeJsonStringFragment(match[2])),
      });
    }
  }

  // If no complete files found, try to extract a partial file object that has
  // at least a name and the beginning of yml_content
  if (files.length === 0) {
    const incompleteFileMatch = arrayContent.match(
      /\{\s*"name"\s*:\s*"([^"]*?)"\s*,\s*"yml_content"\s*:\s*"((?:[^"\\]|\\.)*)/
    );
    if (
      incompleteFileMatch &&
      incompleteFileMatch[1] !== undefined &&
      incompleteFileMatch[2] !== undefined
    ) {
      files.push({
        name: incompleteFileMatch[1],
        // Ensure timeFrame is properly quoted
        yml_content: ensureTimeFrameQuoted(decodeJsonStringFragment(incompleteFileMatch[2])),
      });
    }
  }

  return { files };
}