Merge pull request #1162 from buster-so/dallin-bus-1937-the-createmetrics-and-createreports-tool-calls-are-in-an

Dallin-bus-1937-the-createmetrics-and-createreports-tool-calls-are-in-an
This commit is contained in:
dal 2025-09-25 18:45:37 -06:00 committed by GitHub
commit 646013a307
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 205 additions and 44 deletions

View File

@ -344,12 +344,12 @@ You operate in a loop to complete tasks:
- Strict JOINs: Only join tables where relationships are explicitly defined via `relationships` or `entities` keys in the provided data context/metadata. Do not join tables without a pre-defined relationship.
- SQL Requirements:
- Use database-qualified schema-qualified table names (`<DATABASE_NAME>.<SCHEMA_NAME>.<TABLE_NAME>`).
- Use fully qualified column names with table aliases (e.g., `<table_alias>.<column>`).
- MANDATORY SQL NAMING CONVENTIONS:
- Use column names qualified with table aliases (e.g., `<table_alias>.<column>`).
- MANDATORY SQL NAMING CONVENTIONS:
- All Table References: MUST be fully qualified: `DATABASE_NAME.SCHEMA_NAME.TABLE_NAME`.
- All Column References: MUST be qualified with their table alias (e.g., `alias.column_name`) or CTE name (e.g., `cte_alias.column_name_from_cte`).
- Inside CTE Definitions: When defining a CTE (e.g., `WITH my_cte AS (SELECT t.column1 FROM DATABASE.SCHEMA.TABLE1 t ...)`), all columns selected from underlying database tables MUST use their table alias (e.g., `t.column1`, not just `column1`). This applies even if the CTE is simple and selects from only one table.
- Selecting From CTEs: When selecting from a defined CTE, use the CTE's alias for its columns (e.g., `SELECT mc.column1 FROM my_cte mc ...`).
- All Column References: MUST be qualified with their table alias (e.g., `c.customerid`) or CTE name (e.g., `cte_alias.column_name_from_cte`).
- Inside CTE Definitions: When defining a CTE (e.g., `WITH my_cte AS (SELECT c.customerid FROM DATABASE.SCHEMA.TABLE1 c ...)`), all columns selected from underlying database tables MUST use their table alias (e.g., `c.customerid`, not just `customerid`). This applies even if the CTE is simple and selects from only one table.
- Selecting From CTEs: When selecting from a defined CTE, use the CTE's alias for its columns (e.g., `SELECT mc.column_name FROM my_cte mc ...`).
- Universal Application: These naming conventions are strict requirements and apply universally to all parts of the SQL query, including every CTE definition and every subsequent SELECT statement. Non-compliance will lead to errors.
- Context Adherence: Strictly use only columns that are present in the data context provided by search results. Never invent or assume columns.
- Select specific columns (avoid `SELECT *` or `COUNT(*)`).

View File

@ -98,10 +98,10 @@ export function createAnalystAgent(analystAgentOptions: AnalystAgentOptions) {
const docsSystemMessage = docsContent
? ({
role: 'system',
content: `<data_catalog_docs>\n${docsContent}\n</data_catalog_docs>`,
providerOptions: DEFAULT_ANTHROPIC_OPTIONS,
} as ModelMessage)
role: 'system',
content: `<data_catalog_docs>\n${docsContent}\n</data_catalog_docs>`,
providerOptions: DEFAULT_ANTHROPIC_OPTIONS,
} as ModelMessage)
: null;
async function stream({ messages }: AnalystStreamOptions) {
@ -134,19 +134,19 @@ export function createAnalystAgent(analystAgentOptions: AnalystAgentOptions) {
// Create analyst instructions system message with proper escaping
const analystInstructionsMessage = analystInstructions
? ({
role: 'system',
content: `<organization_instructions>\n${analystInstructions}\n</organization_instructions>`,
providerOptions: DEFAULT_ANTHROPIC_OPTIONS,
} as ModelMessage)
role: 'system',
content: `<organization_instructions>\n${analystInstructions}\n</organization_instructions>`,
providerOptions: DEFAULT_ANTHROPIC_OPTIONS,
} as ModelMessage)
: null;
// Create user personalization system message
const userPersonalizationSystemMessage = userPersonalizationMessageContent
? ({
role: 'system',
content: userPersonalizationMessageContent,
providerOptions: DEFAULT_ANTHROPIC_OPTIONS,
} as ModelMessage)
role: 'system',
content: userPersonalizationMessageContent,
providerOptions: DEFAULT_ANTHROPIC_OPTIONS,
} as ModelMessage)
: null;
return wrapTraced(

View File

@ -75,4 +75,35 @@ describe('Analyst Agent Instructions', () => {
getAnalystAgentSystemPrompt(' '); // whitespace only
}).toThrow('SQL dialect guidance is required');
});
it('should contain mandatory SQL naming conventions', () => {
const result = getAnalystAgentSystemPrompt('Test guidance');
// Check for MANDATORY SQL NAMING CONVENTIONS section
expect(result).toContain('MANDATORY SQL NAMING CONVENTIONS');
// Ensure table references require full qualification
expect(result).toContain('All Table References: MUST be fully qualified: `DATABASE_NAME.SCHEMA_NAME.TABLE_NAME`');
// Ensure column references use table aliases (not full qualifiers)
expect(result).toContain('All Column References: MUST be qualified with their table alias (e.g., `c.customerid`)');
// Ensure examples show table alias usage without full qualification
expect(result).toContain('c.customerid');
expect(result).not.toContain('postgres.ont_ont.customer.customerid');
// Ensure CTE examples use table aliases correctly
expect(result).toContain('SELECT c.customerid FROM DATABASE.SCHEMA.TABLE1 c');
expect(result).toContain('c.customerid`, not just `customerid`');
});
it('should use column names qualified with table aliases', () => {
const result = getAnalystAgentSystemPrompt('Test guidance');
// Check for the updated description
expect(result).toContain('Use column names qualified with table aliases');
// Ensure the old verbose description is not present
expect(result).not.toContain('Use fully qualified column names with table aliases');
});
});

View File

@ -145,4 +145,40 @@ describe('Think and Prep Agent Instructions', () => {
getThinkAndPrepAgentSystemPrompt(' '); // whitespace only
}).toThrow('SQL dialect guidance is required');
});
describe.each([
['standard', 'standard'],
['investigation', 'investigation'],
])('SQL naming conventions in %s mode', (modeName, mode) => {
it(`should contain mandatory SQL naming conventions in ${modeName} mode`, () => {
const result = getThinkAndPrepAgentSystemPrompt('Test guidance', mode as 'standard' | 'investigation');
// Check for MANDATORY SQL NAMING CONVENTIONS section
expect(result).toContain('MANDATORY SQL NAMING CONVENTIONS');
// Ensure table references require full qualification
expect(result).toContain('All Table References: MUST be fully qualified: `DATABASE_NAME.SCHEMA_NAME.TABLE_NAME`');
// Ensure column references use table aliases (not full qualifiers)
expect(result).toContain('All Column References: MUST be qualified with their table alias (e.g., `c.customerid`)');
// Ensure examples show table alias usage without full qualification
expect(result).toContain('c.customerid');
expect(result).not.toContain('postgres.ont_ont.customer.customerid');
// Ensure CTE examples use table aliases correctly
expect(result).toContain('SELECT c.customerid FROM DATABASE.SCHEMA.TABLE1 c');
expect(result).toContain('c.customerid`, not just `customerid`');
});
it(`should use column names qualified with table aliases in ${modeName} mode`, () => {
const result = getThinkAndPrepAgentSystemPrompt('Test guidance', mode as 'standard' | 'investigation');
// Check for the updated description
expect(result).toContain('Use column names qualified with table aliases');
// Ensure the old verbose description is not present
expect(result).not.toContain('Use fully qualified column names with table aliases');
});
});
});

View File

@ -588,12 +588,12 @@ If all true → proceed to submit prep for Asset Creation with `submitThoughts`.
- Strict JOINs: Only join tables where relationships are explicitly defined via `relationships` or `entities` keys in the provided data context/metadata. Do not join tables without a pre-defined relationship.
- SQL Requirements:
- Use database-qualified schema-qualified table names (`<DATABASE_NAME>.<SCHEMA_NAME>.<TABLE_NAME>`).
- Use fully qualified column names with table aliases (e.g., `<table_alias>.<column>`).
- Use column names qualified with table aliases (e.g., `<table_alias>.<column>`).
- MANDATORY SQL NAMING CONVENTIONS:
- All Table References: MUST be fully qualified: `DATABASE_NAME.SCHEMA_NAME.TABLE_NAME`.
- All Column References: MUST be qualified with their table alias (e.g., `alias.column_name`) or CTE name (e.g., `cte_alias.column_name_from_cte`).
- Inside CTE Definitions: When defining a CTE (e.g., `WITH my_cte AS (SELECT t.column1 FROM DATABASE.SCHEMA.TABLE1 t ...)`), all columns selected from underlying database tables MUST use their table alias (e.g., `t.column1`, not just `column1`). This applies even if the CTE is simple and selects from only one table.
- Selecting From CTEs: When selecting from a defined CTE, use the CTE's alias for its columns (e.g., `SELECT mc.column1 FROM my_cte mc ...`).
- All Column References: MUST be qualified with their table alias (e.g., `c.customerid`) or CTE name (e.g., `cte_alias.column_name_from_cte`).
- Inside CTE Definitions: When defining a CTE (e.g., `WITH my_cte AS (SELECT c.customerid FROM DATABASE.SCHEMA.TABLE1 c ...)`), all columns selected from underlying database tables MUST use their table alias (e.g., `c.customerid`, not just `customerid`). This applies even if the CTE is simple and selects from only one table.
- Selecting From CTEs: When selecting from a defined CTE, use the CTE's alias for its columns (e.g., `SELECT mc.column_name FROM my_cte mc ...`).
- Universal Application: These naming conventions are strict requirements and apply universally to all parts of the SQL query, including every CTE definition and every subsequent SELECT statement. Non-compliance will lead to errors.
- Context Adherence: Strictly use only columns that are present in the data context provided by search results. Never invent or assume columns.
- Select specific columns (avoid `SELECT *` or `COUNT(*)`).

View File

@ -465,12 +465,12 @@ When in doubt, be more thorough rather than less. Reports are the default becaus
- Strict JOINs: Only join tables where relationships are explicitly defined via `relationships` or `entities` keys in the provided data context/metadata. Do not join tables without a pre-defined relationship.
- SQL Requirements:
- Use database-qualified schema-qualified table names (`<DATABASE_NAME>.<SCHEMA_NAME>.<TABLE_NAME>`).
- Use fully qualified column names with table aliases (e.g., `<table_alias>.<column>`).
- Use column names qualified with table aliases (e.g., `<table_alias>.<column>`).
- MANDATORY SQL NAMING CONVENTIONS:
- All Table References: MUST be fully qualified: `DATABASE_NAME.SCHEMA_NAME.TABLE_NAME`.
- All Column References: MUST be qualified with their table alias (e.g., `alias.column_name`) or CTE name (e.g., `cte_alias.column_name_from_cte`).
- Inside CTE Definitions: When defining a CTE (e.g., `WITH my_cte AS (SELECT t.column1 FROM DATABASE.SCHEMA.TABLE1 t ...)`), all columns selected from underlying database tables MUST use their table alias (e.g., `t.column1`, not just `column1`). This applies even if the CTE is simple and selects from only one table.
- Selecting From CTEs: When selecting from a defined CTE, use the CTE's alias for its columns (e.g., `SELECT mc.column1 FROM my_cte mc ...`).
- All Column References: MUST be qualified with their table alias (e.g., `c.customerid`) or CTE name (e.g., `cte_alias.column_name_from_cte`).
- Inside CTE Definitions: When defining a CTE (e.g., `WITH my_cte AS (SELECT c.customerid FROM DATABASE.SCHEMA.TABLE1 c ...)`), all columns selected from underlying database tables MUST use their table alias (e.g., `c.customerid`, not just `customerid`). This applies even if the CTE is simple and selects from only one table.
- Selecting From CTEs: When selecting from a defined CTE, use the CTE's alias for its columns (e.g., `SELECT mc.column_name FROM my_cte mc ...`).
- Universal Application: These naming conventions are strict requirements and apply universally to all parts of the SQL query, including every CTE definition and every subsequent SELECT statement. Non-compliance will lead to errors.
- Context Adherence: Strictly use only columns that are present in the data context provided by search results. Never invent or assume columns.
- Select specific columns (avoid `SELECT *` or `COUNT(*)`).

View File

@ -6,8 +6,15 @@ export const DEFAULT_ANTHROPIC_OPTIONS = {
gateway: {
order: ['bedrock', 'anthropic', 'vertex'],
},
headers: {},
anthropic: { cacheControl: { type: 'ephemeral' } },
anthropic: {
cacheControl: { type: 'ephemeral' },
},
bedrock: {
cacheControl: { type: 'ephemeral' },
additionalModelRequestFields: {
anthropic_beta: ['fine-grained-tool-streaming-2025-05-14'],
},
}
};
export const DEFAULT_OPENAI_OPTIONS = {
@ -15,7 +22,7 @@ export const DEFAULT_OPENAI_OPTIONS = {
order: ['openai'],
},
openai: {
parallelToolCalls: false,
// parallelToolCalls: false,
reasoningEffort: 'minimal',
verbosity: 'low',
},

View File

@ -3,8 +3,8 @@ import { generateObject } from 'ai';
import type { ModelMessage } from 'ai';
import { wrapTraced } from 'braintrust';
import { z } from 'zod';
import { GPT5Nano } from '../../../llm';
import { DEFAULT_OPENAI_OPTIONS } from '../../../llm/providers/gateway';
import { Haiku35 } from '../../../llm';
import { DEFAULT_ANTHROPIC_OPTIONS } from '../../../llm/providers/gateway';
// Zod-first: define input/output schemas and export inferred types
export const generateChatTitleParamsSchema = z.object({
@ -56,10 +56,10 @@ async function generateTitleWithLLM(messages: ModelMessage[]): Promise<string> {
const tracedChatTitle = wrapTraced(
async () => {
const { object } = await generateObject({
model: GPT5Nano,
model: Haiku35,
schema: llmOutputSchema,
messages: titleMessages,
providerOptions: DEFAULT_OPENAI_OPTIONS,
providerOptions: DEFAULT_ANTHROPIC_OPTIONS,
});
return object;

View File

@ -21,16 +21,14 @@ const UpdateMessageEntriesSchema = z.object({
export type UpdateMessageEntriesParams = z.infer<typeof UpdateMessageEntriesSchema>;
// Simple in-memory queue for each messageId
const updateQueues = new Map<string, Promise<{ success: boolean }>>();
/**
* Updates message entries with cache-first approach for streaming.
* Cache is the source of truth during streaming, DB is updated for persistence.
*
* Merge logic:
* - responseMessages: upsert by 'id' field, maintaining order
* - reasoningMessages: upsert by 'id' field, maintaining order
* - rawLlmMessages: upsert by combination of 'role' and 'toolCallId', maintaining order
* Internal function that performs the actual update logic.
* This is separated so it can be queued.
*/
export async function updateMessageEntries({
async function performUpdate({
messageId,
rawLlmMessages,
responseMessages,
@ -95,3 +93,41 @@ export async function updateMessageEntries({
throw new Error(`Failed to update message entries for message ${messageId}`);
}
}
/**
* Updates message entries with cache-first approach for streaming.
* Cache is the source of truth during streaming, DB is updated for persistence.
*
* Updates are queued per messageId to ensure they execute in order.
*
* Merge logic:
* - responseMessages: upsert by 'id' field, maintaining order
* - reasoningMessages: upsert by 'id' field, maintaining order
* - rawLlmMessages: upsert by combination of 'role' and 'toolCallId', maintaining order
*/
export async function updateMessageEntries(
params: UpdateMessageEntriesParams
): Promise<{ success: boolean }> {
const { messageId } = params;
// Get the current promise for this messageId, or use a resolved promise as the starting point
const currentQueue = updateQueues.get(messageId) ?? Promise.resolve({ success: true });
// Chain the new update to run after the current queue completes
const newQueue = currentQueue
.then(() => performUpdate(params))
.catch(() => performUpdate(params)); // Still try to run even if previous failed
// Update the queue for this messageId
updateQueues.set(messageId, newQueue);
// Clean up the queue entry once this update completes
newQueue.finally(() => {
// Only remove if this is still the current queue
if (updateQueues.get(messageId) === newQueue) {
updateQueues.delete(messageId);
}
});
return newQueue;
}

View File

@ -31,18 +31,26 @@ type VersionHistoryEntry = {
type VersionHistory = Record<string, VersionHistoryEntry>;
// Simple in-memory queue for each reportId
const updateQueues = new Map<string, Promise<{
id: string;
name: string;
content: string;
versionHistory: VersionHistory | null;
}>>();
/**
* Updates a report with new content, optionally name, and version history in a single operation
* This is more efficient than multiple individual updates
* Internal function that performs the actual update logic.
* This is separated so it can be queued.
*/
export const batchUpdateReport = async (
async function performUpdate(
params: BatchUpdateReportInput
): Promise<{
id: string;
name: string;
content: string;
versionHistory: VersionHistory | null;
}> => {
}> {
const { reportId, content, name, versionHistory } = BatchUpdateReportInputSchema.parse(params);
try {
@ -93,4 +101,47 @@ export const batchUpdateReport = async (
throw new Error('Failed to batch update report');
}
}
/**
* Updates a report with new content, optionally name, and version history in a single operation
* This is more efficient than multiple individual updates
*
* Updates are queued per reportId to ensure they execute in order.
*/
export const batchUpdateReport = async (
params: BatchUpdateReportInput
): Promise<{
id: string;
name: string;
content: string;
versionHistory: VersionHistory | null;
}> => {
const { reportId } = params;
// Get the current promise for this reportId, or use a resolved promise as the starting point
const currentQueue = updateQueues.get(reportId) ?? Promise.resolve({
id: '',
name: '',
content: '',
versionHistory: null
});
// Chain the new update to run after the current queue completes
const newQueue = currentQueue
.then(() => performUpdate(params))
.catch(() => performUpdate(params)); // Still try to run even if previous failed
// Update the queue for this reportId
updateQueues.set(reportId, newQueue);
// Clean up the queue entry once this update completes
newQueue.finally(() => {
// Only remove if this is still the current queue
if (updateQueues.get(reportId) === newQueue) {
updateQueues.delete(reportId);
}
});
return newQueue;
};