mirror of https://github.com/buster-so/buster.git
Merge pull request #1162 from buster-so/dallin-bus-1937-the-createmetrics-and-createreports-tool-calls-are-in-an
Dallin-bus-1937-the-createmetrics-and-createreports-tool-calls-are-in-an
This commit is contained in:
commit
646013a307
|
@ -344,12 +344,12 @@ You operate in a loop to complete tasks:
|
|||
- Strict JOINs: Only join tables where relationships are explicitly defined via `relationships` or `entities` keys in the provided data context/metadata. Do not join tables without a pre-defined relationship.
|
||||
- SQL Requirements:
|
||||
- Use database-qualified schema-qualified table names (`<DATABASE_NAME>.<SCHEMA_NAME>.<TABLE_NAME>`).
|
||||
- Use fully qualified column names with table aliases (e.g., `<table_alias>.<column>`).
|
||||
- MANDATORY SQL NAMING CONVENTIONS:
|
||||
- Use column names qualified with table aliases (e.g., `<table_alias>.<column>`).
|
||||
- MANDATORY SQL NAMING CONVENTIONS:
|
||||
- All Table References: MUST be fully qualified: `DATABASE_NAME.SCHEMA_NAME.TABLE_NAME`.
|
||||
- All Column References: MUST be qualified with their table alias (e.g., `alias.column_name`) or CTE name (e.g., `cte_alias.column_name_from_cte`).
|
||||
- Inside CTE Definitions: When defining a CTE (e.g., `WITH my_cte AS (SELECT t.column1 FROM DATABASE.SCHEMA.TABLE1 t ...)`), all columns selected from underlying database tables MUST use their table alias (e.g., `t.column1`, not just `column1`). This applies even if the CTE is simple and selects from only one table.
|
||||
- Selecting From CTEs: When selecting from a defined CTE, use the CTE's alias for its columns (e.g., `SELECT mc.column1 FROM my_cte mc ...`).
|
||||
- All Column References: MUST be qualified with their table alias (e.g., `c.customerid`) or CTE name (e.g., `cte_alias.column_name_from_cte`).
|
||||
- Inside CTE Definitions: When defining a CTE (e.g., `WITH my_cte AS (SELECT c.customerid FROM DATABASE.SCHEMA.TABLE1 c ...)`), all columns selected from underlying database tables MUST use their table alias (e.g., `c.customerid`, not just `customerid`). This applies even if the CTE is simple and selects from only one table.
|
||||
- Selecting From CTEs: When selecting from a defined CTE, use the CTE's alias for its columns (e.g., `SELECT mc.column_name FROM my_cte mc ...`).
|
||||
- Universal Application: These naming conventions are strict requirements and apply universally to all parts of the SQL query, including every CTE definition and every subsequent SELECT statement. Non-compliance will lead to errors.
|
||||
- Context Adherence: Strictly use only columns that are present in the data context provided by search results. Never invent or assume columns.
|
||||
- Select specific columns (avoid `SELECT *` or `COUNT(*)`).
|
||||
|
|
|
@ -98,10 +98,10 @@ export function createAnalystAgent(analystAgentOptions: AnalystAgentOptions) {
|
|||
|
||||
const docsSystemMessage = docsContent
|
||||
? ({
|
||||
role: 'system',
|
||||
content: `<data_catalog_docs>\n${docsContent}\n</data_catalog_docs>`,
|
||||
providerOptions: DEFAULT_ANTHROPIC_OPTIONS,
|
||||
} as ModelMessage)
|
||||
role: 'system',
|
||||
content: `<data_catalog_docs>\n${docsContent}\n</data_catalog_docs>`,
|
||||
providerOptions: DEFAULT_ANTHROPIC_OPTIONS,
|
||||
} as ModelMessage)
|
||||
: null;
|
||||
|
||||
async function stream({ messages }: AnalystStreamOptions) {
|
||||
|
@ -134,19 +134,19 @@ export function createAnalystAgent(analystAgentOptions: AnalystAgentOptions) {
|
|||
// Create analyst instructions system message with proper escaping
|
||||
const analystInstructionsMessage = analystInstructions
|
||||
? ({
|
||||
role: 'system',
|
||||
content: `<organization_instructions>\n${analystInstructions}\n</organization_instructions>`,
|
||||
providerOptions: DEFAULT_ANTHROPIC_OPTIONS,
|
||||
} as ModelMessage)
|
||||
role: 'system',
|
||||
content: `<organization_instructions>\n${analystInstructions}\n</organization_instructions>`,
|
||||
providerOptions: DEFAULT_ANTHROPIC_OPTIONS,
|
||||
} as ModelMessage)
|
||||
: null;
|
||||
|
||||
// Create user personalization system message
|
||||
const userPersonalizationSystemMessage = userPersonalizationMessageContent
|
||||
? ({
|
||||
role: 'system',
|
||||
content: userPersonalizationMessageContent,
|
||||
providerOptions: DEFAULT_ANTHROPIC_OPTIONS,
|
||||
} as ModelMessage)
|
||||
role: 'system',
|
||||
content: userPersonalizationMessageContent,
|
||||
providerOptions: DEFAULT_ANTHROPIC_OPTIONS,
|
||||
} as ModelMessage)
|
||||
: null;
|
||||
|
||||
return wrapTraced(
|
||||
|
|
|
@ -75,4 +75,35 @@ describe('Analyst Agent Instructions', () => {
|
|||
getAnalystAgentSystemPrompt(' '); // whitespace only
|
||||
}).toThrow('SQL dialect guidance is required');
|
||||
});
|
||||
|
||||
it('should contain mandatory SQL naming conventions', () => {
|
||||
const result = getAnalystAgentSystemPrompt('Test guidance');
|
||||
|
||||
// Check for MANDATORY SQL NAMING CONVENTIONS section
|
||||
expect(result).toContain('MANDATORY SQL NAMING CONVENTIONS');
|
||||
|
||||
// Ensure table references require full qualification
|
||||
expect(result).toContain('All Table References: MUST be fully qualified: `DATABASE_NAME.SCHEMA_NAME.TABLE_NAME`');
|
||||
|
||||
// Ensure column references use table aliases (not full qualifiers)
|
||||
expect(result).toContain('All Column References: MUST be qualified with their table alias (e.g., `c.customerid`)');
|
||||
|
||||
// Ensure examples show table alias usage without full qualification
|
||||
expect(result).toContain('c.customerid');
|
||||
expect(result).not.toContain('postgres.ont_ont.customer.customerid');
|
||||
|
||||
// Ensure CTE examples use table aliases correctly
|
||||
expect(result).toContain('SELECT c.customerid FROM DATABASE.SCHEMA.TABLE1 c');
|
||||
expect(result).toContain('c.customerid`, not just `customerid`');
|
||||
});
|
||||
|
||||
it('should use column names qualified with table aliases', () => {
|
||||
const result = getAnalystAgentSystemPrompt('Test guidance');
|
||||
|
||||
// Check for the updated description
|
||||
expect(result).toContain('Use column names qualified with table aliases');
|
||||
|
||||
// Ensure the old verbose description is not present
|
||||
expect(result).not.toContain('Use fully qualified column names with table aliases');
|
||||
});
|
||||
});
|
||||
|
|
|
@ -145,4 +145,40 @@ describe('Think and Prep Agent Instructions', () => {
|
|||
getThinkAndPrepAgentSystemPrompt(' '); // whitespace only
|
||||
}).toThrow('SQL dialect guidance is required');
|
||||
});
|
||||
|
||||
describe.each([
|
||||
['standard', 'standard'],
|
||||
['investigation', 'investigation'],
|
||||
])('SQL naming conventions in %s mode', (modeName, mode) => {
|
||||
it(`should contain mandatory SQL naming conventions in ${modeName} mode`, () => {
|
||||
const result = getThinkAndPrepAgentSystemPrompt('Test guidance', mode as 'standard' | 'investigation');
|
||||
|
||||
// Check for MANDATORY SQL NAMING CONVENTIONS section
|
||||
expect(result).toContain('MANDATORY SQL NAMING CONVENTIONS');
|
||||
|
||||
// Ensure table references require full qualification
|
||||
expect(result).toContain('All Table References: MUST be fully qualified: `DATABASE_NAME.SCHEMA_NAME.TABLE_NAME`');
|
||||
|
||||
// Ensure column references use table aliases (not full qualifiers)
|
||||
expect(result).toContain('All Column References: MUST be qualified with their table alias (e.g., `c.customerid`)');
|
||||
|
||||
// Ensure examples show table alias usage without full qualification
|
||||
expect(result).toContain('c.customerid');
|
||||
expect(result).not.toContain('postgres.ont_ont.customer.customerid');
|
||||
|
||||
// Ensure CTE examples use table aliases correctly
|
||||
expect(result).toContain('SELECT c.customerid FROM DATABASE.SCHEMA.TABLE1 c');
|
||||
expect(result).toContain('c.customerid`, not just `customerid`');
|
||||
});
|
||||
|
||||
it(`should use column names qualified with table aliases in ${modeName} mode`, () => {
|
||||
const result = getThinkAndPrepAgentSystemPrompt('Test guidance', mode as 'standard' | 'investigation');
|
||||
|
||||
// Check for the updated description
|
||||
expect(result).toContain('Use column names qualified with table aliases');
|
||||
|
||||
// Ensure the old verbose description is not present
|
||||
expect(result).not.toContain('Use fully qualified column names with table aliases');
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
@ -588,12 +588,12 @@ If all true → proceed to submit prep for Asset Creation with `submitThoughts`.
|
|||
- Strict JOINs: Only join tables where relationships are explicitly defined via `relationships` or `entities` keys in the provided data context/metadata. Do not join tables without a pre-defined relationship.
|
||||
- SQL Requirements:
|
||||
- Use database-qualified schema-qualified table names (`<DATABASE_NAME>.<SCHEMA_NAME>.<TABLE_NAME>`).
|
||||
- Use fully qualified column names with table aliases (e.g., `<table_alias>.<column>`).
|
||||
- Use column names qualified with table aliases (e.g., `<table_alias>.<column>`).
|
||||
- MANDATORY SQL NAMING CONVENTIONS:
|
||||
- All Table References: MUST be fully qualified: `DATABASE_NAME.SCHEMA_NAME.TABLE_NAME`.
|
||||
- All Column References: MUST be qualified with their table alias (e.g., `alias.column_name`) or CTE name (e.g., `cte_alias.column_name_from_cte`).
|
||||
- Inside CTE Definitions: When defining a CTE (e.g., `WITH my_cte AS (SELECT t.column1 FROM DATABASE.SCHEMA.TABLE1 t ...)`), all columns selected from underlying database tables MUST use their table alias (e.g., `t.column1`, not just `column1`). This applies even if the CTE is simple and selects from only one table.
|
||||
- Selecting From CTEs: When selecting from a defined CTE, use the CTE's alias for its columns (e.g., `SELECT mc.column1 FROM my_cte mc ...`).
|
||||
- All Column References: MUST be qualified with their table alias (e.g., `c.customerid`) or CTE name (e.g., `cte_alias.column_name_from_cte`).
|
||||
- Inside CTE Definitions: When defining a CTE (e.g., `WITH my_cte AS (SELECT c.customerid FROM DATABASE.SCHEMA.TABLE1 c ...)`), all columns selected from underlying database tables MUST use their table alias (e.g., `c.customerid`, not just `customerid`). This applies even if the CTE is simple and selects from only one table.
|
||||
- Selecting From CTEs: When selecting from a defined CTE, use the CTE's alias for its columns (e.g., `SELECT mc.column_name FROM my_cte mc ...`).
|
||||
- Universal Application: These naming conventions are strict requirements and apply universally to all parts of the SQL query, including every CTE definition and every subsequent SELECT statement. Non-compliance will lead to errors.
|
||||
- Context Adherence: Strictly use only columns that are present in the data context provided by search results. Never invent or assume columns.
|
||||
- Select specific columns (avoid `SELECT *` or `COUNT(*)`).
|
||||
|
|
|
@ -465,12 +465,12 @@ When in doubt, be more thorough rather than less. Reports are the default becaus
|
|||
- Strict JOINs: Only join tables where relationships are explicitly defined via `relationships` or `entities` keys in the provided data context/metadata. Do not join tables without a pre-defined relationship.
|
||||
- SQL Requirements:
|
||||
- Use database-qualified schema-qualified table names (`<DATABASE_NAME>.<SCHEMA_NAME>.<TABLE_NAME>`).
|
||||
- Use fully qualified column names with table aliases (e.g., `<table_alias>.<column>`).
|
||||
- Use column names qualified with table aliases (e.g., `<table_alias>.<column>`).
|
||||
- MANDATORY SQL NAMING CONVENTIONS:
|
||||
- All Table References: MUST be fully qualified: `DATABASE_NAME.SCHEMA_NAME.TABLE_NAME`.
|
||||
- All Column References: MUST be qualified with their table alias (e.g., `alias.column_name`) or CTE name (e.g., `cte_alias.column_name_from_cte`).
|
||||
- Inside CTE Definitions: When defining a CTE (e.g., `WITH my_cte AS (SELECT t.column1 FROM DATABASE.SCHEMA.TABLE1 t ...)`), all columns selected from underlying database tables MUST use their table alias (e.g., `t.column1`, not just `column1`). This applies even if the CTE is simple and selects from only one table.
|
||||
- Selecting From CTEs: When selecting from a defined CTE, use the CTE's alias for its columns (e.g., `SELECT mc.column1 FROM my_cte mc ...`).
|
||||
- All Column References: MUST be qualified with their table alias (e.g., `c.customerid`) or CTE name (e.g., `cte_alias.column_name_from_cte`).
|
||||
- Inside CTE Definitions: When defining a CTE (e.g., `WITH my_cte AS (SELECT c.customerid FROM DATABASE.SCHEMA.TABLE1 c ...)`), all columns selected from underlying database tables MUST use their table alias (e.g., `c.customerid`, not just `customerid`). This applies even if the CTE is simple and selects from only one table.
|
||||
- Selecting From CTEs: When selecting from a defined CTE, use the CTE's alias for its columns (e.g., `SELECT mc.column_name FROM my_cte mc ...`).
|
||||
- Universal Application: These naming conventions are strict requirements and apply universally to all parts of the SQL query, including every CTE definition and every subsequent SELECT statement. Non-compliance will lead to errors.
|
||||
- Context Adherence: Strictly use only columns that are present in the data context provided by search results. Never invent or assume columns.
|
||||
- Select specific columns (avoid `SELECT *` or `COUNT(*)`).
|
||||
|
|
|
@ -6,8 +6,15 @@ export const DEFAULT_ANTHROPIC_OPTIONS = {
|
|||
gateway: {
|
||||
order: ['bedrock', 'anthropic', 'vertex'],
|
||||
},
|
||||
headers: {},
|
||||
anthropic: { cacheControl: { type: 'ephemeral' } },
|
||||
anthropic: {
|
||||
cacheControl: { type: 'ephemeral' },
|
||||
},
|
||||
bedrock: {
|
||||
cacheControl: { type: 'ephemeral' },
|
||||
additionalModelRequestFields: {
|
||||
anthropic_beta: ['fine-grained-tool-streaming-2025-05-14'],
|
||||
},
|
||||
}
|
||||
};
|
||||
|
||||
export const DEFAULT_OPENAI_OPTIONS = {
|
||||
|
@ -15,7 +22,7 @@ export const DEFAULT_OPENAI_OPTIONS = {
|
|||
order: ['openai'],
|
||||
},
|
||||
openai: {
|
||||
parallelToolCalls: false,
|
||||
// parallelToolCalls: false,
|
||||
reasoningEffort: 'minimal',
|
||||
verbosity: 'low',
|
||||
},
|
||||
|
|
|
@ -3,8 +3,8 @@ import { generateObject } from 'ai';
|
|||
import type { ModelMessage } from 'ai';
|
||||
import { wrapTraced } from 'braintrust';
|
||||
import { z } from 'zod';
|
||||
import { GPT5Nano } from '../../../llm';
|
||||
import { DEFAULT_OPENAI_OPTIONS } from '../../../llm/providers/gateway';
|
||||
import { Haiku35 } from '../../../llm';
|
||||
import { DEFAULT_ANTHROPIC_OPTIONS } from '../../../llm/providers/gateway';
|
||||
|
||||
// Zod-first: define input/output schemas and export inferred types
|
||||
export const generateChatTitleParamsSchema = z.object({
|
||||
|
@ -56,10 +56,10 @@ async function generateTitleWithLLM(messages: ModelMessage[]): Promise<string> {
|
|||
const tracedChatTitle = wrapTraced(
|
||||
async () => {
|
||||
const { object } = await generateObject({
|
||||
model: GPT5Nano,
|
||||
model: Haiku35,
|
||||
schema: llmOutputSchema,
|
||||
messages: titleMessages,
|
||||
providerOptions: DEFAULT_OPENAI_OPTIONS,
|
||||
providerOptions: DEFAULT_ANTHROPIC_OPTIONS,
|
||||
});
|
||||
|
||||
return object;
|
||||
|
|
|
@ -21,16 +21,14 @@ const UpdateMessageEntriesSchema = z.object({
|
|||
|
||||
export type UpdateMessageEntriesParams = z.infer<typeof UpdateMessageEntriesSchema>;
|
||||
|
||||
// Simple in-memory queue for each messageId
|
||||
const updateQueues = new Map<string, Promise<{ success: boolean }>>();
|
||||
|
||||
/**
|
||||
* Updates message entries with cache-first approach for streaming.
|
||||
* Cache is the source of truth during streaming, DB is updated for persistence.
|
||||
*
|
||||
* Merge logic:
|
||||
* - responseMessages: upsert by 'id' field, maintaining order
|
||||
* - reasoningMessages: upsert by 'id' field, maintaining order
|
||||
* - rawLlmMessages: upsert by combination of 'role' and 'toolCallId', maintaining order
|
||||
* Internal function that performs the actual update logic.
|
||||
* This is separated so it can be queued.
|
||||
*/
|
||||
export async function updateMessageEntries({
|
||||
async function performUpdate({
|
||||
messageId,
|
||||
rawLlmMessages,
|
||||
responseMessages,
|
||||
|
@ -95,3 +93,41 @@ export async function updateMessageEntries({
|
|||
throw new Error(`Failed to update message entries for message ${messageId}`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Updates message entries with cache-first approach for streaming.
|
||||
* Cache is the source of truth during streaming, DB is updated for persistence.
|
||||
*
|
||||
* Updates are queued per messageId to ensure they execute in order.
|
||||
*
|
||||
* Merge logic:
|
||||
* - responseMessages: upsert by 'id' field, maintaining order
|
||||
* - reasoningMessages: upsert by 'id' field, maintaining order
|
||||
* - rawLlmMessages: upsert by combination of 'role' and 'toolCallId', maintaining order
|
||||
*/
|
||||
export async function updateMessageEntries(
|
||||
params: UpdateMessageEntriesParams
|
||||
): Promise<{ success: boolean }> {
|
||||
const { messageId } = params;
|
||||
|
||||
// Get the current promise for this messageId, or use a resolved promise as the starting point
|
||||
const currentQueue = updateQueues.get(messageId) ?? Promise.resolve({ success: true });
|
||||
|
||||
// Chain the new update to run after the current queue completes
|
||||
const newQueue = currentQueue
|
||||
.then(() => performUpdate(params))
|
||||
.catch(() => performUpdate(params)); // Still try to run even if previous failed
|
||||
|
||||
// Update the queue for this messageId
|
||||
updateQueues.set(messageId, newQueue);
|
||||
|
||||
// Clean up the queue entry once this update completes
|
||||
newQueue.finally(() => {
|
||||
// Only remove if this is still the current queue
|
||||
if (updateQueues.get(messageId) === newQueue) {
|
||||
updateQueues.delete(messageId);
|
||||
}
|
||||
});
|
||||
|
||||
return newQueue;
|
||||
}
|
||||
|
|
|
@ -31,18 +31,26 @@ type VersionHistoryEntry = {
|
|||
|
||||
type VersionHistory = Record<string, VersionHistoryEntry>;
|
||||
|
||||
// Simple in-memory queue for each reportId
|
||||
const updateQueues = new Map<string, Promise<{
|
||||
id: string;
|
||||
name: string;
|
||||
content: string;
|
||||
versionHistory: VersionHistory | null;
|
||||
}>>();
|
||||
|
||||
/**
|
||||
* Updates a report with new content, optionally name, and version history in a single operation
|
||||
* This is more efficient than multiple individual updates
|
||||
* Internal function that performs the actual update logic.
|
||||
* This is separated so it can be queued.
|
||||
*/
|
||||
export const batchUpdateReport = async (
|
||||
async function performUpdate(
|
||||
params: BatchUpdateReportInput
|
||||
): Promise<{
|
||||
id: string;
|
||||
name: string;
|
||||
content: string;
|
||||
versionHistory: VersionHistory | null;
|
||||
}> => {
|
||||
}> {
|
||||
const { reportId, content, name, versionHistory } = BatchUpdateReportInputSchema.parse(params);
|
||||
|
||||
try {
|
||||
|
@ -93,4 +101,47 @@ export const batchUpdateReport = async (
|
|||
|
||||
throw new Error('Failed to batch update report');
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Updates a report with new content, optionally name, and version history in a single operation
|
||||
* This is more efficient than multiple individual updates
|
||||
*
|
||||
* Updates are queued per reportId to ensure they execute in order.
|
||||
*/
|
||||
export const batchUpdateReport = async (
|
||||
params: BatchUpdateReportInput
|
||||
): Promise<{
|
||||
id: string;
|
||||
name: string;
|
||||
content: string;
|
||||
versionHistory: VersionHistory | null;
|
||||
}> => {
|
||||
const { reportId } = params;
|
||||
|
||||
// Get the current promise for this reportId, or use a resolved promise as the starting point
|
||||
const currentQueue = updateQueues.get(reportId) ?? Promise.resolve({
|
||||
id: '',
|
||||
name: '',
|
||||
content: '',
|
||||
versionHistory: null
|
||||
});
|
||||
|
||||
// Chain the new update to run after the current queue completes
|
||||
const newQueue = currentQueue
|
||||
.then(() => performUpdate(params))
|
||||
.catch(() => performUpdate(params)); // Still try to run even if previous failed
|
||||
|
||||
// Update the queue for this reportId
|
||||
updateQueues.set(reportId, newQueue);
|
||||
|
||||
// Clean up the queue entry once this update completes
|
||||
newQueue.finally(() => {
|
||||
// Only remove if this is still the current queue
|
||||
if (updateQueues.get(reportId) === newQueue) {
|
||||
updateQueues.delete(reportId);
|
||||
}
|
||||
});
|
||||
|
||||
return newQueue;
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue