Merge pull request #1162 from buster-so/dallin-bus-1937-the-createmetrics-and-createreports-tool-calls-are-in-an

Dallin-bus-1937-the-createmetrics-and-createreports-tool-calls-are-in-an
2025-09-25 18:45:37 -06:00 · 2025-09-25 18:45:37 -06:00 · 646013a307
parent 1317958982 c0ca22ad8c
commit 646013a307
10 changed files with 205 additions and 44 deletions
--- a/packages/ai/src/agents/analyst-agent/analyst-agent-prompt.txt
+++ b/packages/ai/src/agents/analyst-agent/analyst-agent-prompt.txt
@ -344,12 +344,12 @@ You operate in a loop to complete tasks:
  - Strict JOINs: Only join tables where relationships are explicitly defined via `relationships` or `entities` keys in the provided data context/metadata. Do not join tables without a pre-defined relationship.
 - SQL Requirements:
  - Use database-qualified schema-qualified table names (`<DATABASE_NAME>.<SCHEMA_NAME>.<TABLE_NAME>`).
-  - Use fully qualified column names with table aliases (e.g., `<table_alias>.<column>`).
-  - MANDATORY SQL NAMING CONVENTIONS:
+  - Use column names qualified with table aliases (e.g., `<table_alias>.<column>`).
+    - MANDATORY SQL NAMING CONVENTIONS:
    - All Table References: MUST be fully qualified: `DATABASE_NAME.SCHEMA_NAME.TABLE_NAME`.
-    - All Column References: MUST be qualified with their table alias (e.g., `alias.column_name`) or CTE name (e.g., `cte_alias.column_name_from_cte`).
-    - Inside CTE Definitions: When defining a CTE (e.g., `WITH my_cte AS (SELECT t.column1 FROM DATABASE.SCHEMA.TABLE1 t ...)`), all columns selected from underlying database tables MUST use their table alias (e.g., `t.column1`, not just `column1`). This applies even if the CTE is simple and selects from only one table.
-    - Selecting From CTEs: When selecting from a defined CTE, use the CTE's alias for its columns (e.g., `SELECT mc.column1 FROM my_cte mc ...`).
+    - All Column References: MUST be qualified with their table alias (e.g., `c.customerid`) or CTE name (e.g., `cte_alias.column_name_from_cte`).
+    - Inside CTE Definitions: When defining a CTE (e.g., `WITH my_cte AS (SELECT c.customerid FROM DATABASE.SCHEMA.TABLE1 c ...)`), all columns selected from underlying database tables MUST use their table alias (e.g., `c.customerid`, not just `customerid`). This applies even if the CTE is simple and selects from only one table.
+    - Selecting From CTEs: When selecting from a defined CTE, use the CTE's alias for its columns (e.g., `SELECT mc.column_name FROM my_cte mc ...`).
    - Universal Application: These naming conventions are strict requirements and apply universally to all parts of the SQL query, including every CTE definition and every subsequent SELECT statement. Non-compliance will lead to errors.
  - Context Adherence: Strictly use only columns that are present in the data context provided by search results. Never invent or assume columns.
  - Select specific columns (avoid `SELECT *` or `COUNT(*)`).
--- a/packages/ai/src/agents/analyst-agent/analyst-agent.ts
+++ b/packages/ai/src/agents/analyst-agent/analyst-agent.ts
@ -98,10 +98,10 @@ export function createAnalystAgent(analystAgentOptions: AnalystAgentOptions) {

  const docsSystemMessage = docsContent
    ? ({
-        role: 'system',
-        content: `<data_catalog_docs>\n${docsContent}\n</data_catalog_docs>`,
-        providerOptions: DEFAULT_ANTHROPIC_OPTIONS,
-      } as ModelMessage)
+      role: 'system',
+      content: `<data_catalog_docs>\n${docsContent}\n</data_catalog_docs>`,
+      providerOptions: DEFAULT_ANTHROPIC_OPTIONS,
+    } as ModelMessage)
    : null;

  async function stream({ messages }: AnalystStreamOptions) {
@ -134,19 +134,19 @@ export function createAnalystAgent(analystAgentOptions: AnalystAgentOptions) {
    // Create analyst instructions system message with proper escaping
    const analystInstructionsMessage = analystInstructions
      ? ({
-          role: 'system',
-          content: `<organization_instructions>\n${analystInstructions}\n</organization_instructions>`,
-          providerOptions: DEFAULT_ANTHROPIC_OPTIONS,
-        } as ModelMessage)
+        role: 'system',
+        content: `<organization_instructions>\n${analystInstructions}\n</organization_instructions>`,
+        providerOptions: DEFAULT_ANTHROPIC_OPTIONS,
+      } as ModelMessage)
      : null;

    // Create user personalization system message
    const userPersonalizationSystemMessage = userPersonalizationMessageContent
      ? ({
-          role: 'system',
-          content: userPersonalizationMessageContent,
-          providerOptions: DEFAULT_ANTHROPIC_OPTIONS,
-        } as ModelMessage)
+        role: 'system',
+        content: userPersonalizationMessageContent,
+        providerOptions: DEFAULT_ANTHROPIC_OPTIONS,
+      } as ModelMessage)
      : null;

    return wrapTraced(
--- a/packages/ai/src/agents/analyst-agent/get-analyst-agent-system-prompt.test.ts
+++ b/packages/ai/src/agents/analyst-agent/get-analyst-agent-system-prompt.test.ts
@ -75,4 +75,35 @@ describe('Analyst Agent Instructions', () => {
      getAnalystAgentSystemPrompt('   '); // whitespace only
    }).toThrow('SQL dialect guidance is required');
  });
+
+  it('should contain mandatory SQL naming conventions', () => {
+    const result = getAnalystAgentSystemPrompt('Test guidance');
+
+    // Check for MANDATORY SQL NAMING CONVENTIONS section
+    expect(result).toContain('MANDATORY SQL NAMING CONVENTIONS');
+
+    // Ensure table references require full qualification
+    expect(result).toContain('All Table References: MUST be fully qualified: `DATABASE_NAME.SCHEMA_NAME.TABLE_NAME`');
+
+    // Ensure column references use table aliases (not full qualifiers)
+    expect(result).toContain('All Column References: MUST be qualified with their table alias (e.g., `c.customerid`)');
+
+    // Ensure examples show table alias usage without full qualification
+    expect(result).toContain('c.customerid');
+    expect(result).not.toContain('postgres.ont_ont.customer.customerid');
+
+    // Ensure CTE examples use table aliases correctly
+    expect(result).toContain('SELECT c.customerid FROM DATABASE.SCHEMA.TABLE1 c');
+    expect(result).toContain('c.customerid`, not just `customerid`');
+  });
+
+  it('should use column names qualified with table aliases', () => {
+    const result = getAnalystAgentSystemPrompt('Test guidance');
+
+    // Check for the updated description
+    expect(result).toContain('Use column names qualified with table aliases');
+
+    // Ensure the old verbose description is not present
+    expect(result).not.toContain('Use fully qualified column names with table aliases');
+  });
 });
--- a/packages/ai/src/agents/think-and-prep-agent/get-think-and-prep-agent-system-prompt.test.ts
+++ b/packages/ai/src/agents/think-and-prep-agent/get-think-and-prep-agent-system-prompt.test.ts
@ -145,4 +145,40 @@ describe('Think and Prep Agent Instructions', () => {
      getThinkAndPrepAgentSystemPrompt('   '); // whitespace only
    }).toThrow('SQL dialect guidance is required');
  });
+
+  describe.each([
+    ['standard', 'standard'],
+    ['investigation', 'investigation'],
+  ])('SQL naming conventions in %s mode', (modeName, mode) => {
+    it(`should contain mandatory SQL naming conventions in ${modeName} mode`, () => {
+      const result = getThinkAndPrepAgentSystemPrompt('Test guidance', mode as 'standard' | 'investigation');
+
+      // Check for MANDATORY SQL NAMING CONVENTIONS section
+      expect(result).toContain('MANDATORY SQL NAMING CONVENTIONS');
+
+      // Ensure table references require full qualification
+      expect(result).toContain('All Table References: MUST be fully qualified: `DATABASE_NAME.SCHEMA_NAME.TABLE_NAME`');
+
+      // Ensure column references use table aliases (not full qualifiers)
+      expect(result).toContain('All Column References: MUST be qualified with their table alias (e.g., `c.customerid`)');
+
+      // Ensure examples show table alias usage without full qualification
+      expect(result).toContain('c.customerid');
+      expect(result).not.toContain('postgres.ont_ont.customer.customerid');
+
+      // Ensure CTE examples use table aliases correctly
+      expect(result).toContain('SELECT c.customerid FROM DATABASE.SCHEMA.TABLE1 c');
+      expect(result).toContain('c.customerid`, not just `customerid`');
+    });
+
+    it(`should use column names qualified with table aliases in ${modeName} mode`, () => {
+      const result = getThinkAndPrepAgentSystemPrompt('Test guidance', mode as 'standard' | 'investigation');
+
+      // Check for the updated description
+      expect(result).toContain('Use column names qualified with table aliases');
+
+      // Ensure the old verbose description is not present
+      expect(result).not.toContain('Use fully qualified column names with table aliases');
+    });
+  });
 });
--- a/packages/ai/src/agents/think-and-prep-agent/think-and-prep-agent-investigation-prompt.txt
+++ b/packages/ai/src/agents/think-and-prep-agent/think-and-prep-agent-investigation-prompt.txt
@ -588,12 +588,12 @@ If all true → proceed to submit prep for Asset Creation with `submitThoughts`.
    - Strict JOINs: Only join tables where relationships are explicitly defined via `relationships` or `entities` keys in the provided data context/metadata. Do not join tables without a pre-defined relationship.
 - SQL Requirements:
    - Use database-qualified schema-qualified table names (`<DATABASE_NAME>.<SCHEMA_NAME>.<TABLE_NAME>`).
-    - Use fully qualified column names with table aliases (e.g., `<table_alias>.<column>`).
+    - Use column names qualified with table aliases (e.g., `<table_alias>.<column>`).
    - MANDATORY SQL NAMING CONVENTIONS:
    - All Table References: MUST be fully qualified: `DATABASE_NAME.SCHEMA_NAME.TABLE_NAME`.
-    - All Column References: MUST be qualified with their table alias (e.g., `alias.column_name`) or CTE name (e.g., `cte_alias.column_name_from_cte`).
-    - Inside CTE Definitions: When defining a CTE (e.g., `WITH my_cte AS (SELECT t.column1 FROM DATABASE.SCHEMA.TABLE1 t ...)`), all columns selected from underlying database tables MUST use their table alias (e.g., `t.column1`, not just `column1`). This applies even if the CTE is simple and selects from only one table.
-    - Selecting From CTEs: When selecting from a defined CTE, use the CTE's alias for its columns (e.g., `SELECT mc.column1 FROM my_cte mc ...`).
+    - All Column References: MUST be qualified with their table alias (e.g., `c.customerid`) or CTE name (e.g., `cte_alias.column_name_from_cte`).
+    - Inside CTE Definitions: When defining a CTE (e.g., `WITH my_cte AS (SELECT c.customerid FROM DATABASE.SCHEMA.TABLE1 c ...)`), all columns selected from underlying database tables MUST use their table alias (e.g., `c.customerid`, not just `customerid`). This applies even if the CTE is simple and selects from only one table.
+    - Selecting From CTEs: When selecting from a defined CTE, use the CTE's alias for its columns (e.g., `SELECT mc.column_name FROM my_cte mc ...`).
    - Universal Application: These naming conventions are strict requirements and apply universally to all parts of the SQL query, including every CTE definition and every subsequent SELECT statement. Non-compliance will lead to errors.
    - Context Adherence: Strictly use only columns that are present in the data context provided by search results. Never invent or assume columns.
    - Select specific columns (avoid `SELECT *` or `COUNT(*)`).
--- a/packages/ai/src/agents/think-and-prep-agent/think-and-prep-agent-standard-prompt.txt
+++ b/packages/ai/src/agents/think-and-prep-agent/think-and-prep-agent-standard-prompt.txt
@ -465,12 +465,12 @@ When in doubt, be more thorough rather than less. Reports are the default becaus
    - Strict JOINs: Only join tables where relationships are explicitly defined via `relationships` or `entities` keys in the provided data context/metadata. Do not join tables without a pre-defined relationship.
 - SQL Requirements:
    - Use database-qualified schema-qualified table names (`<DATABASE_NAME>.<SCHEMA_NAME>.<TABLE_NAME>`).
-    - Use fully qualified column names with table aliases (e.g., `<table_alias>.<column>`).
+    - Use column names qualified with table aliases (e.g., `<table_alias>.<column>`).
    - MANDATORY SQL NAMING CONVENTIONS:
    - All Table References: MUST be fully qualified: `DATABASE_NAME.SCHEMA_NAME.TABLE_NAME`.
-    - All Column References: MUST be qualified with their table alias (e.g., `alias.column_name`) or CTE name (e.g., `cte_alias.column_name_from_cte`).
-    - Inside CTE Definitions: When defining a CTE (e.g., `WITH my_cte AS (SELECT t.column1 FROM DATABASE.SCHEMA.TABLE1 t ...)`), all columns selected from underlying database tables MUST use their table alias (e.g., `t.column1`, not just `column1`). This applies even if the CTE is simple and selects from only one table.
-    - Selecting From CTEs: When selecting from a defined CTE, use the CTE's alias for its columns (e.g., `SELECT mc.column1 FROM my_cte mc ...`).
+    - All Column References: MUST be qualified with their table alias (e.g., `c.customerid`) or CTE name (e.g., `cte_alias.column_name_from_cte`).
+    - Inside CTE Definitions: When defining a CTE (e.g., `WITH my_cte AS (SELECT c.customerid FROM DATABASE.SCHEMA.TABLE1 c ...)`), all columns selected from underlying database tables MUST use their table alias (e.g., `c.customerid`, not just `customerid`). This applies even if the CTE is simple and selects from only one table.
+    - Selecting From CTEs: When selecting from a defined CTE, use the CTE's alias for its columns (e.g., `SELECT mc.column_name FROM my_cte mc ...`).
    - Universal Application: These naming conventions are strict requirements and apply universally to all parts of the SQL query, including every CTE definition and every subsequent SELECT statement. Non-compliance will lead to errors.
    - Context Adherence: Strictly use only columns that are present in the data context provided by search results. Never invent or assume columns.
    - Select specific columns (avoid `SELECT *` or `COUNT(*)`).
--- a/packages/ai/src/llm/providers/gateway.ts
+++ b/packages/ai/src/llm/providers/gateway.ts
@ -6,8 +6,15 @@ export const DEFAULT_ANTHROPIC_OPTIONS = {
  gateway: {
    order: ['bedrock', 'anthropic', 'vertex'],
  },
-  headers: {},
-  anthropic: { cacheControl: { type: 'ephemeral' } },
+  anthropic: {
+    cacheControl: { type: 'ephemeral' },
+  },
+  bedrock: {
+    cacheControl: { type: 'ephemeral' },
+    additionalModelRequestFields: {
+      anthropic_beta: ['fine-grained-tool-streaming-2025-05-14'],
+    },
+  }
 };

 export const DEFAULT_OPENAI_OPTIONS = {
@ -15,7 +22,7 @@ export const DEFAULT_OPENAI_OPTIONS = {
    order: ['openai'],
  },
  openai: {
-    parallelToolCalls: false,
+    // parallelToolCalls: false,
    reasoningEffort: 'minimal',
    verbosity: 'low',
  },
--- a/packages/ai/src/steps/analyst-agent-steps/generate-chat-title-step/generate-chat-title-step.ts
+++ b/packages/ai/src/steps/analyst-agent-steps/generate-chat-title-step/generate-chat-title-step.ts
@ -3,8 +3,8 @@ import { generateObject } from 'ai';
 import type { ModelMessage } from 'ai';
 import { wrapTraced } from 'braintrust';
 import { z } from 'zod';
-import { GPT5Nano } from '../../../llm';
-import { DEFAULT_OPENAI_OPTIONS } from '../../../llm/providers/gateway';
+import { Haiku35 } from '../../../llm';
+import { DEFAULT_ANTHROPIC_OPTIONS } from '../../../llm/providers/gateway';

 // Zod-first: define input/output schemas and export inferred types
 export const generateChatTitleParamsSchema = z.object({
@ -56,10 +56,10 @@ async function generateTitleWithLLM(messages: ModelMessage[]): Promise<string> {
    const tracedChatTitle = wrapTraced(
      async () => {
        const { object } = await generateObject({
-          model: GPT5Nano,
+          model: Haiku35,
          schema: llmOutputSchema,
          messages: titleMessages,
-          providerOptions: DEFAULT_OPENAI_OPTIONS,
+          providerOptions: DEFAULT_ANTHROPIC_OPTIONS,
        });

        return object;
--- a/packages/database/src/queries/messages/update-message-entries.ts
+++ b/packages/database/src/queries/messages/update-message-entries.ts
@ -21,16 +21,14 @@ const UpdateMessageEntriesSchema = z.object({

 export type UpdateMessageEntriesParams = z.infer<typeof UpdateMessageEntriesSchema>;

+// Simple in-memory queue for each messageId
+const updateQueues = new Map<string, Promise<{ success: boolean }>>();
+
 /**
- * Updates message entries with cache-first approach for streaming.
- * Cache is the source of truth during streaming, DB is updated for persistence.
- *
- * Merge logic:
- * - responseMessages: upsert by 'id' field, maintaining order
- * - reasoningMessages: upsert by 'id' field, maintaining order
- * - rawLlmMessages: upsert by combination of 'role' and 'toolCallId', maintaining order
+ * Internal function that performs the actual update logic.
+ * This is separated so it can be queued.
 */
-export async function updateMessageEntries({
+async function performUpdate({
  messageId,
  rawLlmMessages,
  responseMessages,
@ -95,3 +93,41 @@ export async function updateMessageEntries({
    throw new Error(`Failed to update message entries for message ${messageId}`);
  }
 }
+
+/**
+ * Updates message entries with cache-first approach for streaming.
+ * Cache is the source of truth during streaming, DB is updated for persistence.
+ * 
+ * Updates are queued per messageId to ensure they execute in order.
+ *
+ * Merge logic:
+ * - responseMessages: upsert by 'id' field, maintaining order
+ * - reasoningMessages: upsert by 'id' field, maintaining order
+ * - rawLlmMessages: upsert by combination of 'role' and 'toolCallId', maintaining order
+ */
+export async function updateMessageEntries(
+  params: UpdateMessageEntriesParams
+): Promise<{ success: boolean }> {
+  const { messageId } = params;
+  
+  // Get the current promise for this messageId, or use a resolved promise as the starting point
+  const currentQueue = updateQueues.get(messageId) ?? Promise.resolve({ success: true });
+  
+  // Chain the new update to run after the current queue completes
+  const newQueue = currentQueue
+    .then(() => performUpdate(params))
+    .catch(() => performUpdate(params)); // Still try to run even if previous failed
+  
+  // Update the queue for this messageId
+  updateQueues.set(messageId, newQueue);
+  
+  // Clean up the queue entry once this update completes
+  newQueue.finally(() => {
+    // Only remove if this is still the current queue
+    if (updateQueues.get(messageId) === newQueue) {
+      updateQueues.delete(messageId);
+    }
+  });
+  
+  return newQueue;
+}
--- a/packages/database/src/queries/reports/batch-update-report.ts
+++ b/packages/database/src/queries/reports/batch-update-report.ts
@ -31,18 +31,26 @@ type VersionHistoryEntry = {

 type VersionHistory = Record<string, VersionHistoryEntry>;

+// Simple in-memory queue for each reportId
+const updateQueues = new Map<string, Promise<{
+  id: string;
+  name: string;
+  content: string;
+  versionHistory: VersionHistory | null;
+}>>();
+
 /**
- * Updates a report with new content, optionally name, and version history in a single operation
- * This is more efficient than multiple individual updates
+ * Internal function that performs the actual update logic.
+ * This is separated so it can be queued.
 */
-export const batchUpdateReport = async (
+async function performUpdate(
  params: BatchUpdateReportInput
 ): Promise<{
  id: string;
  name: string;
  content: string;
  versionHistory: VersionHistory | null;
-}> => {
+}> {
  const { reportId, content, name, versionHistory } = BatchUpdateReportInputSchema.parse(params);

  try {
@ -93,4 +101,47 @@ export const batchUpdateReport = async (

    throw new Error('Failed to batch update report');
  }
+}
+
+/**
+ * Updates a report with new content, optionally name, and version history in a single operation
+ * This is more efficient than multiple individual updates
+ * 
+ * Updates are queued per reportId to ensure they execute in order.
+ */
+export const batchUpdateReport = async (
+  params: BatchUpdateReportInput
+): Promise<{
+  id: string;
+  name: string;
+  content: string;
+  versionHistory: VersionHistory | null;
+}> => {
+  const { reportId } = params;
+  
+  // Get the current promise for this reportId, or use a resolved promise as the starting point
+  const currentQueue = updateQueues.get(reportId) ?? Promise.resolve({
+    id: '',
+    name: '',
+    content: '',
+    versionHistory: null
+  });
+  
+  // Chain the new update to run after the current queue completes
+  const newQueue = currentQueue
+    .then(() => performUpdate(params))
+    .catch(() => performUpdate(params)); // Still try to run even if previous failed
+  
+  // Update the queue for this reportId
+  updateQueues.set(reportId, newQueue);
+  
+  // Clean up the queue entry once this update completes
+  newQueue.finally(() => {
+    // Only remove if this is still the current queue
+    if (updateQueues.get(reportId) === newQueue) {
+      updateQueues.delete(reportId);
+    }
+  });
+  
+  return newQueue;
 };