import { RuntimeContext } from '@mastra/core/runtime-context';
import type { CoreMessage } from 'ai';
import { NoSuchToolError } from 'ai';
import { describe, expect, it, vi } from 'vitest';
import { thinkAndPrepAgent } from '../../../src/agents/think-and-prep-agent/think-and-prep-agent';
import { retryableAgentStreamWithHealing } from '../../../src/utils/retry';
import type { AnalystRuntimeContext } from '../../../src/workflows/analyst-workflow';

/** One `onRetry` invocation, as recorded by the multi-tool healing test. */
type HealingAttempt = {
  attempt: number;
  errorType: unknown;
  toolName: unknown;
};

/**
 * Builds the runtime context shared by every test in this file.
 *
 * A new context is created on each call so that every test gets its own
 * random `chatId`; the remaining keys are fixed fixture values.
 */
function createTestRuntimeContext() {
  const ctx = new RuntimeContext();
  ctx.set('userId', 'c2dd64cd-f7f3-4884-bc91-d46ae431901e');
  ctx.set('chatId', crypto.randomUUID());
  ctx.set('organizationId', 'bf58d19a-8bb9-4f1d-a257-2d2105e7f1ce');
  ctx.set('dataSourceId', 'cc3ef3bc-44ec-4a43-8dc4-681cae5c996a');
  ctx.set('dataSourceSyntax', 'postgres');
  return ctx;
}

/**
 * Extracts the `result` payload of the first part of a healing message when
 * that payload carries an `error` key.
 *
 * NOTE(review): assumes `error.healingMessage` passed to `onRetry` is a
 * `CoreMessage` — confirm against the retry utility's types.
 *
 * @param healingMessage - The message handed to `onRetry` as `error.healingMessage`.
 * @returns The payload object containing `error`, or `undefined` when the
 *   message is not a `tool` message, has non-array or empty content, or its
 *   first part has no object `result` with an `error` key.
 */
function findHealingErrorResult(healingMessage: CoreMessage): { error: unknown } | undefined {
  if (healingMessage.role !== 'tool' || !Array.isArray(healingMessage.content)) {
    return undefined;
  }

  // Guard against an empty content array: `'result' in undefined` would throw.
  const [firstPart] = healingMessage.content;
  if (!firstPart || !('result' in firstPart)) {
    return undefined;
  }

  // Guard against primitive results: the `in` operator throws on non-objects.
  const { result } = firstPart;
  if (result && typeof result === 'object' && 'error' in result) {
    return result;
  }
  return undefined;
}

// Healing behaviour when the think-and-prep agent is handed tool calls for
// tools it does not own. Most cases are skipped; the reason is in each title.
describe('Think-and-Prep Agent - Tool Error Healing Integration', () => {
  it.skip('should heal when think-and-prep agent tries to call create-metrics-file - SKIPPED: Cannot force real agent to execute pre-crafted tool calls', async () => {
    // This simulates the real issue: think-and-prep agent trying to create files
    // when it should only be thinking and preparing
    const messages: CoreMessage[] = [
      {
        role: 'user',
        content: 'Create a dashboard showing monthly sales trends',
      },
      {
        role: 'assistant',
        content: [
          {
            type: 'text',
            text: "I'll create a dashboard for your monthly sales trends. Let me start by creating the metrics file.",
          },
        ],
      },
      {
        role: 'assistant',
        content: [
          {
            type: 'tool-call',
            toolCallId: 'call_thinkprep_123',
            toolName: 'create-metrics-file', // THIS TOOL DOESN'T EXIST IN THINK-AND-PREP!
            args: {
              files: [
                {
                  file_name: 'sales_metrics.yml',
                  datasource: 'sales_db',
                  collections: [
                    {
                      name: 'monthly_sales',
                      sql: 'SELECT * FROM sales',
                    },
                  ],
                },
              ],
            },
          },
        ],
      },
    ];

    let healingOccurred = false;
    // Only the error's `type` is asserted on, so only that is retained.
    let healedErrorType: unknown;

    const result = await retryableAgentStreamWithHealing({
      agent: thinkAndPrepAgent,
      messages,
      options: {
        toolCallStreaming: true,
        runtimeContext: createTestRuntimeContext(),
      },
      retryConfig: {
        maxRetries: 3,
        onRetry: (error, attempt) => {
          console.log(`Think-and-Prep healing attempt ${attempt}:`, error.type);
          healingOccurred = true;
          healedErrorType = error.type;

          // Verify the error message mentions available tools
          const healed = findHealingErrorResult(error.healingMessage);
          if (healed) {
            console.log('Error message:', healed.error);
            // Should list the actual available tools: sequentialThinking, executeSql, respondWithoutAnalysis, submitThoughts
            expect(healed.error).toContain('sequentialThinking');
            expect(healed.error).toContain('executeSql');
            expect(healed.error).not.toContain('create-metrics-file');
          }
        },
      },
    });

    expect(result.stream).toBeDefined();
    expect(healingOccurred).toBe(true);
    expect(healedErrorType).toBe('no-such-tool');
  });

  it.skip('should heal when think-and-prep tries multiple visualization tools - SKIPPED: Cannot force real agent to execute pre-crafted tool calls', async () => {
    // Simulate the agent trying various visualization tools it doesn't have
    const messages: CoreMessage[] = [
      {
        role: 'user',
        content: 'Analyze sales data and create visualizations',
      },
      {
        role: 'assistant',
        content: [
          {
            type: 'tool-call',
            toolCallId: 'call_1',
            toolName: 'create-dashboards-file', // Not available!
            args: { files: [] },
          },
        ],
      },
    ];

    const healingAttempts: HealingAttempt[] = [];

    const result = await retryableAgentStreamWithHealing({
      agent: thinkAndPrepAgent,
      messages,
      options: {
        toolCallStreaming: true,
        runtimeContext: createTestRuntimeContext(),
      },
      retryConfig: {
        maxRetries: 3,
        onRetry: (error, attempt) => {
          healingAttempts.push({
            attempt,
            errorType: error.type,
            toolName: error.originalError?.toolName || 'unknown',
          });
        },
      },
    });

    expect(result.stream).toBeDefined();
    expect(healingAttempts.length).toBeGreaterThan(0);

    const [firstAttempt] = healingAttempts;
    expect(firstAttempt?.errorType).toBe('no-such-tool');
    expect(firstAttempt?.toolName).toBe('create-dashboards-file');
  });

  it('should complete successfully when using only available tools', async () => {
    // This should work without any healing needed
    const messages: CoreMessage[] = [
      {
        role: 'user',
        content: 'Think about how to analyze sales data',
      },
      {
        role: 'assistant',
        content: [
          {
            type: 'tool-call',
            toolCallId: 'call_valid_1',
            toolName: 'sequentialThinking', // This tool exists!
            args: {
              thought: 'I need to understand the sales data structure first',
            },
          },
        ],
      },
    ];

    let healingOccurred = false;

    const result = await retryableAgentStreamWithHealing({
      agent: thinkAndPrepAgent,
      messages,
      options: {
        toolCallStreaming: true,
        runtimeContext: createTestRuntimeContext(),
      },
      retryConfig: {
        maxRetries: 3,
        onRetry: () => {
          healingOccurred = true;
        },
      },
    });

    expect(result.stream).toBeDefined();
    expect(healingOccurred).toBe(false); // No healing should occur
    expect(result.retryCount).toBe(0);
  });

  it.skip('should provide helpful error messages listing available tools - SKIPPED: Cannot force real agent to execute pre-crafted tool calls', async () => {
    const messages: CoreMessage[] = [
      {
        role: 'assistant',
        content: [
          {
            type: 'tool-call',
            toolCallId: 'call_test',
            toolName: 'modify-metrics-file',
            args: {},
          },
        ],
      },
    ];

    let capturedErrorMessage = '';

    const result = await retryableAgentStreamWithHealing({
      agent: thinkAndPrepAgent,
      messages,
      options: {
        toolCallStreaming: true,
        runtimeContext: createTestRuntimeContext(),
      },
      retryConfig: {
        maxRetries: 1,
        onRetry: (error) => {
          const healed = findHealingErrorResult(error.healingMessage);
          if (healed) {
            capturedErrorMessage = healed.error as string;
          }
        },
      },
    });

    expect(result.stream).toBeDefined();

    // Verify the error message lists the correct available tools
    expect(capturedErrorMessage).toContain('Tool "modify-metrics-file" is not available');
    expect(capturedErrorMessage).toContain('Available tools:');
    expect(capturedErrorMessage).toContain('sequentialThinking');
    expect(capturedErrorMessage).toContain('executeSql');
    expect(capturedErrorMessage).toContain('respondWithoutAnalysis');
    expect(capturedErrorMessage).toContain('submitThoughts');

    // Verify it does NOT list visualization tools
    expect(capturedErrorMessage).not.toContain('create-metrics-file');
    expect(capturedErrorMessage).not.toContain('create-dashboards-file');
  });
});

// Real-world scenario test
describe('Think-and-Prep Real Scenario', () => {
  it('should handle agent confusion about its role', async () => {
    // This tests a common scenario where the agent gets confused about what it can do
    const messages: CoreMessage[] = [
      {
        role: 'user',
        content:
          'I need you to create a sales dashboard with revenue trends, top products, and customer segments',
      },
      // The agent might naturally try to create files here
    ];

    const healingMessages: string[] = [];
    let finalToolUsed: string | undefined;

    const result = await retryableAgentStreamWithHealing({
      agent: thinkAndPrepAgent,
      messages,
      options: {
        toolCallStreaming: true,
        runtimeContext: createTestRuntimeContext(),
        temperature: 0.5, // Add some variability
      },
      retryConfig: {
        maxRetries: 5, // Allow more retries for learning
        onRetry: (error, attempt) => {
          const healed = findHealingErrorResult(error.healingMessage);
          if (healed) {
            healingMessages.push(`Attempt ${attempt}: ${healed.error}`);
          }
        },
      },
    });

    // Assert before consuming the stream: checking afterwards is vacuous,
    // because iterating an undefined stream would already have thrown.
    expect(result.stream).toBeDefined();

    // Process the stream to see what tool it eventually uses
    for await (const chunk of result.stream.fullStream) {
      if (chunk.type === 'tool-call') {
        finalToolUsed = chunk.toolName;
        console.log(`Agent eventually used: ${chunk.toolName}`);
      }
    }

    // The agent should eventually use one of its available tools
    if (finalToolUsed) {
      expect([
        'sequentialThinking',
        'executeSql',
        'respondWithoutAnalysis',
        'submitThoughts',
      ]).toContain(finalToolUsed);
    }

    // Log healing attempts for debugging
    if (healingMessages.length > 0) {
      console.log('Healing messages:', healingMessages);
    }
  });
});