import { RuntimeContext } from '@mastra/core/runtime-context';
import type { CoreMessage } from 'ai';
import { NoSuchToolError } from 'ai';
import { describe, expect, it, vi } from 'vitest';
import { thinkAndPrepAgent } from '../../../src/agents/think-and-prep-agent/think-and-prep-agent';
import { retryableAgentStreamWithHealing } from '../../../src/utils/retry';
import type { AnalystRuntimeContext } from '../../../src/workflows/analyst-workflow';

/** Shape these tests expect the healing `onError` callback to return. */
type HealingResponse = { error: string };

/** One recorded invocation of `onError` that produced a truthy healing value. */
type HealingAttempt = { attempt: number; type: string; toolName: string };

/**
 * Tool names reported as "available" by the simulated NoSuchToolError in the
 * first test, and expected to be echoed back in the healing message.
 */
const AVAILABLE_TOOLS: string[] = [
  'sequentialThinking',
  'executeSql',
  'respondWithoutAnalysis',
  'submitThoughts',
];

/** Builds the runtime context shared by every test (fresh `chatId` per call). */
function createTestRuntimeContext() {
  const runtimeContext = new RuntimeContext();
  runtimeContext.set('userId', 'c2dd64cd-f7f3-4884-bc91-d46ae431901e');
  runtimeContext.set('chatId', crypto.randomUUID());
  runtimeContext.set('organizationId', 'bf58d19a-8bb9-4f1d-a257-2d2105e7f1ce');
  runtimeContext.set('dataSourceId', 'cc3ef3bc-44ec-4a43-8dc4-681cae5c996a');
  runtimeContext.set('dataSourceSyntax', 'postgres');
  return runtimeContext;
}

/**
 * DEFINITIVE PROOF TEST
 * This suite exercises the healing mechanism by:
 * 1. Forcing a tool error inside a mocked agent stream
 * 2. Capturing the healing response returned by the injected `onError`
 * 3. Verifying the stream keeps producing chunks and runs to completion
 */
describe('Definitive Healing Proof', () => {
  it('PROOF: Agent heals from NoSuchToolError and continues execution', async () => {
    // Step 1: Conversation containing a tool call the mock treats as unknown.
    const messages: CoreMessage[] = [
      {
        role: 'user',
        content: 'Analyze my data',
      },
      {
        role: 'assistant',
        content: [
          {
            type: 'tool-call',
            toolCallId: 'proof_call_123',
            toolName: 'create-metrics-file', // deliberately absent from AVAILABLE_TOOLS
            args: {
              files: [
                {
                  file_name: 'test.yml',
                  datasource: 'test',
                  collections: [],
                },
              ],
            },
          },
        ],
      },
    ];

    // Step 2: Track healing behavior.
    const healingLog = {
      errorsCaught: 0,
      healingMessagesReturned: 0,
      availableToolsListed: false,
      correctToolsMentioned: [] as string[],
      streamCompleted: false,
      // Counts every chunk the consumer receives, including the one emitted
      // before the simulated error.
      chunksAfterHealing: 0,
      onErrorCalled: false,
      healingResponse: null as HealingResponse | null,
    };

    // Step 3: Run with the healing mechanism against a mocked agent.
    const runtimeContext = createTestRuntimeContext();

    // The mock's stream invokes whatever `onError` it is handed, so the
    // healing response can be captured and inspected.
    const mockAgent = {
      ...thinkAndPrepAgent,
      stream: vi.fn().mockImplementation(async (messages, options) => {
        const onError = options?.onError;

        return {
          fullStream: {
            async *[Symbol.asyncIterator]() {
              // Initial content, emitted before any error.
              yield { type: 'text-delta', text: 'Processing...' };

              // Simulate the tool error only when the bad tool call is present.
              const hasBadToolCall = messages.some(
                (m) =>
                  m.role === 'assistant' &&
                  Array.isArray(m.content) &&
                  m.content.some(
                    (c: any) => c.type === 'tool-call' && c.toolName === 'create-metrics-file'
                  )
              );

              if (hasBadToolCall) {
                const toolError = Object.assign(
                  new NoSuchToolError({
                    toolName: 'create-metrics-file',
                    availableTools: AVAILABLE_TOOLS,
                  }),
                  { toolCallId: 'proof_call_123' }
                );

                if (onError) {
                  healingLog.onErrorCalled = true;
                  healingLog.healingResponse = onError(toolError);
                  healingLog.errorsCaught++;

                  // Analyze the healing response.
                  const healing = healingLog.healingResponse;
                  if (healing && 'error' in healing) {
                    const errorMessage = healing.error;
                    healingLog.healingMessagesReturned++;

                    if (errorMessage.includes('Tool "create-metrics-file" is not available')) {
                      healingLog.availableToolsListed = true;
                    }

                    // Record which of the real tools the message mentions.
                    for (const toolName of AVAILABLE_TOOLS) {
                      if (errorMessage.includes(toolName)) {
                        healingLog.correctToolsMentioned.push(toolName);
                      }
                    }
                  }

                  // Surface the healing response as the tool's result.
                  yield {
                    type: 'tool-result',
                    toolCallId: 'proof_call_123',
                    toolName: 'create-metrics-file',
                    result: healingLog.healingResponse,
                  };
                }
              }

              // Continue after healing.
              yield { type: 'text-delta', text: 'Continuing after healing...' };

              // Only reached once the consumer asks for a chunk past the last one.
              healingLog.streamCompleted = true;
            },
          },
        };
      }),
    };

    const result = await retryableAgentStreamWithHealing({
      agent: mockAgent as any,
      messages,
      options: {
        toolCallStreaming: true,
        runtimeContext,
      },
      retryConfig: {
        maxRetries: 3,
        onRetry: (error, attempt) => {
          // This is only called for stream creation retries, not in-stream healing
          console.log('Stream creation retry:', error.type, attempt);
        },
      },
    });

    // Step 4: Verify stream was created successfully.
    // Note: retryCount reflects stream creation retries, not in-stream healing;
    // in-stream healing via onError doesn't increment retryCount.
    expect(result.stream).toBeDefined();

    // Step 5: Drain the stream to verify it continues after healing.
    try {
      for await (const _chunk of result.stream.fullStream) {
        healingLog.chunksAfterHealing++;
        // Safety valve against a runaway stream; the mock emits fewer chunks.
        if (healingLog.chunksAfterHealing > 5) break;
      }
    } catch (error) {
      // If we get here, healing failed.
      throw new Error(`Stream failed after healing: ${error}`);
    }

    // Check healing log after stream is consumed.
    expect(healingLog.onErrorCalled).toBe(true);
    // `toBeDefined()` would pass for the initial `null`; require a real value.
    expect(healingLog.healingResponse).toBeTruthy();
    expect(healingLog.healingResponse?.error).toContain(
      'Tool "create-metrics-file" is not available'
    );

    // Step 6: FINAL VERIFICATION - all healing behaviors occurred.
    expect(healingLog.errorsCaught).toBe(1);
    expect(healingLog.healingMessagesReturned).toBe(1);
    expect(healingLog.availableToolsListed).toBe(true);
    expect(healingLog.correctToolsMentioned).toContain('sequentialThinking');
    expect(healingLog.correctToolsMentioned).toContain('executeSql');
    expect(healingLog.correctToolsMentioned).toContain('respondWithoutAnalysis');
    expect(healingLog.correctToolsMentioned).toContain('submitThoughts');
    expect(healingLog.correctToolsMentioned).not.toContain('create-metrics-file');
    expect(healingLog.chunksAfterHealing).toBeGreaterThan(0);
    // The generator ran past its final post-healing chunk.
    expect(healingLog.streamCompleted).toBe(true);

    console.log('✅ HEALING PROVEN:', healingLog);
  });

  it('PROOF: Healing works with invalid tool arguments', async () => {
    // Here the mock calls `onError` directly while the stream is being
    // created, to check how the injected callback responds to an
    // invalid-arguments error.
    const messages: CoreMessage[] = [
      {
        role: 'user',
        content: 'Test invalid args',
      },
    ];

    let onErrorCalled = false;
    let errorDetails = '';

    const runtimeContext = createTestRuntimeContext();

    // Mock the agent to verify onError behavior.
    const mockAgent = {
      ...thinkAndPrepAgent,
      stream: vi.fn().mockImplementation(async (messages, options) => {
        // Verify onError is passed.
        expect(options.onError).toBeDefined();

        // Simulate an invalid-arguments error for a known tool.
        const invalidArgsError = Object.assign(new Error('Invalid tool arguments'), {
          name: 'AI_InvalidToolArgumentsError',
          toolCallId: 'test-call',
          toolName: 'executeSql',
        });

        const healingResponse = options.onError(invalidArgsError);
        if (healingResponse && typeof healingResponse === 'object' && 'error' in healingResponse) {
          onErrorCalled = true;
          errorDetails = healingResponse.error as string;
        }

        // Empty stream: nothing further to emit for this scenario.
        return { fullStream: { async *[Symbol.asyncIterator]() {} } };
      }),
    };

    const result = await retryableAgentStreamWithHealing({
      agent: mockAgent as any,
      messages,
      options: {
        toolCallStreaming: true,
        runtimeContext,
      },
    });

    expect(result.stream).toBeDefined();
    expect(onErrorCalled).toBe(true);
    expect(errorDetails).toContain('Invalid tool arguments');

    console.log('✅ Invalid args healing proven:', errorDetails);
  });

  it('PROOF: Multiple healing attempts work correctly', async () => {
    // Test that onError can handle multiple errors in a single stream.
    const messages: CoreMessage[] = [
      {
        role: 'user',
        content: 'Process data',
      },
    ];

    const healingAttempts: HealingAttempt[] = [];
    let onErrorCallCount = 0;

    const runtimeContext = createTestRuntimeContext();

    // Mock agent that simulates two unknown-tool errors in one stream.
    const mockAgent = {
      ...thinkAndPrepAgent,
      stream: vi.fn().mockImplementation(async (messages, options) => {
        const onError = options.onError;

        // Raises a NoSuchToolError through `onError` and records the attempt
        // when a truthy healing value comes back.
        const reportMissingTool = (toolName: string, toolCallId: string) => {
          const error = Object.assign(
            new NoSuchToolError({
              toolName,
              availableTools: ['sequentialThinking', 'executeSql'],
            }),
            { toolCallId }
          );

          if (onError(error)) {
            onErrorCallCount++;
            healingAttempts.push({
              attempt: onErrorCallCount,
              type: 'no-such-tool',
              toolName,
            });
          }
        };

        return {
          fullStream: {
            async *[Symbol.asyncIterator]() {
              // First error.
              reportMissingTool('bad-tool-1', 'call1');

              yield { type: 'text-delta', text: 'After first healing...' };

              // Second error.
              reportMissingTool('bad-tool-2', 'call2');
            },
          },
        };
      }),
    };

    const result = await retryableAgentStreamWithHealing({
      agent: mockAgent as any,
      messages,
      options: {
        toolCallStreaming: true,
        runtimeContext,
      },
      retryConfig: {
        maxRetries: 5,
        onRetry: (error, attempt) => {
          // Log any retry notification surfaced by the retry wrapper.
          console.log('Healing callback:', error.type, attempt);
        },
      },
    });

    // Drain the stream; the errors are raised lazily as chunks are pulled.
    for await (const _chunk of result.stream.fullStream) {
      // Just consume the stream
    }

    expect(result.stream).toBeDefined();
    expect(healingAttempts.length).toBeGreaterThanOrEqual(2);
    expect(healingAttempts[0]?.type).toBe('no-such-tool');
    expect(healingAttempts[1]?.type).toBe('no-such-tool');

    console.log('✅ Multiple healing proven:', healingAttempts);
  });
});