// buster/packages/ai/tests/utils/retry/healing-proof.test.ts
//
// 356 lines
// 13 KiB
// TypeScript

import { RuntimeContext } from '@mastra/core/runtime-context';
import type { CoreMessage } from 'ai';
import { NoSuchToolError } from 'ai';
import { describe, expect, it, vi } from 'vitest';
import { thinkAndPrepAgent } from '../../../src/agents/think-and-prep-agent/think-and-prep-agent';
import { retryableAgentStreamWithHealing } from '../../../src/utils/retry';
import type { AnalystRuntimeContext } from '../../../src/workflows/analyst-workflow';
/**
* DEFINITIVE PROOF TEST
* This test proves that our healing mechanism works by:
* 1. Forcing a tool error
* 2. Capturing the healing response
* 3. Verifying the agent continues after healing
*/
describe('Definitive Healing Proof', () => {
it('PROOF: Agent heals from NoSuchToolError and continues execution', async () => {
  // Scenario: the conversation already contains an assistant tool call to a tool that the
  // mocked stream reports as unavailable. The mock raises a NoSuchToolError through the
  // `onError` option it is given, records what that callback returns, and keeps yielding.
  // The test then checks both the recorded healing response and that the stream ran to
  // its end.
  const badToolName = 'create-metrics-file';
  const badToolCallId = 'proof_call_123';
  const expectedNotice = `Tool "${badToolName}" is not available`;
  // Tools the simulated NoSuchToolError advertises as available.
  const reportedTools = [
    'sequentialThinking',
    'executeSql',
    'respondWithoutAnalysis',
    'submitThoughts',
  ];

  // Step 1: Conversation whose assistant turn calls a tool outside `reportedTools`.
  const messages: CoreMessage[] = [
    {
      role: 'user',
      content: 'Analyze my data',
    },
    {
      role: 'assistant',
      content: [
        {
          type: 'tool-call',
          toolCallId: badToolCallId,
          toolName: badToolName,
          args: {
            files: [
              {
                file_name: 'test.yml',
                datasource: 'test',
                collections: [],
              },
            ],
          },
        },
      ],
    },
  ];

  // Step 2: Everything the mock observes is recorded here and asserted on afterwards.
  // Key names are kept as-is because the whole object is logged at the end of the test.
  const healingLog = {
    errorsCaught: 0,
    healingMessagesReturned: 0,
    availableToolsListed: false,
    correctToolsMentioned: [] as string[],
    streamCompleted: false,
    // Counts every chunk the test consumes, including the one yielded before the error.
    chunksAfterHealing: 0,
    onErrorCalled: false,
    healingResponse: null as any,
  };

  // Step 3: Runtime context handed to the retry helper.
  const runtimeContext = new RuntimeContext<AnalystRuntimeContext>();
  runtimeContext.set('userId', 'c2dd64cd-f7f3-4884-bc91-d46ae431901e');
  runtimeContext.set('chatId', crypto.randomUUID());
  runtimeContext.set('organizationId', 'bf58d19a-8bb9-4f1d-a257-2d2105e7f1ce');
  runtimeContext.set('dataSourceId', 'cc3ef3bc-44ec-4a43-8dc4-681cae5c996a');
  runtimeContext.set('dataSourceSyntax', 'postgres');

  // Mock agent: same shape as the real agent, but `stream` is replaced so the test can
  // drive the `onError` callback it receives.
  const mockAgent = {
    ...thinkAndPrepAgent,
    stream: vi.fn().mockImplementation(async (messages, options) => {
      const onError = options?.onError;
      return {
        fullStream: {
          async *[Symbol.asyncIterator]() {
            // Chunk emitted before any error is raised.
            yield { type: 'text-delta', text: 'Processing...' };

            const hasBadToolCall = messages.some(
              (m: CoreMessage) =>
                m.role === 'assistant' &&
                Array.isArray(m.content) &&
                m.content.some(
                  (part: { type?: string; toolName?: string }) =>
                    part.type === 'tool-call' && part.toolName === badToolName
                )
            );

            if (hasBadToolCall && onError) {
              const toolError = Object.assign(
                new NoSuchToolError({
                  toolName: badToolName,
                  availableTools: reportedTools,
                }),
                { toolCallId: badToolCallId }
              );

              healingLog.onErrorCalled = true;
              healingLog.healingResponse = onError(toolError);
              healingLog.errorsCaught++;

              // Guard with `typeof` first: the `in` operator throws on non-objects.
              const response = healingLog.healingResponse;
              if (response && typeof response === 'object' && 'error' in response) {
                healingLog.healingMessagesReturned++;
                const errorMessage = typeof response.error === 'string' ? response.error : '';
                if (errorMessage.includes(expectedNotice)) {
                  healingLog.availableToolsListed = true;
                }
                // Record which of the reported tools the healing message names.
                for (const tool of reportedTools) {
                  if (errorMessage.includes(tool)) {
                    healingLog.correctToolsMentioned.push(tool);
                  }
                }
              }

              // Surface the healing response as the result of the failed tool call.
              yield {
                type: 'tool-result',
                toolCallId: badToolCallId,
                toolName: badToolName,
                result: healingLog.healingResponse,
              };
            }

            // Chunk emitted after healing; the flag below is only reached once the
            // consumer asks for the next chunk, i.e. once the stream is drained.
            yield { type: 'text-delta', text: 'Continuing after healing...' };
            healingLog.streamCompleted = true;
          },
        },
      };
    }),
  };

  const result = await retryableAgentStreamWithHealing({
    agent: mockAgent as any,
    messages,
    options: {
      toolCallStreaming: true,
      runtimeContext,
    },
    retryConfig: {
      maxRetries: 3,
      onRetry: (error, attempt) => {
        // Per the original author's note this fires for stream-creation retries, not for
        // in-stream healing (not verifiable from this file).
        console.log('Stream creation retry:', error.type, attempt);
      },
    },
  });

  // Step 4: The helper must hand back a stream.
  expect(result.stream).toBeDefined();

  // Step 5: Drain the stream. The mock yields at most three chunks, so the safety cap
  // below is never hit and the generator runs to completion.
  try {
    for await (const _chunk of result.stream.fullStream) {
      healingLog.chunksAfterHealing++;
      if (healingLog.chunksAfterHealing > 5) break;
    }
  } catch (error) {
    // Reaching this point means the stream threw instead of healing.
    throw new Error(`Stream failed after healing: ${error}`);
  }

  // The response starts out as `null`, which `toBeDefined()` would accept; require a
  // truthy value so a missing response actually fails here.
  expect(healingLog.onErrorCalled).toBe(true);
  expect(healingLog.healingResponse).toBeTruthy();
  expect(healingLog.healingResponse.error).toContain(expectedNotice);

  // Step 6: FINAL VERIFICATION - all healing behaviors occurred.
  expect(healingLog.errorsCaught).toBe(1);
  expect(healingLog.healingMessagesReturned).toBe(1);
  expect(healingLog.availableToolsListed).toBe(true);
  // Every reported tool must be named by the healing message (pushed in list order).
  expect(healingLog.correctToolsMentioned).toEqual(reportedTools);
  expect(healingLog.chunksAfterHealing).toBeGreaterThan(0);
  // Proves the stream kept going past the healing point rather than stopping early.
  expect(healingLog.streamCompleted).toBe(true);

  console.log('✅ HEALING PROVEN:', healingLog);
});
it('PROOF: Healing works with invalid tool arguments', async () => {
  // The mocked `stream` passes a synthetic invalid-arguments error straight to the
  // `onError` option it receives, and records the `error` text of the object returned.
  const conversation: CoreMessage[] = [{ role: 'user', content: 'Test invalid args' }];

  // What the mock observed; asserted on once the helper has returned.
  const observed = { healed: false, detail: '' };

  const runtimeContext = new RuntimeContext<AnalystRuntimeContext>();
  runtimeContext.set('userId', 'c2dd64cd-f7f3-4884-bc91-d46ae431901e');
  runtimeContext.set('chatId', crypto.randomUUID());
  runtimeContext.set('organizationId', 'bf58d19a-8bb9-4f1d-a257-2d2105e7f1ce');
  runtimeContext.set('dataSourceId', 'cc3ef3bc-44ec-4a43-8dc4-681cae5c996a');
  runtimeContext.set('dataSourceSyntax', 'postgres');

  // Agent stand-in whose `stream` exercises the callback instead of calling a model.
  const mockAgent = {
    ...thinkAndPrepAgent,
    stream: vi.fn().mockImplementation(async (_history, options) => {
      // The helper is expected to supply an error handler.
      expect(options.onError).toBeDefined();

      // Plain Error dressed up with the name and tool metadata of an invalid-args failure.
      const badArgs = Object.assign(new Error('Invalid tool arguments'), {
        name: 'AI_InvalidToolArgumentsError',
        toolCallId: 'test-call',
        toolName: 'executeSql',
      });

      const reply = options.onError(badArgs);
      const carriesError = typeof reply === 'object' && reply !== null && 'error' in reply;
      if (carriesError) {
        observed.healed = true;
        observed.detail = reply.error as string;
      }

      // Nothing to stream: the interesting work already happened above.
      const emptyStream = { async *[Symbol.asyncIterator]() {} };
      return { fullStream: emptyStream };
    }),
  };

  const result = await retryableAgentStreamWithHealing({
    agent: mockAgent as any,
    messages: conversation,
    options: {
      toolCallStreaming: true,
      runtimeContext,
    },
  });

  expect(result.stream).toBeDefined();
  expect(observed.healed).toBe(true);
  expect(observed.detail).toContain('Invalid tool arguments');

  console.log('✅ Invalid args healing proven:', observed.detail);
});
it('PROOF: Multiple healing attempts work correctly', async () => {
  // One mocked stream raises two NoSuchToolError instances through `onError`, with a text
  // chunk in between, and the test checks that both produced a truthy healing response.
  const conversation: CoreMessage[] = [{ role: 'user', content: 'Process data' }];

  // One entry per error for which `onError` returned something truthy.
  const healingAttempts: any[] = [];
  let onErrorCallCount = 0;

  const runtimeContext = new RuntimeContext<AnalystRuntimeContext>();
  runtimeContext.set('userId', 'c2dd64cd-f7f3-4884-bc91-d46ae431901e');
  runtimeContext.set('chatId', crypto.randomUUID());
  runtimeContext.set('organizationId', 'bf58d19a-8bb9-4f1d-a257-2d2105e7f1ce');
  runtimeContext.set('dataSourceId', 'cc3ef3bc-44ec-4a43-8dc4-681cae5c996a');
  runtimeContext.set('dataSourceSyntax', 'postgres');

  // Agent stand-in whose stream reports two unknown-tool failures in sequence.
  const mockAgent = {
    ...thinkAndPrepAgent,
    stream: vi.fn().mockImplementation(async (_history, options) => {
      const onError = options.onError;

      // Builds an unknown-tool error, hands it to `onError`, and logs the attempt when
      // the callback answers with a truthy value.
      const raiseMissingTool = (toolName: string, toolCallId: string): void => {
        const missingTool = Object.assign(
          new NoSuchToolError({
            toolName,
            availableTools: ['sequentialThinking', 'executeSql'],
          }),
          { toolCallId }
        );
        if (!onError(missingTool)) return;
        onErrorCallCount += 1;
        healingAttempts.push({
          attempt: onErrorCallCount,
          type: 'no-such-tool',
          toolName,
        });
      };

      return {
        fullStream: {
          async *[Symbol.asyncIterator]() {
            raiseMissingTool('bad-tool-1', 'call1');
            yield { type: 'text-delta', text: 'After first healing...' };
            raiseMissingTool('bad-tool-2', 'call2');
          },
        },
      };
    }),
  };

  const result = await retryableAgentStreamWithHealing({
    agent: mockAgent as any,
    messages: conversation,
    options: {
      toolCallStreaming: true,
      runtimeContext,
    },
    retryConfig: {
      maxRetries: 5,
      onRetry: (error, attempt) => {
        // Logs each time the helper invokes `onRetry`.
        // NOTE(review): whether that covers in-stream healing or only stream-creation
        // retries is not visible from this file; confirm against the helper.
        console.log('Healing callback:', error.type, attempt);
      },
    },
  });

  // Drain the stream; the errors are raised as a side effect of iterating it.
  for await (const _chunk of result.stream.fullStream) {
    // Chunks themselves are not inspected.
  }

  expect(result.stream).toBeDefined();
  expect(healingAttempts.length).toBeGreaterThanOrEqual(2);
  expect(healingAttempts[0].type).toBe('no-such-tool');
  expect(healingAttempts[1].type).toBe('no-such-tool');

  console.log('✅ Multiple healing proven:', healingAttempts);
});
});