buster/packages/ai/tests/utils/retry/real-healing-scenario.int.t...

139 lines
5.0 KiB
TypeScript

import { RuntimeContext } from '@mastra/core/runtime-context';
import type { CoreMessage } from 'ai';
import { describe, expect, it } from 'vitest';
import { thinkAndPrepAgent } from '../../../src/agents/think-and-prep-agent/think-and-prep-agent';
import { retryableAgentStreamWithHealing } from '../../../src/utils/retry';
import type { AnalystRuntimeContext } from '../../../src/workflows/analyst-workflow';
/**
 * Runs `thinkAndPrepAgent` through `retryableAgentStreamWithHealing` on dashboard-style prompts
 * and inspects which tools appear as `tool-call` chunks on the resulting stream.
 *
 * NOTE(review): these look like live integration tests (real agent, fixed user / organization /
 * data-source IDs) — confirm the required environment and fixtures before running.
 */
describe('Real-world healing scenario', () => {
  /** Tool names that the first test treats as the agent finishing its run. */
  const terminalTools: readonly string[] = ['submitThoughts', 'respondWithoutAnalysis'];

  /**
   * Builds the runtime context used by both tests: fixed user, organization and data-source IDs,
   * `postgres` as the data-source syntax, and a fresh random chat ID on every call.
   */
  const createRuntimeContext = (): RuntimeContext<AnalystRuntimeContext> => {
    const runtimeContext = new RuntimeContext<AnalystRuntimeContext>();
    runtimeContext.set('userId', 'c2dd64cd-f7f3-4884-bc91-d46ae431901e');
    runtimeContext.set('chatId', crypto.randomUUID());
    runtimeContext.set('organizationId', 'bf58d19a-8bb9-4f1d-a257-2d2105e7f1ce');
    runtimeContext.set('dataSourceId', 'cc3ef3bc-44ec-4a43-8dc4-681cae5c996a');
    runtimeContext.set('dataSourceSyntax', 'postgres');
    return runtimeContext;
  };

  it('should demonstrate healing works when agent naturally tries wrong tool', async () => {
    const messages: CoreMessage[] = [
      {
        role: 'user',
        content:
          'Create a dashboard and metrics for analyzing monthly sales trends. Include revenue by product category.',
      },
    ];
    const runtimeContext = createRuntimeContext();

    // Only the number of attempts is read back, so the entries stay `unknown` rather than `any`.
    const healingLog: {
      attempts: unknown[];
      toolsCalled: string[];
      finishedSuccessfully: boolean;
      finalTool: string | null;
    } = {
      attempts: [],
      toolsCalled: [],
      finishedSuccessfully: false,
      finalTool: null,
    };

    const result = await retryableAgentStreamWithHealing({
      agent: thinkAndPrepAgent,
      messages,
      options: {
        toolCallStreaming: true,
        runtimeContext,
        maxSteps: 10, // Allow multiple steps for the agent to recover
      },
      retryConfig: {
        maxRetries: 3,
        // Record every retry so the summary below can report how many healing attempts occurred.
        onRetry: (error, attempt) => {
          const toolName = error.originalError?.toolName || 'unknown';
          healingLog.attempts.push({
            attempt,
            errorType: error.type,
            toolName,
            message: error.healingMessage,
          });
          console.log(`Healing attempt ${attempt}:`, {
            errorType: error.type,
            toolName,
          });
        },
      },
    });

    // Drain the stream, recording every tool call and noting when a terminal tool is reached.
    for await (const chunk of result.stream.fullStream) {
      if (chunk.type !== 'tool-call') {
        continue;
      }
      healingLog.toolsCalled.push(chunk.toolName);
      console.log('Tool called:', chunk.toolName);
      if (terminalTools.includes(chunk.toolName)) {
        healingLog.finishedSuccessfully = true;
        healingLog.finalTool = chunk.toolName;
      }
    }

    // Log results
    console.log('\n📊 Healing Scenario Results:');
    console.log(' Tools called:', healingLog.toolsCalled);
    console.log(' Healing attempts:', healingLog.attempts.length);
    console.log(' Finished successfully:', healingLog.finishedSuccessfully);
    console.log(' Final tool:', healingLog.finalTool);

    // The agent should eventually succeed
    expect(healingLog.finishedSuccessfully).toBe(true);
    expect(terminalTools).toContain(healingLog.finalTool);
  });

  it('should track actual agent behavior with dashboard creation prompt', async () => {
    // This test helps us understand what tools the agent naturally tries to use
    const messages: CoreMessage[] = [
      {
        role: 'user',
        content:
          'I need you to create metrics and a dashboard for tracking customer satisfaction scores',
      },
    ];
    const runtimeContext = createRuntimeContext();

    const toolUsage = {
      allTools: [] as string[],
      uniqueTools: new Set<string>(),
    };

    // Capture a failure instead of swallowing it: the diagnostics and tool assertions below still
    // run on whatever was collected, and the error is rethrown at the end so the test cannot pass
    // vacuously with an empty tool list.
    let executionError: unknown;
    try {
      const result = await retryableAgentStreamWithHealing({
        agent: thinkAndPrepAgent,
        messages,
        options: {
          toolCallStreaming: true,
          runtimeContext,
          maxSteps: 5,
        },
      });
      for await (const chunk of result.stream.fullStream) {
        if (chunk.type === 'tool-call') {
          toolUsage.allTools.push(chunk.toolName);
          toolUsage.uniqueTools.add(chunk.toolName);
        }
      }
    } catch (error) {
      console.log('Error during execution:', error);
      executionError = error;
    }

    const availableTools = Object.keys(thinkAndPrepAgent.tools);

    console.log('\n🔧 Tool Usage Analysis:');
    console.log(' All tools called:', toolUsage.allTools);
    console.log(' Unique tools:', Array.from(toolUsage.uniqueTools));
    console.log(' Available tools:', availableTools);

    // The agent should only use its available tools
    for (const tool of toolUsage.uniqueTools) {
      expect(availableTools).toContain(tool);
    }

    // Surface any execution failure once the diagnostics above have been printed.
    if (executionError !== undefined) {
      throw executionError;
    }
  });
});