// Source listing: buster/packages/ai/tests/utils/retry/think-and-prep-healing.int.... (path truncated in the listing)
// Listing metadata: 336 lines, 12 KiB, TypeScript

import { RuntimeContext } from '@mastra/core/runtime-context';
import type { CoreMessage } from 'ai';
import { NoSuchToolError } from 'ai';
import { describe, expect, it, vi } from 'vitest';
import { thinkAndPrepAgent } from '../../../src/agents/think-and-prep-agent/think-and-prep-agent';
import { retryableAgentStreamWithHealing } from '../../../src/utils/retry';
import type { AnalystRuntimeContext } from '../../../src/workflows/analyst-workflow';
describe('Think-and-Prep Agent - Tool Error Healing Integration', () => {
it.skip('should heal when think-and-prep agent tries to call create-metrics-file - SKIPPED: Cannot force real agent to execute pre-crafted tool calls', async () => {
  // Scenario: the conversation history already contains an assistant tool call to
  // `create-metrics-file`, a tool the think-and-prep agent should not have. The retry
  // wrapper is expected to surface a `no-such-tool` error through `onRetry`.
  const messages: CoreMessage[] = [
    {
      role: 'user',
      content: 'Create a dashboard showing monthly sales trends',
    },
    {
      role: 'assistant',
      content: [
        {
          type: 'text',
          text: "I'll create a dashboard for your monthly sales trends. Let me start by creating the metrics file.",
        },
      ],
    },
    {
      role: 'assistant',
      content: [
        {
          type: 'tool-call',
          toolCallId: 'call_thinkprep_123',
          toolName: 'create-metrics-file', // THIS TOOL DOESN'T EXIST IN THINK-AND-PREP!
          args: {
            files: [
              {
                file_name: 'sales_metrics.yml',
                datasource: 'sales_db',
                collections: [
                  {
                    name: 'monthly_sales',
                    sql: 'SELECT * FROM sales',
                  },
                ],
              },
            ],
          },
        },
      ],
    },
  ];

  // State recorded by `onRetry`. Only plain values are captured so that no `any` is needed
  // and every assertion can run after the call has resolved.
  let healingOccurred = false;
  let healedErrorType: string | undefined;
  const healingErrorMessages: string[] = [];

  const runtimeContext = new RuntimeContext<AnalystRuntimeContext>();
  runtimeContext.set('userId', 'c2dd64cd-f7f3-4884-bc91-d46ae431901e');
  runtimeContext.set('chatId', crypto.randomUUID());
  runtimeContext.set('organizationId', 'bf58d19a-8bb9-4f1d-a257-2d2105e7f1ce');
  runtimeContext.set('dataSourceId', 'cc3ef3bc-44ec-4a43-8dc4-681cae5c996a');
  runtimeContext.set('dataSourceSyntax', 'postgres');

  const result = await retryableAgentStreamWithHealing({
    agent: thinkAndPrepAgent,
    messages,
    options: {
      toolCallStreaming: true,
      runtimeContext,
    },
    retryConfig: {
      maxRetries: 3,
      onRetry: (error, attempt) => {
        console.log(`Think-and-Prep healing attempt ${attempt}:`, error.type);
        healingOccurred = true;
        healedErrorType = error.type;

        // Record the healing error text when the message has the tool-result shape.
        if (error.healingMessage.role === 'tool' && Array.isArray(error.healingMessage.content)) {
          const toolResult = error.healingMessage.content[0];
          if ('result' in toolResult && toolResult.result && 'error' in toolResult.result) {
            const errorText = toolResult.result.error;
            console.log('Error message:', errorText);
            if (typeof errorText === 'string') {
              healingErrorMessages.push(errorText);
            }
          }
        }
      },
    },
  });

  expect(result.stream).toBeDefined();
  expect(healingOccurred).toBe(true);
  expect(healedErrorType).toBe('no-such-tool');

  // Assert outside the callback: the checks then run regardless of how the retry wrapper
  // treats exceptions thrown from `onRetry`, and they cannot be skipped silently when the
  // healing message has an unexpected shape.
  expect(healingErrorMessages.length).toBeGreaterThan(0);

  // NOTE(review): the expected wording mirrors the sibling "helpful error messages" test,
  // which expects the message to name the unavailable tool. The previous
  // `.not.toContain('create-metrics-file')` check contradicted that format, because the
  // unavailable tool here IS `create-metrics-file`. Confirm against the retry utility.
  expect(healingErrorMessages[0]).toContain('Tool "create-metrics-file" is not available');

  for (const message of healingErrorMessages) {
    // Should list the actual available tools: sequentialThinking, executeSql, respondWithoutAnalysis, submitThoughts
    expect(message).toContain('sequentialThinking');
    expect(message).toContain('executeSql');
    // ...and should not advertise visualization tools other than the one being rejected.
    expect(message).not.toContain('create-dashboards-file');
  }
});
it.skip('should heal when think-and-prep tries multiple visualization tools - SKIPPED: Cannot force real agent to execute pre-crafted tool calls', async () => {
  /** One `onRetry` invocation, reduced to the fields asserted on below. */
  interface HealingAttempt {
    attempt: number;
    errorType: string;
    // Left as `unknown`: the shape of `originalError` is declared by the retry utility,
    // which is not visible from this test.
    toolName: unknown;
  }

  // Simulate the agent trying a visualization tool it doesn't have.
  const messages: CoreMessage[] = [
    {
      role: 'user',
      content: 'Analyze sales data and create visualizations',
    },
    {
      role: 'assistant',
      content: [
        {
          type: 'tool-call',
          toolCallId: 'call_1',
          toolName: 'create-dashboards-file', // Not available!
          args: { files: [] },
        },
      ],
    },
  ];

  const runtimeContext = new RuntimeContext<AnalystRuntimeContext>();
  runtimeContext.set('userId', 'c2dd64cd-f7f3-4884-bc91-d46ae431901e');
  runtimeContext.set('chatId', crypto.randomUUID());
  runtimeContext.set('organizationId', 'bf58d19a-8bb9-4f1d-a257-2d2105e7f1ce');
  runtimeContext.set('dataSourceId', 'cc3ef3bc-44ec-4a43-8dc4-681cae5c996a');
  runtimeContext.set('dataSourceSyntax', 'postgres');

  // Typed accumulator (previously `any[]`) so typos in field names are caught at compile time.
  const healingAttempts: HealingAttempt[] = [];

  const result = await retryableAgentStreamWithHealing({
    agent: thinkAndPrepAgent,
    messages,
    options: {
      toolCallStreaming: true,
      runtimeContext,
    },
    retryConfig: {
      maxRetries: 3,
      onRetry: (error, attempt) => {
        healingAttempts.push({
          attempt,
          errorType: error.type,
          toolName: error.originalError?.toolName || 'unknown',
        });
      },
    },
  });

  expect(result.stream).toBeDefined();
  expect(healingAttempts.length).toBeGreaterThan(0);

  // Optional chaining keeps the access type-safe even if the array were empty;
  // the length assertion above already reports that case.
  const [firstAttempt] = healingAttempts;
  expect(firstAttempt?.errorType).toBe('no-such-tool');
  expect(firstAttempt?.toolName).toBe('create-dashboards-file');
});
it('should complete successfully when using only available tools', async () => {
  // Happy path: the only tool call in the history targets `sequentialThinking`,
  // so the retry callback is expected to stay silent.
  const conversation: CoreMessage[] = [
    { role: 'user', content: 'Think about how to analyze sales data' },
    {
      role: 'assistant',
      content: [
        {
          type: 'tool-call',
          toolCallId: 'call_valid_1',
          toolName: 'sequentialThinking', // This tool exists!
          args: { thought: 'I need to understand the sales data structure first' },
        },
      ],
    },
  ];

  // Runtime context handed to the agent through `options.runtimeContext`.
  const analystContext = new RuntimeContext<AnalystRuntimeContext>();
  analystContext.set('userId', 'c2dd64cd-f7f3-4884-bc91-d46ae431901e');
  analystContext.set('chatId', crypto.randomUUID());
  analystContext.set('organizationId', 'bf58d19a-8bb9-4f1d-a257-2d2105e7f1ce');
  analystContext.set('dataSourceId', 'cc3ef3bc-44ec-4a43-8dc4-681cae5c996a');
  analystContext.set('dataSourceSyntax', 'postgres');

  // Flipped by the retry callback; must still be false once the call resolves.
  let retryFired = false;
  const markRetry = (): void => {
    retryFired = true;
  };

  const outcome = await retryableAgentStreamWithHealing({
    agent: thinkAndPrepAgent,
    messages: conversation,
    options: { toolCallStreaming: true, runtimeContext: analystContext },
    retryConfig: { maxRetries: 3, onRetry: markRetry },
  });

  expect(outcome.stream).toBeDefined();
  expect(retryFired).toBe(false); // No healing should occur
  expect(outcome.retryCount).toBe(0);
});
it.skip('should provide helpful error messages listing available tools - SKIPPED: Cannot force real agent to execute pre-crafted tool calls', async () => {
  // History consisting of a single assistant call to `modify-metrics-file`.
  const conversation: CoreMessage[] = [
    {
      role: 'assistant',
      content: [{ type: 'tool-call', toolCallId: 'call_test', toolName: 'modify-metrics-file', args: {} }],
    },
  ];

  const analystContext = new RuntimeContext<AnalystRuntimeContext>();
  analystContext.set('userId', 'c2dd64cd-f7f3-4884-bc91-d46ae431901e');
  analystContext.set('chatId', crypto.randomUUID());
  analystContext.set('organizationId', 'bf58d19a-8bb9-4f1d-a257-2d2105e7f1ce');
  analystContext.set('dataSourceId', 'cc3ef3bc-44ec-4a43-8dc4-681cae5c996a');
  analystContext.set('dataSourceSyntax', 'postgres');

  // Stays '' unless a retry delivers a tool-role healing message carrying `result.error`.
  let capturedErrorMessage = '';

  const outcome = await retryableAgentStreamWithHealing({
    agent: thinkAndPrepAgent,
    messages: conversation,
    options: { toolCallStreaming: true, runtimeContext: analystContext },
    retryConfig: {
      maxRetries: 1,
      onRetry: (error) => {
        const { healingMessage } = error;
        if (healingMessage.role !== 'tool' || !Array.isArray(healingMessage.content)) {
          return;
        }
        const firstPart = healingMessage.content[0];
        if (!('result' in firstPart) || !firstPart.result || !('error' in firstPart.result)) {
          return;
        }
        capturedErrorMessage = firstPart.result.error as string;
      },
    },
  });

  expect(outcome.stream).toBeDefined();

  // Fragments the healing message must contain: the rejected tool plus the available tools.
  const expectedFragments = [
    'Tool "modify-metrics-file" is not available',
    'Available tools:',
    'sequentialThinking',
    'executeSql',
    'respondWithoutAnalysis',
    'submitThoughts',
  ];
  for (const fragment of expectedFragments) {
    expect(capturedErrorMessage).toContain(fragment);
  }

  // Visualization tools must not be listed.
  const forbiddenFragments = ['create-metrics-file', 'create-dashboards-file'];
  for (const fragment of forbiddenFragments) {
    expect(capturedErrorMessage).not.toContain(fragment);
  }
});
});
// Real-world scenario test
describe('Think-and-Prep Real Scenario', () => {
  it('should handle agent confusion about its role', async () => {
    // A dashboard request with no pre-crafted tool calls: the agent may try to create
    // files on its own, and any healing message delivered to `onRetry` is recorded.
    const messages: CoreMessage[] = [
      {
        role: 'user',
        content:
          'I need you to create a sales dashboard with revenue trends, top products, and customer segments',
      },
      // The agent might naturally try to create files here
    ];

    const runtimeContext = new RuntimeContext<AnalystRuntimeContext>();
    runtimeContext.set('userId', 'c2dd64cd-f7f3-4884-bc91-d46ae431901e');
    runtimeContext.set('chatId', crypto.randomUUID());
    runtimeContext.set('organizationId', 'bf58d19a-8bb9-4f1d-a257-2d2105e7f1ce');
    runtimeContext.set('dataSourceId', 'cc3ef3bc-44ec-4a43-8dc4-681cae5c996a');
    runtimeContext.set('dataSourceSyntax', 'postgres');

    const healingMessages: string[] = [];
    let finalToolUsed: string | undefined;

    const result = await retryableAgentStreamWithHealing({
      agent: thinkAndPrepAgent,
      messages,
      options: {
        toolCallStreaming: true,
        runtimeContext,
        temperature: 0.5, // Add some variability
      },
      retryConfig: {
        maxRetries: 5, // Allow more retries for learning
        onRetry: (error, attempt) => {
          if (error.healingMessage.role === 'tool' && Array.isArray(error.healingMessage.content)) {
            const toolResult = error.healingMessage.content[0];
            if ('result' in toolResult && toolResult.result && 'error' in toolResult.result) {
              healingMessages.push(`Attempt ${attempt}: ${toolResult.result.error}`);
            }
          }
        },
      },
    });

    // Check the stream BEFORE consuming it: placed after the loop, this assertion could
    // never fail, because reading `result.stream.fullStream` would already have thrown.
    expect(result.stream).toBeDefined();

    // Process the stream to see what tool it eventually uses (the last tool-call chunk wins).
    for await (const chunk of result.stream.fullStream) {
      if (chunk.type === 'tool-call') {
        finalToolUsed = chunk.toolName;
        console.log(`Agent eventually used: ${chunk.toolName}`);
      }
    }

    // The agent should eventually use one of its available tools.
    // The check is conditional because a run may finish without any tool call.
    if (finalToolUsed) {
      expect([
        'sequentialThinking',
        'executeSql',
        'respondWithoutAnalysis',
        'submitThoughts',
      ]).toContain(finalToolUsed);
    }

    // Log healing attempts for debugging
    if (healingMessages.length > 0) {
      console.log('Healing messages:', healingMessages);
    }
  });
});