// buster/packages/ai/tests/utils/retry/healing-proof.test.ts

import { RuntimeContext } from '@mastra/core/runtime-context';
import type { CoreMessage } from 'ai';
import { NoSuchToolError } from 'ai';
import { describe, expect, it, vi } from 'vitest';
import { thinkAndPrepAgent } from '../../../src/agents/think-and-prep-agent/think-and-prep-agent';
import { retryableAgentStreamWithHealing } from '../../../src/utils/retry';
import type { AnalystRuntimeContext } from '../../../src/workflows/analyst-workflow';

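/**
 * Healing proof tests.
 *
 * These tests replace the agent's `stream` method with a mock, so they exercise
 * the `onError` callback that `retryableAgentStreamWithHealing` hands to the
 * agent rather than a live model. They check the healing mechanism by:
 * 1. Forcing a tool error
 * 2. Capturing the healing response
 * 3. Verifying the stream continues after healing
 */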
describe('Definitive Healing Proof', () => {
  /**
   * Feeds the retry wrapper a conversation whose assistant turn calls a tool the
   * agent does not expose. The stubbed `stream` reports a NoSuchToolError through
   * the `onError` callback it receives, records the healing payload, and then
   * keeps yielding. The test asserts on the payload contents and on the mocked
   * stream having been drained to its end.
   */
  it('PROOF: Agent heals from NoSuchToolError and continues execution', async () => {
    const BAD_TOOL_NAME = 'create-metrics-file'; // deliberately absent from the tool list below
    const BAD_TOOL_CALL_ID = 'proof_call_123';
    const AVAILABLE_TOOLS = [
      'sequentialThinking',
      'executeSql',
      'respondWithoutAnalysis',
      'submitThoughts',
    ];
    // Safety cap on consumed chunks; the mocked stream emits fewer than this.
    const MAX_CHUNKS = 5;

    // Step 1: Create a message history that contains the bad tool call
    const messages: CoreMessage[] = [
      {
        role: 'user',
        content: 'Analyze my data',
      },
      {
        role: 'assistant',
        content: [
          {
            type: 'tool-call',
            toolCallId: BAD_TOOL_CALL_ID,
            toolName: BAD_TOOL_NAME,
            args: {
              files: [
                {
                  file_name: 'test.yml',
                  datasource: 'test',
                  collections: [],
                },
              ],
            },
          },
        ],
      },
    ];

    // Step 2: Track healing behavior
    const healingLog = {
      errorsCaught: 0,
      healingMessagesReturned: 0,
      availableToolsListed: false,
      correctToolsMentioned: [] as string[],
      streamCompleted: false,
      chunksAfterHealing: 0,
      onErrorCalled: false,
      // Starts as undefined (not null) so that `toBeDefined()` below is a real check.
      healingResponse: undefined as { error?: unknown } | undefined,
    };

    // Step 3: Run with our healing mechanism
    const runtimeContext = new RuntimeContext<AnalystRuntimeContext>();
    runtimeContext.set('userId', 'c2dd64cd-f7f3-4884-bc91-d46ae431901e');
    runtimeContext.set('chatId', crypto.randomUUID());
    runtimeContext.set('organizationId', 'bf58d19a-8bb9-4f1d-a257-2d2105e7f1ce');
    runtimeContext.set('dataSourceId', 'cc3ef3bc-44ec-4a43-8dc4-681cae5c996a');
    runtimeContext.set('dataSourceSyntax', 'postgres');

    // Mock agent whose stream invokes the onError callback it was given
    const mockAgent = {
      ...thinkAndPrepAgent,
      stream: vi.fn().mockImplementation(async (streamMessages, options) => {
        // Capture the onError callback
        const onError = options?.onError;

        return {
          fullStream: {
            async *[Symbol.asyncIterator]() {
              // Yield initial content
              yield { type: 'text-delta', text: 'Processing...' };

              // Simulate the tool error only when the bad tool call is in the history
              const hasBadToolCall = streamMessages.some(
                (m) =>
                  m.role === 'assistant' &&
                  Array.isArray(m.content) &&
                  m.content.some(
                    (c: any) => c.type === 'tool-call' && c.toolName === BAD_TOOL_NAME
                  )
              );

              if (hasBadToolCall && onError) {
                const toolError = Object.assign(
                  new NoSuchToolError({
                    toolName: BAD_TOOL_NAME,
                    availableTools: [...AVAILABLE_TOOLS],
                  }),
                  { toolCallId: BAD_TOOL_CALL_ID }
                );

                // Call onError and capture the healing response
                const response = onError(toolError);
                healingLog.onErrorCalled = true;
                healingLog.healingResponse = response;
                healingLog.errorsCaught++;

                // Guard with typeof so a primitive return value cannot make `in` throw
                if (typeof response === 'object' && response !== null && 'error' in response) {
                  const errorMessage = String(response.error);
                  healingLog.healingMessagesReturned++;

                  if (errorMessage.includes(`Tool "${BAD_TOOL_NAME}" is not available`)) {
                    healingLog.availableToolsListed = true;
                  }

                  // Record which of the real tools the healing message names
                  healingLog.correctToolsMentioned = AVAILABLE_TOOLS.filter((tool) =>
                    errorMessage.includes(tool)
                  );
                }

                // Yield the healing as a tool result
                yield {
                  type: 'tool-result',
                  toolCallId: BAD_TOOL_CALL_ID,
                  toolName: BAD_TOOL_NAME,
                  result: response,
                };
              }

              // Continue after healing
              yield { type: 'text-delta', text: 'Continuing after healing...' };
              // Reached only when the consumer pulls past the final chunk
              healingLog.streamCompleted = true;
            },
          },
        };
      }),
    };

    const result = await retryableAgentStreamWithHealing({
      agent: mockAgent as any,
      messages,
      options: {
        toolCallStreaming: true,
        runtimeContext,
      },
      retryConfig: {
        maxRetries: 3,
        onRetry: (error, attempt) => {
          // This is only called for stream creation retries, not in-stream healing
          console.log('Stream creation retry:', error.type, attempt);
        },
      },
    });

    // Step 4: Verify stream was created successfully
    expect(result.stream).toBeDefined();
    // Note: retryCount reflects stream creation retries, not in-stream healing
    // In-stream healing via onError doesn't increment retryCount

    // Step 5: Process stream to verify it continues after healing
    try {
      let chunksSeen = 0;
      for await (const _chunk of result.stream.fullStream) {
        chunksSeen++;
        // Only count chunks that arrive once a healing message has been produced,
        // so the pre-error 'Processing...' chunk cannot satisfy the assertion.
        if (healingLog.healingMessagesReturned > 0) {
          healingLog.chunksAfterHealing++;
        }
        if (chunksSeen > MAX_CHUNKS) break;
      }
    } catch (error) {
      // If we get here, healing failed
      throw new Error(`Stream failed after healing: ${error}`);
    }

    // Check healing log after stream is consumed
    expect(healingLog.onErrorCalled).toBe(true);
    expect(healingLog.healingResponse).toBeDefined();
    expect(healingLog.healingResponse?.error).toContain(
      `Tool "${BAD_TOOL_NAME}" is not available`
    );

    // Step 6: FINAL VERIFICATION - All healing behaviors occurred
    expect(healingLog.errorsCaught).toBe(1);
    expect(healingLog.healingMessagesReturned).toBe(1);
    expect(healingLog.availableToolsListed).toBe(true);
    // filter() preserves order, so equality means every real tool was mentioned
    expect(healingLog.correctToolsMentioned).toEqual(AVAILABLE_TOOLS);
    expect(healingLog.chunksAfterHealing).toBeGreaterThan(0);
    expect(healingLog.streamCompleted).toBe(true);

    console.log('✅ HEALING PROVEN:', healingLog);
  });

  /**
   * Stubs the agent's `stream` so that, while the stream is being created, it
   * hands an error named `AI_InvalidToolArgumentsError` to the `onError`
   * callback it received, then checks that the callback answered with an
   * `{ error }` payload mentioning the invalid arguments.
   */
  it('PROOF: Healing works with invalid tool arguments', async () => {
    // Filled in by the stub agent once onError has produced a healing payload.
    const observed = { healed: false, message: '' };

    const conversation: CoreMessage[] = [{ role: 'user', content: 'Test invalid args' }];

    const ctx = new RuntimeContext<AnalystRuntimeContext>();
    ctx.set('userId', 'c2dd64cd-f7f3-4884-bc91-d46ae431901e');
    ctx.set('chatId', crypto.randomUUID());
    ctx.set('organizationId', 'bf58d19a-8bb9-4f1d-a257-2d2105e7f1ce');
    ctx.set('dataSourceId', 'cc3ef3bc-44ec-4a43-8dc4-681cae5c996a');
    ctx.set('dataSourceSyntax', 'postgres');

    const stubAgent = {
      ...thinkAndPrepAgent,
      stream: vi.fn().mockImplementation(async (_incoming, streamOptions) => {
        // The wrapper is expected to supply an onError handler
        expect(streamOptions.onError).toBeDefined();

        // Build an error that looks like an invalid-arguments failure for executeSql
        const badArgsError = Object.assign(new Error('Invalid tool arguments'), {
          name: 'AI_InvalidToolArgumentsError',
          toolCallId: 'test-call',
          toolName: 'executeSql',
        });

        const reply = streamOptions.onError(badArgsError);
        const isHealingPayload = Boolean(reply) && typeof reply === 'object' && 'error' in reply;
        if (isHealingPayload) {
          observed.healed = true;
          observed.message = reply.error as string;
        }

        // Empty stream: this case only inspects the onError reply
        return { fullStream: { async *[Symbol.asyncIterator]() {} } };
      }),
    };

    const outcome = await retryableAgentStreamWithHealing({
      agent: stubAgent as any,
      messages: conversation,
      options: {
        toolCallStreaming: true,
        runtimeContext: ctx,
      },
    });

    expect(outcome.stream).toBeDefined();
    expect(observed.healed).toBe(true);
    expect(observed.message).toContain('Invalid tool arguments');
    console.log('✅ Invalid args healing proven:', observed.message);
  });

  /**
   * Stubs the agent's `stream` so that a single stream reports two
   * NoSuchToolErrors (for `bad-tool-1` and `bad-tool-2`) to the `onError`
   * callback it received, and checks that each report produced a truthy
   * healing response.
   */
  it('PROOF: Multiple healing attempts work correctly', async () => {
    type HealingAttempt = { attempt: number; type: 'no-such-tool'; toolName: string };

    const messages: CoreMessage[] = [
      {
        role: 'user',
        content: 'Process data',
      },
    ];

    // One entry per onError call that returned a truthy healing response
    const healingAttempts: HealingAttempt[] = [];

    const runtimeContext = new RuntimeContext<AnalystRuntimeContext>();
    runtimeContext.set('userId', 'c2dd64cd-f7f3-4884-bc91-d46ae431901e');
    runtimeContext.set('chatId', crypto.randomUUID());
    runtimeContext.set('organizationId', 'bf58d19a-8bb9-4f1d-a257-2d2105e7f1ce');
    runtimeContext.set('dataSourceId', 'cc3ef3bc-44ec-4a43-8dc4-681cae5c996a');
    runtimeContext.set('dataSourceSyntax', 'postgres');

    // Mock agent that simulates multiple errors
    const mockAgent = {
      ...thinkAndPrepAgent,
      stream: vi.fn().mockImplementation(async (_streamMessages, options) => {
        const onError = options.onError;

        // Reports one missing-tool error to onError and logs it if a healing response came back
        const simulateMissingTool = (toolName: string, toolCallId: string): void => {
          const error = Object.assign(
            new NoSuchToolError({
              toolName,
              availableTools: ['sequentialThinking', 'executeSql'],
            }),
            { toolCallId }
          );

          if (onError(error)) {
            healingAttempts.push({
              attempt: healingAttempts.length + 1,
              type: 'no-such-tool',
              toolName,
            });
          }
        };

        return {
          fullStream: {
            async *[Symbol.asyncIterator]() {
              // First error
              simulateMissingTool('bad-tool-1', 'call1');

              yield { type: 'text-delta', text: 'After first healing...' };

              // Second error
              simulateMissingTool('bad-tool-2', 'call2');
            },
          },
        };
      }),
    };

    const result = await retryableAgentStreamWithHealing({
      agent: mockAgent as any,
      messages,
      options: {
        toolCallStreaming: true,
        runtimeContext,
      },
      retryConfig: {
        maxRetries: 5,
        onRetry: (error, attempt) => {
          // NOTE(review): logged as a healing callback here; whether onRetry fires for
          // in-stream healing or only for stream-creation retries is not visible in this file.
          console.log('Healing callback:', error.type, attempt);
        },
      },
    });

    // Assert before dereferencing the stream so this check can actually fail
    expect(result.stream).toBeDefined();

    // Process the stream to trigger the errors
    for await (const _chunk of result.stream.fullStream) {
      // Just consume the stream
    }

    expect(healingAttempts.length).toBeGreaterThanOrEqual(2);
    expect(healingAttempts[0]?.type).toBe('no-such-tool');
    expect(healingAttempts[1]?.type).toBe('no-such-tool');
    // Both simulated failures must have been healed, not the same one twice
    expect(healingAttempts.map((entry) => entry.toolName)).toEqual(
      expect.arrayContaining(['bad-tool-1', 'bad-tool-2'])
    );

    console.log('✅ Multiple healing proven:', healingAttempts);
  });
});