fix: retry on all AI provider errors, not just overloaded

- Modified retry logic to catch and retry on ALL errors from AI providers - Keeps exponential backoff and message recovery unchanged - Updates logging to be more generic for all error types - Ensures robust handling of unpredictable provider errors (500s, 503s, 429s, etc.)
2025-09-19 10:27:42 -06:00 · 2025-09-19 10:27:42 -06:00 · ec757ce152
parent 863257fdd1
commit ec757ce152
5 changed files with 61 additions and 22 deletions
--- a/packages/ai/src/steps/analyst-agent-steps/analyst-step/analyst-step.ts
+++ b/packages/ai/src/steps/analyst-agent-steps/analyst-step/analyst-step.ts
@ -30,7 +30,7 @@ export async function runAnalystAgentStep({
      maxAttempts: 3,
      baseDelayMs: 2000,
      onRetry: (attempt, recoveredMessageCount) => {
-        console.info('Analyst Agent step retrying after overloaded error', {
+        console.info('Analyst Agent step retrying after error', {
          messageId: options.messageId,
          attempt,
          recoveredMessageCount,
--- a/packages/ai/src/steps/analyst-agent-steps/think-and-prep-step/think-and-prep-step.ts
+++ b/packages/ai/src/steps/analyst-agent-steps/think-and-prep-step/think-and-prep-step.ts
@ -40,7 +40,7 @@ export async function runThinkAndPrepAgentStep({
      maxAttempts: 3,
      baseDelayMs: 2000,
      onRetry: (attempt, recoveredMessageCount) => {
-        console.info('Think and Prep Agent step retrying after overloaded error', {
+        console.info('Think and Prep Agent step retrying after error', {
          messageId: options.messageId,
          attempt,
          recoveredMessageCount,
--- a/packages/ai/src/tools/communication-tools/done-tool/done-tool-streaming.test.ts
+++ b/packages/ai/src/tools/communication-tools/done-tool/done-tool-streaming.test.ts
@ -201,11 +201,12 @@ describe('Done Tool Streaming Tests', () => {
      // No file response messages should be created for report-only case
      const fileResponseCallWithFiles = (
        queries.updateMessageEntries as unknown as { mock: { calls: [Record<string, any>][] } }
-      ).mock.calls.find((c) =>
-        Array.isArray((c[0] as { responseMessages?: unknown[] }).responseMessages) &&
-        ((c[0] as { responseMessages?: { type?: string }[] }).responseMessages || []).some(
-          (m) => m?.type === 'file'
-        )
+      ).mock.calls.find(
+        (c) =>
+          Array.isArray((c[0] as { responseMessages?: unknown[] }).responseMessages) &&
+          ((c[0] as { responseMessages?: { type?: string }[] }).responseMessages || []).some(
+            (m) => m?.type === 'file'
+          )
      );
      expect(fileResponseCallWithFiles).toBeUndefined();
    });
@ -297,16 +298,18 @@ describe('Done Tool Streaming Tests', () => {
      // Response messages should include the metric file
      const fileResponseCall = (
        queries.updateMessageEntries as unknown as { mock: { calls: [Record<string, any>][] } }
-      ).mock.calls.find((c) =>
-        Array.isArray((c[0] as { responseMessages?: unknown[] }).responseMessages) &&
-        ((c[0] as { responseMessages?: { type?: string }[] }).responseMessages || []).some(
-          (m) => m?.type === 'file'
-        )
+      ).mock.calls.find(
+        (c) =>
+          Array.isArray((c[0] as { responseMessages?: unknown[] }).responseMessages) &&
+          ((c[0] as { responseMessages?: { type?: string }[] }).responseMessages || []).some(
+            (m) => m?.type === 'file'
+          )
      );

      expect(fileResponseCall).toBeDefined();
-      const responseMessages = (fileResponseCall?.[0] as { responseMessages?: Record<string, any>[] })
-        ?.responseMessages as Record<string, any>[];
+      const responseMessages = (
+        fileResponseCall?.[0] as { responseMessages?: Record<string, any>[] }
+      )?.responseMessages as Record<string, any>[];
      const metricResponse = responseMessages?.find((m) => m.id === metricId);
      expect(metricResponse).toBeDefined();
      expect(metricResponse?.file_type).toBe('metric_file');
--- a/packages/ai/src/utils/with-agent-retry.test.ts
+++ b/packages/ai/src/utils/with-agent-retry.test.ts
@ -259,14 +259,14 @@ describe('with-agent-retry', () => {
        expect(result).toEqual(mockResult);
      });

-      it('should retry and succeed after overloaded error', async () => {
+      it('should retry and succeed after any error', async () => {
        const mockResult = { response: Promise.resolve('success') };
        let callCount = 0;

        const agent = createMockAgent(async () => {
          callCount++;
          if (callCount === 1) {
-            throw createOverloadedError();
+            throw new Error('Random provider error');
          }
          return mockResult;
        });
@ -286,9 +286,43 @@ describe('with-agent-retry', () => {
        expect(callCount).toBe(2);
      });

+      it('should retry on various error types', async () => {
+        const errors = [
+          new Error('Internal Server Error'),
+          new Error('Service Unavailable'),
+          new Error('Too Many Requests'),
+          createOverloadedError(),
+        ];
+
+        let callCount = 0;
+        const mockResult = { response: Promise.resolve('success') };
+        const agent = createMockAgent(async () => {
+          if (callCount < errors.length) {
+            throw errors[callCount++];
+          }
+          callCount++;
+          return mockResult;
+        });
+
+        mockFetchMessageEntries.mockResolvedValue({
+          rawLlmMessages: [],
+          responseMessages: [],
+          reasoning: [],
+        });
+
+        const result = await retryStream(agent, [], {
+          messageId: 'test-id',
+          maxAttempts: 5,
+          baseDelayMs: 10,
+        });
+
+        expect(result).toEqual(mockResult);
+        expect(callCount).toBe(5);
+      });
+
      it('should throw after max attempts', async () => {
        const agent = createMockAgent(async () => {
-          throw createOverloadedError();
+          throw new Error('Persistent error');
        });

        mockFetchMessageEntries.mockResolvedValue({
@ -303,7 +337,7 @@ describe('with-agent-retry', () => {
            maxAttempts: 2,
            baseDelayMs: 10,
          })
-        ).rejects.toEqual(createOverloadedError());
+        ).rejects.toThrow('Persistent error');
      });

      it('should call onRetry callback', async () => {
@ -313,7 +347,7 @@ describe('with-agent-retry', () => {
        const agent = createMockAgent(async () => {
          callCount++;
          if (callCount <= 2) {
-            throw createOverloadedError();
+            throw new Error('Temporary error');
          }
          return { response: Promise.resolve('success') };
        });
--- a/packages/ai/src/utils/with-agent-retry.ts
+++ b/packages/ai/src/utils/with-agent-retry.ts
@ -50,6 +50,7 @@ interface RetryableCheck {

 /**
 * Check if an error matches the overloaded error pattern
+ * @deprecated - Kept for backwards compatibility, but all errors are now retryable
 * Pure function - no side effects
 */
 export const isOverloadedError = (error: unknown): boolean => {
@ -97,10 +98,11 @@ export const sleep = (ms: number): Promise<void> =>

 /**
 * Check if an error is retryable and return structured result
+ * Now treats ALL errors as retryable to handle various provider errors
 * Pure function for error analysis
 */
 export const analyzeError = (error: unknown): RetryableCheck => ({
-  isRetryable: isOverloadedError(error),
+  isRetryable: true, // All errors are now retryable
  error,
 });

@ -174,7 +176,7 @@ export const handleFailedAttempt = async (
  console.error(`[Agent Retry] Error on attempt ${attempt}`, {
    messageId,
    error: error instanceof Error ? error.message : 'Unknown error',
-    isOverloaded: isOverloadedError(error),
+    errorType: error instanceof Error ? error.name : typeof error,
  });

  const { isRetryable } = analyzeError(error);
@ -188,7 +190,7 @@ export const handleFailedAttempt = async (
    return { shouldRetry: false, nextMessages: currentMessages, delayMs: 0 };
  }

-  console.warn('[Agent Retry] Overloaded error detected, preparing retry', {
+  console.warn('[Agent Retry] Error detected, preparing retry', {
    messageId,
    attempt,
    remainingAttempts: maxAttempts - attempt,