fix: retry on all AI provider errors, not just overloaded

- Modified retry logic to catch and retry on ALL errors from AI providers
- Keeps exponential backoff and message recovery unchanged
- Updates logging to be more generic for all error types
- Ensures robust handling of unpredictable provider errors (500s, 503s, 429s, etc.)
This commit is contained in:
dal 2025-09-19 10:27:42 -06:00
parent 863257fdd1
commit ec757ce152
No known key found for this signature in database
GPG Key ID: 16F4B0E1E9F61122
5 changed files with 61 additions and 22 deletions

View File

@ -30,7 +30,7 @@ export async function runAnalystAgentStep({
maxAttempts: 3,
baseDelayMs: 2000,
onRetry: (attempt, recoveredMessageCount) => {
console.info('Analyst Agent step retrying after overloaded error', {
console.info('Analyst Agent step retrying after error', {
messageId: options.messageId,
attempt,
recoveredMessageCount,

View File

@ -40,7 +40,7 @@ export async function runThinkAndPrepAgentStep({
maxAttempts: 3,
baseDelayMs: 2000,
onRetry: (attempt, recoveredMessageCount) => {
console.info('Think and Prep Agent step retrying after overloaded error', {
console.info('Think and Prep Agent step retrying after error', {
messageId: options.messageId,
attempt,
recoveredMessageCount,

View File

@ -201,11 +201,12 @@ describe('Done Tool Streaming Tests', () => {
// No file response messages should be created for report-only case
const fileResponseCallWithFiles = (
queries.updateMessageEntries as unknown as { mock: { calls: [Record<string, any>][] } }
).mock.calls.find((c) =>
Array.isArray((c[0] as { responseMessages?: unknown[] }).responseMessages) &&
((c[0] as { responseMessages?: { type?: string }[] }).responseMessages || []).some(
(m) => m?.type === 'file'
)
).mock.calls.find(
(c) =>
Array.isArray((c[0] as { responseMessages?: unknown[] }).responseMessages) &&
((c[0] as { responseMessages?: { type?: string }[] }).responseMessages || []).some(
(m) => m?.type === 'file'
)
);
expect(fileResponseCallWithFiles).toBeUndefined();
});
@ -297,16 +298,18 @@ describe('Done Tool Streaming Tests', () => {
// Response messages should include the metric file
const fileResponseCall = (
queries.updateMessageEntries as unknown as { mock: { calls: [Record<string, any>][] } }
).mock.calls.find((c) =>
Array.isArray((c[0] as { responseMessages?: unknown[] }).responseMessages) &&
((c[0] as { responseMessages?: { type?: string }[] }).responseMessages || []).some(
(m) => m?.type === 'file'
)
).mock.calls.find(
(c) =>
Array.isArray((c[0] as { responseMessages?: unknown[] }).responseMessages) &&
((c[0] as { responseMessages?: { type?: string }[] }).responseMessages || []).some(
(m) => m?.type === 'file'
)
);
expect(fileResponseCall).toBeDefined();
const responseMessages = (fileResponseCall?.[0] as { responseMessages?: Record<string, any>[] })
?.responseMessages as Record<string, any>[];
const responseMessages = (
fileResponseCall?.[0] as { responseMessages?: Record<string, any>[] }
)?.responseMessages as Record<string, any>[];
const metricResponse = responseMessages?.find((m) => m.id === metricId);
expect(metricResponse).toBeDefined();
expect(metricResponse?.file_type).toBe('metric_file');

View File

@ -259,14 +259,14 @@ describe('with-agent-retry', () => {
expect(result).toEqual(mockResult);
});
it('should retry and succeed after overloaded error', async () => {
it('should retry and succeed after any error', async () => {
const mockResult = { response: Promise.resolve('success') };
let callCount = 0;
const agent = createMockAgent(async () => {
callCount++;
if (callCount === 1) {
throw createOverloadedError();
throw new Error('Random provider error');
}
return mockResult;
});
@ -286,9 +286,43 @@ describe('with-agent-retry', () => {
expect(callCount).toBe(2);
});
it('should retry on various error types', async () => {
const errors = [
new Error('Internal Server Error'),
new Error('Service Unavailable'),
new Error('Too Many Requests'),
createOverloadedError(),
];
let callCount = 0;
const mockResult = { response: Promise.resolve('success') };
const agent = createMockAgent(async () => {
if (callCount < errors.length) {
throw errors[callCount++];
}
callCount++;
return mockResult;
});
mockFetchMessageEntries.mockResolvedValue({
rawLlmMessages: [],
responseMessages: [],
reasoning: [],
});
const result = await retryStream(agent, [], {
messageId: 'test-id',
maxAttempts: 5,
baseDelayMs: 10,
});
expect(result).toEqual(mockResult);
expect(callCount).toBe(5);
});
it('should throw after max attempts', async () => {
const agent = createMockAgent(async () => {
throw createOverloadedError();
throw new Error('Persistent error');
});
mockFetchMessageEntries.mockResolvedValue({
@ -303,7 +337,7 @@ describe('with-agent-retry', () => {
maxAttempts: 2,
baseDelayMs: 10,
})
).rejects.toEqual(createOverloadedError());
).rejects.toThrow('Persistent error');
});
it('should call onRetry callback', async () => {
@ -313,7 +347,7 @@ describe('with-agent-retry', () => {
const agent = createMockAgent(async () => {
callCount++;
if (callCount <= 2) {
throw createOverloadedError();
throw new Error('Temporary error');
}
return { response: Promise.resolve('success') };
});

View File

@ -50,6 +50,7 @@ interface RetryableCheck {
/**
* Check if an error matches the overloaded error pattern
* @deprecated - Kept for backwards compatibility, but all errors are now retryable
* Pure function - no side effects
*/
export const isOverloadedError = (error: unknown): boolean => {
@ -97,10 +98,11 @@ export const sleep = (ms: number): Promise<void> =>
/**
* Check if an error is retryable and return structured result
* Now treats ALL errors as retryable to handle various provider errors
* Pure function for error analysis
*/
export const analyzeError = (error: unknown): RetryableCheck => ({
isRetryable: isOverloadedError(error),
isRetryable: true, // All errors are now retryable
error,
});
@ -174,7 +176,7 @@ export const handleFailedAttempt = async (
console.error(`[Agent Retry] Error on attempt ${attempt}`, {
messageId,
error: error instanceof Error ? error.message : 'Unknown error',
isOverloaded: isOverloadedError(error),
errorType: error instanceof Error ? error.name : typeof error,
});
const { isRetryable } = analyzeError(error);
@ -188,7 +190,7 @@ export const handleFailedAttempt = async (
return { shouldRetry: false, nextMessages: currentMessages, delayMs: 0 };
}
console.warn('[Agent Retry] Overloaded error detected, preparing retry', {
console.warn('[Agent Retry] Error detected, preparing retry', {
messageId,
attempt,
remainingAttempts: maxAttempts - attempt,