Update AI SDK dependencies and refactor model initialization to use AI Gateway. Remove the now-superseded fallback logic and related tests for improved performance and maintainability.

dal 2025-09-02 17:20:39 -06:00
parent 497598afe1
commit d524e33996
14 changed files with 125 additions and 2065 deletions
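
In short: each model export that previously assembled a credential-gated fallback chain now resolves to a single AI Gateway model. A minimal before/after sketch, condensed from the diffs below (imports and error handling elided):

// Before: lazily build a fallback chain from whichever providers have credentials
const models: LanguageModelV2[] = [];
if (process.env.OPENAI_API_KEY) {
  models.push(openaiModel('gpt-5-2025-08-07'));
}
export const GPT5 = createFallback({ models, modelResetInterval: 60000, retryAfterOutput: true });

// After: one wrapped model routed through the gateway
export const GPT5 = gatewayModel('openai/gpt-5-2025-08-07');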

View File

@@ -38,10 +38,7 @@
"braintrust:push:tools:staged": "npx braintrust push evals/agents/analyst-agent/tool-order/staged-scorers.ts"
},
"dependencies": {
"@ai-sdk/anthropic": "^2.0.0",
"@ai-sdk/google-vertex": "^3.0.0",
"@ai-sdk/openai": "^2.0.0",
"@ai-sdk/provider": "^2.0.0",
"@ai-sdk/gateway": "^1.0.15",
"@buster/access-controls": "workspace:*",
"@buster/data-source": "workspace:*",
"@buster/database": "workspace:*",

File diff suppressed because it is too large

View File

@@ -1,345 +0,0 @@
import type {
LanguageModelV2,
LanguageModelV2CallOptions,
LanguageModelV2CallWarning,
LanguageModelV2Content,
LanguageModelV2FinishReason,
LanguageModelV2StreamPart,
LanguageModelV2Usage,
SharedV2ProviderMetadata,
} from '@ai-sdk/provider';
interface RetryableError extends Error {
statusCode?: number;
}
interface Settings {
models: LanguageModelV2[];
retryAfterOutput?: boolean;
modelResetInterval?: number;
maxRetriesPerModel?: number;
shouldRetryThisError?: (error: RetryableError) => boolean;
onError?: (error: RetryableError, modelId: string) => void | Promise<void>;
}
export function createFallback(settings: Settings): FallbackModel {
return new FallbackModel(settings);
}
const retryableStatusCodes = [
401, // wrong API key
403, // permission error, e.g. no access to the model or the region
408, // request timeout
409, // conflict
413, // payload too large
429, // too many requests/rate limits
500, // server error (and above)
];
// Common error messages/codes that indicate server overload or temporary issues
const retryableErrors = [
'overloaded',
'service unavailable',
'bad gateway',
'too many requests',
'internal server error',
'gateway timeout',
'rate_limit',
'wrong-key',
'unexpected',
'capacity',
'timeout',
'server_error',
'429', // Too Many Requests
'500', // Internal Server Error
'502', // Bad Gateway
'503', // Service Unavailable
'504', // Gateway Timeout
];
function defaultShouldRetryThisError(error: RetryableError): boolean {
// Handle null/undefined errors
if (!error) return false;
const statusCode = error.statusCode;
if (statusCode && (retryableStatusCodes.includes(statusCode) || statusCode >= 500)) {
return true;
}
if (error.message) {
const errorString = error.message.toLowerCase();
return retryableErrors.some((errType) => errorString.includes(errType));
}
// Check error object properties for retryable patterns
if (typeof error === 'object') {
try {
const errorString = JSON.stringify(error).toLowerCase();
return retryableErrors.some((errType) => errorString.includes(errType));
} catch {
// JSON.stringify can throw on circular references
return false;
}
}
return false;
}
function simpleBackoff(attempt: number): number {
return Math.min(1000 * 2 ** attempt, 10000); // 1s, 2s, 4s, 8s, max 10s
}
export class FallbackModel implements LanguageModelV2 {
readonly specificationVersion = 'v2';
get supportedUrls(): Record<string, RegExp[]> | PromiseLike<Record<string, RegExp[]>> {
return this.getCurrentModel().supportedUrls;
}
get modelId(): string {
return this.getCurrentModel().modelId;
}
readonly settings: Settings;
private _currentModelIndex = 0;
private lastModelReset: number = Date.now();
private readonly modelResetInterval: number;
retryAfterOutput: boolean;
get currentModelIndex(): number {
return this._currentModelIndex;
}
private set currentModelIndex(value: number) {
this._currentModelIndex = value;
}
constructor(settings: Settings) {
this.settings = settings;
this.modelResetInterval = settings.modelResetInterval ?? 3 * 60 * 1000; // Default 3 minutes in ms
this.retryAfterOutput = settings.retryAfterOutput ?? true;
if (!this.settings.models[this._currentModelIndex]) {
throw new Error('No models available in settings');
}
}
get provider(): string {
return this.getCurrentModel().provider;
}
private getCurrentModel(): LanguageModelV2 {
const model = this.settings.models[this._currentModelIndex];
if (!model) {
throw new Error(`No model available at index ${this._currentModelIndex}`);
}
console.info(
`[Fallback] Using model: ${model.modelId} (index: ${this._currentModelIndex}/${this.settings.models.length - 1})`
);
return model;
}
private checkAndResetModel() {
// Only reset if we're not already on the primary model
if (this.currentModelIndex === 0) return;
const now = Date.now();
if (now - this.lastModelReset >= this.modelResetInterval) {
// Reset to primary model
console.info(
`[Fallback] Resetting to primary model after ${this.modelResetInterval}ms timeout`
);
this.currentModelIndex = 0;
this.lastModelReset = now;
}
}
private switchToNextModel() {
const previousModel = this.settings.models[this.currentModelIndex]?.modelId || 'unknown';
this.currentModelIndex = (this.currentModelIndex + 1) % this.settings.models.length;
const nextModel = this.settings.models[this.currentModelIndex]?.modelId || 'unknown';
console.warn(`Switching from model ${previousModel} to ${nextModel} due to error`);
}
private async retry<T>(fn: () => PromiseLike<T>): Promise<T> {
let lastError: RetryableError | undefined;
const initialModel = this.currentModelIndex;
const maxRetriesPerModel = this.settings.maxRetriesPerModel ?? 2;
do {
let modelRetryCount = 0;
// Retry current model up to maxRetriesPerModel times
while (modelRetryCount < maxRetriesPerModel) {
try {
const result = await fn();
if (modelRetryCount > 0 || this.currentModelIndex !== initialModel) {
console.info(
`[Fallback] Request succeeded on model ${this.modelId} after ${modelRetryCount} retries`
);
}
return result;
} catch (error) {
lastError = error as RetryableError;
const shouldRetry = this.settings.shouldRetryThisError || defaultShouldRetryThisError;
if (!shouldRetry(lastError)) {
throw lastError; // Non-retryable error
}
if (this.settings.onError) {
try {
await this.settings.onError(lastError, this.modelId);
} catch {
// Don't let onError callback failures break the retry logic
}
}
modelRetryCount++;
if (modelRetryCount < maxRetriesPerModel) {
// Wait before retrying same model
await new Promise((resolve) => setTimeout(resolve, simpleBackoff(modelRetryCount - 1)));
}
}
}
// All retries for this model exhausted, switch to next model
console.warn(
`Model ${this.modelId} exhausted ${maxRetriesPerModel} retries, switching to next model`
);
this.switchToNextModel();
if (this.currentModelIndex === initialModel) {
throw lastError; // Tried all models
}
} while (this.currentModelIndex !== initialModel);
// This should never be reached, but TypeScript requires it
throw lastError || new Error('Retry failed');
}
doGenerate(options: LanguageModelV2CallOptions): PromiseLike<{
content: LanguageModelV2Content[];
finishReason: LanguageModelV2FinishReason;
usage: LanguageModelV2Usage;
providerMetadata?: SharedV2ProviderMetadata;
request?: { body?: unknown };
response?: {
headers?: Record<string, string>;
id?: string;
timestamp?: Date;
modelId?: string;
};
warnings: LanguageModelV2CallWarning[];
}> {
this.checkAndResetModel();
return this.retry(() => this.getCurrentModel().doGenerate(options));
}
doStream(options: LanguageModelV2CallOptions): PromiseLike<{
stream: ReadableStream<LanguageModelV2StreamPart>;
request?: { body?: unknown };
response?: { headers?: Record<string, string> };
}> {
this.checkAndResetModel();
const self = this;
const shouldRetry = this.settings.shouldRetryThisError || defaultShouldRetryThisError;
console.info(`[Fallback] Starting stream request...`);
return this.retry(async () => {
const result = await self.getCurrentModel().doStream(options);
let hasStreamedAny = false;
// Wrap the stream to handle errors and switch providers if needed
const wrappedStream = new ReadableStream<LanguageModelV2StreamPart>({
async start(controller) {
try {
const reader = result.stream.getReader();
let streamedChunks = 0;
while (true) {
const result = await reader.read();
const { done, value } = result;
if (!hasStreamedAny && value && typeof value === 'object' && 'error' in value) {
const error = value.error as RetryableError;
if (shouldRetry(error)) {
throw error;
}
}
if (done) {
console.info(
`[Fallback] Stream completed successfully. Streamed ${streamedChunks} chunks from ${self.modelId}`
);
break;
}
controller.enqueue(value);
streamedChunks++;
if (value?.type !== 'stream-start') {
hasStreamedAny = true;
}
}
controller.close();
} catch (error) {
// Check if this is a normal stream termination
const errorMessage = error instanceof Error ? error.message : String(error);
const isNormalTermination =
errorMessage === 'terminated' ||
errorMessage.includes('terminated') ||
errorMessage === 'aborted' ||
errorMessage.includes('aborted');
// If it's a normal termination and we've already streamed content, just close normally
if (isNormalTermination && hasStreamedAny) {
controller.close();
return;
}
if (self.settings.onError) {
try {
await self.settings.onError(error as RetryableError, self.modelId);
} catch {
// Don't let onError callback failures break the retry logic
}
}
if (!hasStreamedAny || self.retryAfterOutput) {
// If nothing was streamed yet, switch models and retry
console.warn(`Stream error on ${self.modelId}, attempting fallback...`);
self.switchToNextModel();
// Prevent infinite recursion - if we've tried all models, fail
if (self.currentModelIndex === 0) {
console.error('All models exhausted, failing request');
controller.error(error);
return;
}
try {
// Get the next model directly instead of recursive call
const nextModel = self.getCurrentModel();
const nextResult = await nextModel.doStream(options);
const nextReader = nextResult.stream.getReader();
while (true) {
const { done, value } = await nextReader.read();
if (done) break;
controller.enqueue(value);
}
controller.close();
} catch (nextError) {
controller.error(nextError);
}
return;
}
controller.error(error);
}
},
});
return {
stream: wrappedStream,
...(result.request && { request: result.request }),
...(result.response && { response: result.response }),
};
});
}
}
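
For reference, the removed createFallback wrapped an ordered list of LanguageModelV2 instances behind a single model interface: each model was retried up to maxRetriesPerModel times with exponential backoff before rotating to the next, and a backup model was abandoned for the primary again after modelResetInterval. A minimal usage sketch, assuming primary and backup are already-constructed LanguageModelV2 instances:

const model = createFallback({
  models: [primary, backup], // tried in order
  maxRetriesPerModel: 2, // per-model attempts before switching (the default)
  modelResetInterval: 60000, // ms before snapping back to the primary model
  retryAfterOutput: true, // also fall back mid-stream after partial output
});
// The result implements LanguageModelV2, so doGenerate/doStream work as usual.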

View File

@@ -1,83 +1,4 @@
- import type { LanguageModelV2 } from '@ai-sdk/provider';
- import { createFallback } from './ai-fallback';
- import { openaiModel } from './providers/openai';
+ import { gatewayModel } from './providers/gateway';
- // Lazy initialization to allow mocking in tests
- let _gpt5Instance: ReturnType<typeof createFallback> | null = null;
- function initializeGPT5() {
- if (_gpt5Instance) {
- return _gpt5Instance;
- }
- // Build models array based on available credentials
- const models: LanguageModelV2[] = [];
- // Only include OpenAI if API key is available
- if (process.env.OPENAI_API_KEY) {
- try {
- models.push(openaiModel('gpt-5-mini-2025-08-07'));
- console.info('GPT5: OpenAI model added to fallback chain');
- } catch (error) {
- console.warn('GPT5: Failed to initialize OpenAI model:', error);
- }
- }
- // Ensure we have at least one model
- if (models.length === 0) {
- throw new Error('No AI models available. Please set OPENAI_API_KEY environment variable.');
- }
- console.info(`GPT5: Initialized with ${models.length} model(s) in fallback chain`);
- _gpt5Instance = createFallback({
- models,
- modelResetInterval: 60000,
- retryAfterOutput: true,
- onError: (err, modelId) => {
- // Handle various error formats
- let errorMessage = 'Unknown error';
- if (err instanceof Error) {
- errorMessage = err.message;
- } else if (err && typeof err === 'object') {
- const errObj = err as Record<string, unknown>;
- if ('message' in errObj) {
- errorMessage = String(errObj.message);
- }
- if ('type' in errObj) {
- errorMessage = `${errObj.type}: ${errObj.message || 'No message'}`;
- }
- } else {
- errorMessage = String(err);
- }
- const errorDetails =
- err instanceof Error && err.stack ? err.stack : JSON.stringify(err, null, 2);
- console.error(`FALLBACK from model ${modelId}. Error: ${errorMessage}`);
- console.error('Error details:', errorDetails);
- },
- });
- return _gpt5Instance;
- }
- // Export a proxy that initializes on first use
- export const GPT5Mini = new Proxy({} as ReturnType<typeof createFallback>, {
- get(_target, prop) {
- const instance = initializeGPT5();
- // Direct property access without receiver to avoid proxy conflicts
- return instance[prop as keyof typeof instance];
- },
- has(_target, prop) {
- const instance = initializeGPT5();
- return prop in instance;
- },
- ownKeys(_target) {
- const instance = initializeGPT5();
- return Reflect.ownKeys(instance);
- },
- getOwnPropertyDescriptor(_target, prop) {
- const instance = initializeGPT5();
- return Reflect.getOwnPropertyDescriptor(instance, prop);
- },
- });
+ // Export GPT-5 Mini model using AI Gateway
+ export const GPT5Mini = gatewayModel('openai/gpt-5-mini-2025-08-07');
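
Call sites should be unaffected: gatewayModel returns a wrapLanguageModel result, i.e. still a LanguageModelV2, so the export drops in wherever the proxy did. An illustrative call (the import path is hypothetical):

import { streamText } from 'ai';
import { GPT5Mini } from './gpt-5-mini'; // hypothetical path to the file above

const result = streamText({ model: GPT5Mini, prompt: 'Summarize the release notes.' });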

View File

@@ -1,83 +1,4 @@
- import type { LanguageModelV2 } from '@ai-sdk/provider';
- import { createFallback } from './ai-fallback';
- import { openaiModel } from './providers/openai';
+ import { gatewayModel } from './providers/gateway';
- // Lazy initialization to allow mocking in tests
- let _gpt5Instance: ReturnType<typeof createFallback> | null = null;
- function initializeGPT5() {
- if (_gpt5Instance) {
- return _gpt5Instance;
- }
- // Build models array based on available credentials
- const models: LanguageModelV2[] = [];
- // Only include OpenAI if API key is available
- if (process.env.OPENAI_API_KEY) {
- try {
- models.push(openaiModel('gpt-5-nano-2025-08-07'));
- console.info('GPT5: OpenAI model added to fallback chain');
- } catch (error) {
- console.warn('GPT5: Failed to initialize OpenAI model:', error);
- }
- }
- // Ensure we have at least one model
- if (models.length === 0) {
- throw new Error('No AI models available. Please set OPENAI_API_KEY environment variable.');
- }
- console.info(`GPT5: Initialized with ${models.length} model(s) in fallback chain`);
- _gpt5Instance = createFallback({
- models,
- modelResetInterval: 60000,
- retryAfterOutput: true,
- onError: (err, modelId) => {
- // Handle various error formats
- let errorMessage = 'Unknown error';
- if (err instanceof Error) {
- errorMessage = err.message;
- } else if (err && typeof err === 'object') {
- const errObj = err as Record<string, unknown>;
- if ('message' in errObj) {
- errorMessage = String(errObj.message);
- }
- if ('type' in errObj) {
- errorMessage = `${errObj.type}: ${errObj.message || 'No message'}`;
- }
- } else {
- errorMessage = String(err);
- }
- const errorDetails =
- err instanceof Error && err.stack ? err.stack : JSON.stringify(err, null, 2);
- console.error(`FALLBACK from model ${modelId}. Error: ${errorMessage}`);
- console.error('Error details:', errorDetails);
- },
- });
- return _gpt5Instance;
- }
- // Export a proxy that initializes on first use
- export const GPT5Nano = new Proxy({} as ReturnType<typeof createFallback>, {
- get(_target, prop) {
- const instance = initializeGPT5();
- // Direct property access without receiver to avoid proxy conflicts
- return instance[prop as keyof typeof instance];
- },
- has(_target, prop) {
- const instance = initializeGPT5();
- return prop in instance;
- },
- ownKeys(_target) {
- const instance = initializeGPT5();
- return Reflect.ownKeys(instance);
- },
- getOwnPropertyDescriptor(_target, prop) {
- const instance = initializeGPT5();
- return Reflect.getOwnPropertyDescriptor(instance, prop);
- },
- });
+ // Export GPT-5 Nano model using AI Gateway
+ export const GPT5Nano = gatewayModel('openai/gpt-5-nano-2025-08-07');

View File

@@ -1,83 +1,4 @@
- import type { LanguageModelV2 } from '@ai-sdk/provider';
- import { createFallback } from './ai-fallback';
- import { openaiModel } from './providers/openai';
+ import { gatewayModel } from './providers/gateway';
- // Lazy initialization to allow mocking in tests
- let _gpt5Instance: ReturnType<typeof createFallback> | null = null;
- function initializeGPT5() {
- if (_gpt5Instance) {
- return _gpt5Instance;
- }
- // Build models array based on available credentials
- const models: LanguageModelV2[] = [];
- // Only include OpenAI if API key is available
- if (process.env.OPENAI_API_KEY) {
- try {
- models.push(openaiModel('gpt-5-2025-08-07'));
- console.info('GPT5: OpenAI model added to fallback chain');
- } catch (error) {
- console.warn('GPT5: Failed to initialize OpenAI model:', error);
- }
- }
- // Ensure we have at least one model
- if (models.length === 0) {
- throw new Error('No AI models available. Please set OPENAI_API_KEY environment variable.');
- }
- console.info(`GPT5: Initialized with ${models.length} model(s) in fallback chain`);
- _gpt5Instance = createFallback({
- models,
- modelResetInterval: 60000,
- retryAfterOutput: true,
- onError: (err, modelId) => {
- // Handle various error formats
- let errorMessage = 'Unknown error';
- if (err instanceof Error) {
- errorMessage = err.message;
- } else if (err && typeof err === 'object') {
- const errObj = err as Record<string, unknown>;
- if ('message' in errObj) {
- errorMessage = String(errObj.message);
- }
- if ('type' in errObj) {
- errorMessage = `${errObj.type}: ${errObj.message || 'No message'}`;
- }
- } else {
- errorMessage = String(err);
- }
- const errorDetails =
- err instanceof Error && err.stack ? err.stack : JSON.stringify(err, null, 2);
- console.error(`FALLBACK from model ${modelId}. Error: ${errorMessage}`);
- console.error('Error details:', errorDetails);
- },
- });
- return _gpt5Instance;
- }
- // Export a proxy that initializes on first use
- export const GPT5 = new Proxy({} as ReturnType<typeof createFallback>, {
- get(_target, prop) {
- const instance = initializeGPT5();
- // Direct property access without receiver to avoid proxy conflicts
- return instance[prop as keyof typeof instance];
- },
- has(_target, prop) {
- const instance = initializeGPT5();
- return prop in instance;
- },
- ownKeys(_target) {
- const instance = initializeGPT5();
- return Reflect.ownKeys(instance);
- },
- getOwnPropertyDescriptor(_target, prop) {
- const instance = initializeGPT5();
- return Reflect.getOwnPropertyDescriptor(instance, prop);
- },
- });
+ // Export GPT-5 model using AI Gateway
+ export const GPT5 = gatewayModel('openai/gpt-5-2025-08-07');

View File

@@ -1,108 +1,4 @@
- import type { LanguageModelV2 } from '@ai-sdk/provider';
- import { createFallback } from './ai-fallback';
- import { anthropicModel } from './providers/anthropic';
- import { vertexModel } from './providers/vertex';
+ import { gatewayModel } from './providers/gateway';
- // Lazy initialization to allow mocking in tests
- let _haiku35Instance: ReturnType<typeof createFallback> | null = null;
- function initializeHaiku35() {
- if (_haiku35Instance) {
- return _haiku35Instance;
- }
- // Build models array based on available credentials
- const models: LanguageModelV2[] = [];
- // Only include Anthropic if API key is available
- if (process.env.ANTHROPIC_API_KEY) {
- try {
- models.push(anthropicModel('claude-3-5-haiku-20241022'));
- console.info('Haiku35: Anthropic model added to fallback chain');
- } catch (error) {
- console.warn('Haiku35: Failed to initialize Anthropic model:', error);
- }
- }
- // Only include Vertex if all required credentials are available
- if (
- process.env.VERTEX_CLIENT_EMAIL &&
- process.env.VERTEX_PRIVATE_KEY &&
- process.env.VERTEX_PROJECT
- ) {
- try {
- models.push(vertexModel('claude-3-5-haiku@20241022'));
- console.info('Haiku35: Vertex AI model added to fallback chain (fallback)');
- } catch (error) {
- console.warn('Haiku35: Failed to initialize Vertex AI model:', error);
- }
- } else {
- const missing = [];
- if (!process.env.VERTEX_CLIENT_EMAIL) missing.push('VERTEX_CLIENT_EMAIL');
- if (!process.env.VERTEX_PRIVATE_KEY) missing.push('VERTEX_PRIVATE_KEY');
- if (!process.env.VERTEX_PROJECT) missing.push('VERTEX_PROJECT');
- console.info(
- `Haiku35: Missing Vertex credentials (${missing.join(', ')}), skipping Vertex model`
- );
- }
- // Ensure we have at least one model
- if (models.length === 0) {
- throw new Error(
- 'No AI models available. Please set either Vertex AI (VERTEX_CLIENT_EMAIL and VERTEX_PRIVATE_KEY) or Anthropic (ANTHROPIC_API_KEY) credentials.'
- );
- }
- console.info(`Haiku35: Initialized with ${models.length} model(s) in fallback chain`);
- _haiku35Instance = createFallback({
- models,
- modelResetInterval: 60000,
- retryAfterOutput: true,
- onError: (err, modelId) => {
- // Handle various error formats
- let errorMessage = 'Unknown error';
- if (err instanceof Error) {
- errorMessage = err.message;
- } else if (err && typeof err === 'object') {
- const errObj = err as Record<string, unknown>;
- if ('message' in errObj) {
- errorMessage = String(errObj.message);
- }
- if ('type' in errObj) {
- errorMessage = `${errObj.type}: ${errObj.message || 'No message'}`;
- }
- } else {
- errorMessage = String(err);
- }
- const errorDetails =
- err instanceof Error && err.stack ? err.stack : JSON.stringify(err, null, 2);
- console.error(`FALLBACK from model ${modelId}. Error: ${errorMessage}`);
- console.error('Error details:', errorDetails);
- },
- });
- return _haiku35Instance;
- }
- // Export a proxy that initializes on first use
- export const Haiku35 = new Proxy({} as ReturnType<typeof createFallback>, {
- get(_target, prop) {
- const instance = initializeHaiku35();
- // Direct property access without receiver to avoid proxy conflicts
- return instance[prop as keyof typeof instance];
- },
- has(_target, prop) {
- const instance = initializeHaiku35();
- return prop in instance;
- },
- ownKeys(_target) {
- const instance = initializeHaiku35();
- return Reflect.ownKeys(instance);
- },
- getOwnPropertyDescriptor(_target, prop) {
- const instance = initializeHaiku35();
- return Reflect.getOwnPropertyDescriptor(instance, prop);
- },
- });
+ // Export Haiku 3.5 model using AI Gateway
+ export const Haiku35 = gatewayModel('anthropic/claude-3-5-haiku-20241022');

View File

@@ -1,65 +1,4 @@
- import type { LanguageModelV2 } from '@ai-sdk/provider';
- import { createFallback } from './ai-fallback';
- import { anthropicModel } from './providers/anthropic';
- import { vertexModel } from './providers/vertex';
+ import { gatewayModel } from './providers/gateway';
- // Lazy initialization to allow mocking in tests
- let _opus41Instance: ReturnType<typeof createFallback> | null = null;
- function initializeOpus41() {
- if (_opus41Instance) {
- return _opus41Instance;
- }
- // Build models array based on available credentials
- const models: LanguageModelV2[] = [];
- // Only include Anthropic if API key is available
- if (process.env.ANTHROPIC_API_KEY) {
- try {
- models.push(anthropicModel('claude-opus-4-1-20250805'));
- console.info('Opus41: Anthropic model added to fallback chain');
- } catch (error) {
- console.warn('Opus41: Failed to initialize Anthropic model:', error);
- }
- }
- // Ensure we have at least one model
- if (models.length === 0) {
- throw new Error(
- 'No AI models available. Please set either Vertex AI (VERTEX_CLIENT_EMAIL and VERTEX_PRIVATE_KEY) or Anthropic (ANTHROPIC_API_KEY) credentials.'
- );
- }
- console.info(`Opus41: Initialized with ${models.length} model(s) in fallback chain`);
- _opus41Instance = createFallback({
- models,
- modelResetInterval: 60000,
- retryAfterOutput: true,
- onError: (err) => console.error(`FALLBACK. Here is the error: ${err}`),
- });
- return _opus41Instance;
- }
- // Export a proxy that initializes on first use
- export const Opus41 = new Proxy({} as ReturnType<typeof createFallback>, {
- get(_target, prop) {
- const instance = initializeOpus41();
- // Direct property access without receiver to avoid proxy conflicts
- return instance[prop as keyof typeof instance];
- },
- has(_target, prop) {
- const instance = initializeOpus41();
- return prop in instance;
- },
- ownKeys(_target) {
- const instance = initializeOpus41();
- return Reflect.ownKeys(instance);
- },
- getOwnPropertyDescriptor(_target, prop) {
- const instance = initializeOpus41();
- return Reflect.getOwnPropertyDescriptor(instance, prop);
- },
- });
+ // Export Opus 4.1 model using AI Gateway
+ export const Opus41 = gatewayModel('anthropic/claude-opus-4-1-20250805');

View File

@@ -1,51 +0,0 @@
import { createAnthropic } from '@ai-sdk/anthropic';
import { wrapLanguageModel } from 'ai';
import { BraintrustMiddleware } from 'braintrust';
export const anthropicModel = (modelId: string) => {
const anthropic = createAnthropic({
headers: {
'anthropic-beta': 'fine-grained-tool-streaming-2025-05-14,extended-cache-ttl-2025-04-11',
},
fetch: ((url, options) => {
if (options?.body) {
try {
// Parse existing body if it's a string
const existingBody =
typeof options.body === 'string' ? JSON.parse(options.body) : options.body;
// Append disable_parallel_tool_use if tool_choice is present
const modifiedBody = {
...existingBody,
};
if (modifiedBody.tool_choice) {
modifiedBody.tool_choice = {
...modifiedBody.tool_choice,
disable_parallel_tool_use: true,
};
}
// Return modified options
return fetch(url, {
...options,
body: JSON.stringify(modifiedBody),
});
} catch (error) {
console.error('Failed to parse request body:', error);
// If body parsing fails, fall back to original request
return fetch(url, options);
}
}
// For requests without body, pass through unchanged
return fetch(url, options);
}) as typeof fetch,
});
// Wrap the model with Braintrust middleware
return wrapLanguageModel({
model: anthropic(modelId),
middleware: BraintrustMiddleware({ debug: true }),
});
};

View File

@@ -0,0 +1,69 @@
import { createGateway } from '@ai-sdk/gateway';
import { wrapLanguageModel } from 'ai';
import { BraintrustMiddleware } from 'braintrust';
// Create gateway instance with custom fetch for Anthropic headers
const gateway = createGateway({
...(process.env.AI_GATEWAY_API_KEY && { apiKey: process.env.AI_GATEWAY_API_KEY }),
// Custom fetch to inject Anthropic beta headers when needed
fetch: ((url, options) => {
// Check if this is an Anthropic request
if (typeof url === 'string' && url.includes('anthropic')) {
// Parse and modify the request body to add disable_parallel_tool_use
if (options?.body) {
try {
const existingBody =
typeof options.body === 'string' ? JSON.parse(options.body) : options.body;
const modifiedBody = { ...existingBody };
if (modifiedBody.tool_choice) {
modifiedBody.tool_choice = {
...modifiedBody.tool_choice,
disable_parallel_tool_use: true,
};
}
// Add Anthropic beta headers
const headers = {
...options.headers,
'anthropic-beta':
'fine-grained-tool-streaming-2025-05-14,extended-cache-ttl-2025-04-11',
};
return fetch(url, {
...options,
headers,
body: JSON.stringify(modifiedBody),
});
} catch (error) {
console.error('Failed to parse request body:', error);
// If parsing fails, still add headers but don't modify body
const headers = {
...options.headers,
'anthropic-beta':
'fine-grained-tool-streaming-2025-05-14,extended-cache-ttl-2025-04-11',
};
return fetch(url, { ...options, headers });
}
}
// For requests without body, just add headers
const headers = {
...(options?.headers || {}),
'anthropic-beta': 'fine-grained-tool-streaming-2025-05-14,extended-cache-ttl-2025-04-11',
};
return fetch(url, { ...options, headers });
}
// For non-Anthropic requests, pass through unchanged
return fetch(url, options);
}) as typeof fetch,
});
// Export a function that creates wrapped models with Braintrust middleware
export const gatewayModel = (modelId: string) => {
return wrapLanguageModel({
model: gateway(modelId),
middleware: BraintrustMiddleware({ debug: true }),
});
};
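
To make the body rewrite above concrete: for an Anthropic-bound request the wrapper leaves the payload intact except for tool_choice, and adds the beta headers alongside. A hypothetical body before and after the custom fetch (tool name invented for illustration):

// before
{ "tool_choice": { "type": "tool", "name": "submitAnswer" }, ... }
// after: parallel tool use disabled on the forced tool call
{ "tool_choice": { "type": "tool", "name": "submitAnswer", "disable_parallel_tool_use": true }, ... }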

View File

@@ -1,15 +0,0 @@
import { createOpenAI } from '@ai-sdk/openai';
import { wrapLanguageModel } from 'ai';
import { BraintrustMiddleware } from 'braintrust';
export const openaiModel = (modelId: string) => {
const openai = createOpenAI({
apiKey: process.env.OPENAI_API_KEY,
});
// Wrap the model with Braintrust middleware
return wrapLanguageModel({
model: openai(modelId),
middleware: BraintrustMiddleware({ debug: true }),
});
};

View File

@@ -1,90 +0,0 @@
import { createVertexAnthropic } from '@ai-sdk/google-vertex/anthropic';
import type { LanguageModelV2 } from '@ai-sdk/provider';
import { wrapLanguageModel } from 'ai';
import { BraintrustMiddleware } from 'braintrust';
export const vertexModel = (modelId: string): LanguageModelV2 => {
// Create a proxy that validates credentials on first use
let actualModel: LanguageModelV2 | null = null;
const getActualModel = () => {
if (!actualModel) {
const clientEmail = process.env.VERTEX_CLIENT_EMAIL;
let privateKey = process.env.VERTEX_PRIVATE_KEY;
const project = process.env.VERTEX_PROJECT;
if (!clientEmail || !privateKey || !project) {
throw new Error(
'Missing required environment variables: VERTEX_CLIENT_EMAIL or VERTEX_PRIVATE_KEY'
);
}
// Handle escaped newlines in private key
privateKey = privateKey.replace(/\\n/g, '\n');
const vertex = createVertexAnthropic({
baseURL: `https://aiplatform.googleapis.com/v1/projects/${project}/locations/global/publishers/anthropic/models`,
location: 'global',
project,
googleAuthOptions: {
credentials: {
client_email: clientEmail,
private_key: privateKey,
},
},
headers: {
'anthropic-beta': 'fine-grained-tool-streaming-2025-05-14,extended-cache-ttl-2025-04-11',
},
fetch: ((url, options) => {
if (options?.body) {
try {
// Parse existing body if it's a string
const existingBody =
typeof options.body === 'string' ? JSON.parse(options.body) : options.body;
// Append disable_parallel_tool_use if tool_choice is present
const modifiedBody = {
...existingBody,
};
if (modifiedBody.tool_choice) {
modifiedBody.tool_choice = {
...modifiedBody.tool_choice,
disable_parallel_tool_use: true,
};
}
// Return modified options
return fetch(url, {
...options,
body: JSON.stringify(modifiedBody),
});
} catch (error) {
console.error('Failed to parse request body:', error);
// If body parsing fails, fall back to original request
return fetch(url, options);
}
}
// For requests without body, pass through unchanged
return fetch(url, options);
}) as typeof fetch,
});
// Wrap the model with Braintrust middleware
actualModel = wrapLanguageModel({
model: vertex(modelId),
middleware: BraintrustMiddleware({ debug: true }),
});
}
return actualModel;
};
// Create a proxy that delegates all calls to the actual model
return new Proxy({} as LanguageModelV2, {
get(_target, prop) {
const model = getActualModel();
return Reflect.get(model, prop);
},
});
};

View File

@@ -1,99 +1,4 @@
- import type { LanguageModelV2 } from '@ai-sdk/provider';
- import { createFallback } from './ai-fallback';
- import { anthropicModel } from './providers/anthropic';
- import { openaiModel } from './providers/openai';
- import { vertexModel } from './providers/vertex';
+ import { gatewayModel } from './providers/gateway';
- // Lazy initialization to allow mocking in tests
- let _sonnet4Instance: ReturnType<typeof createFallback> | null = null;
- function initializeSonnet4(): ReturnType<typeof createFallback> {
- if (_sonnet4Instance) {
- return _sonnet4Instance;
- }
- // Build models array based on available credentials
- const models: LanguageModelV2[] = [];
- // Only include Anthropic if API key is available
- if (process.env.ANTHROPIC_API_KEY) {
- try {
- models.push(anthropicModel('claude-4-sonnet-20250514'));
- console.info('Sonnet4: Anthropic model added to fallback chain (primary)');
- } catch (error) {
- console.warn('Sonnet4: Failed to initialize Anthropic model:', error);
- }
- } else {
- console.info('Sonnet4: No ANTHROPIC_API_KEY found, skipping Anthropic model');
- }
- if (process.env.OPENAI_API_KEY) {
- try {
- models.push(openaiModel('gpt-5'));
- console.info('Sonnet4: OpenAI model added to fallback chain');
- } catch (error) {
- console.warn('Sonnet4: Failed to initialize OpenAI model:', error);
- }
- }
- // Ensure we have at least one model
- if (models.length === 0) {
- throw new Error(
- 'No AI models available. Please set either Vertex AI (VERTEX_CLIENT_EMAIL and VERTEX_PRIVATE_KEY) or Anthropic (ANTHROPIC_API_KEY) credentials.'
- );
- }
- console.info(`Sonnet4: Initialized with ${models.length} model(s) in fallback chain`);
- _sonnet4Instance = createFallback({
- models,
- modelResetInterval: 60000,
- retryAfterOutput: true,
- onError: (err, modelId) => {
- // Handle various error formats
- let errorMessage = 'Unknown error';
- if (err instanceof Error) {
- errorMessage = err.message;
- } else if (err && typeof err === 'object') {
- const errObj = err as Record<string, unknown>;
- if ('message' in errObj) {
- errorMessage = String(errObj.message);
- }
- if ('type' in errObj) {
- errorMessage = `${errObj.type}: ${errObj.message || 'No message'}`;
- }
- } else {
- errorMessage = String(err);
- }
- const errorDetails =
- err instanceof Error && err.stack ? err.stack : JSON.stringify(err, null, 2);
- console.error(`FALLBACK from model ${modelId}. Error: ${errorMessage}`);
- console.error('Error details:', errorDetails);
- },
- });
- return _sonnet4Instance;
- }
- // Export a proxy that initializes on first use
- export const Sonnet4 = new Proxy({} as ReturnType<typeof createFallback>, {
- get(_target, prop) {
- const instance = initializeSonnet4();
- // Direct property access without receiver to avoid proxy conflicts
- return instance[prop as keyof typeof instance];
- },
- has(_target, prop) {
- const instance = initializeSonnet4();
- return prop in instance;
- },
- ownKeys(_target) {
- const instance = initializeSonnet4();
- return Reflect.ownKeys(instance);
- },
- getOwnPropertyDescriptor(_target, prop) {
- const instance = initializeSonnet4();
- return Reflect.getOwnPropertyDescriptor(instance, prop);
- },
- });
+ // Export Sonnet 4 model using AI Gateway
+ export const Sonnet4 = gatewayModel('anthropic/claude-4-sonnet-20250514');

View File

@@ -877,6 +877,9 @@ importers:
'@ai-sdk/anthropic':
specifier: ^2.0.0
version: 2.0.1(zod@3.25.76)
+ '@ai-sdk/gateway':
+ specifier: ^1.0.15
+ version: 1.0.15(zod@3.25.76)
'@ai-sdk/google-vertex':
specifier: ^3.0.0
version: 3.0.3(zod@3.25.76)
@@ -1248,6 +1251,12 @@
peerDependencies:
zod: ^3.25.76 || ^4
+ '@ai-sdk/gateway@1.0.15':
+ resolution: {integrity: sha512-xySXoQ29+KbGuGfmDnABx+O6vc7Gj7qugmj1kGpn0rW0rQNn6UKUuvscKMzWyv1Uv05GyC1vqHq8ZhEOLfXscQ==}
+ engines: {node: '>=18'}
+ peerDependencies:
+ zod: ^3.25.76 || ^4
'@ai-sdk/gateway@1.0.3':
resolution: {integrity: sha512-QRGz2vH1WR9NvCv8gWocoebAKiXcuqj22mug6i8COeVsp33x5K5cK2DT4TwiQx5SfYbqJbVoBT+UqnHF7A3PHA==}
engines: {node: '>=18'}
@@ -1290,6 +1299,12 @@
peerDependencies:
zod: ^3.25.76 || ^4
+ '@ai-sdk/provider-utils@3.0.7':
+ resolution: {integrity: sha512-o3BS5/t8KnBL3ubP8k3w77AByOypLm+pkIL/DCw0qKkhDbvhCy+L3hRTGPikpdb8WHcylAeKsjgwOxhj4cqTUA==}
+ engines: {node: '>=18'}
+ peerDependencies:
+ zod: ^3.25.76 || ^4
'@ai-sdk/provider@1.1.3':
resolution: {integrity: sha512-qZMxYJ0qqX/RfnuIaab+zp8UAeJn/ygXXAffR5I4N0n1IrvA6qBsjc8hXLmBiMV2zoXlifkacF7sEFnYnjBcqg==}
engines: {node: '>=18'}
@@ -7620,6 +7635,10 @@
resolution: {integrity: sha512-nVpZkTMM9rF6AQ9gPJpFsNAMt48wIzB5TQgiTLdHiuO8XEDhUgZEhqKlZWXbIzo9VmJ/HvysHqEaVeD5v9TPvA==}
engines: {node: '>=20.0.0'}
+ eventsource-parser@3.0.6:
+ resolution: {integrity: sha512-Vo1ab+QXPzZ4tCa8SwIHJFaSzy4R6SHf7BY79rFBDf0idraZWAkYrDjDj8uWaSm3S2TK+hJ7/t1CEmZ7jXw+pg==}
+ engines: {node: '>=18.0.0'}
eventsource@3.0.7:
resolution: {integrity: sha512-CRT1WTyuQoD771GW56XEZFQ/ZoSfWid1alKGDYMmkt2yl8UXrVR4pspqWNEcqKvVIzg6PAltWjxcSSPrboA4iA==}
engines: {node: '>=18.0.0'}
@@ -12325,6 +12344,12 @@
'@ai-sdk/provider-utils': 3.0.1(zod@3.25.76)
zod: 3.25.76
+ '@ai-sdk/gateway@1.0.15(zod@3.25.76)':
+ dependencies:
+ '@ai-sdk/provider': 2.0.0
+ '@ai-sdk/provider-utils': 3.0.7(zod@3.25.76)
+ zod: 3.25.76
'@ai-sdk/gateway@1.0.3(zod@3.25.1)':
dependencies:
'@ai-sdk/provider': 2.0.0
@@ -12391,6 +12416,13 @@
zod: 3.25.76
zod-to-json-schema: 3.24.6(zod@3.25.76)
+ '@ai-sdk/provider-utils@3.0.7(zod@3.25.76)':
+ dependencies:
+ '@ai-sdk/provider': 2.0.0
+ '@standard-schema/spec': 1.0.0
+ eventsource-parser: 3.0.6
+ zod: 3.25.76
'@ai-sdk/provider@1.1.3':
dependencies:
json-schema: 0.4.0
@@ -18323,14 +18355,14 @@
msw: 2.10.4(@types/node@20.19.4)(typescript@5.9.2)
vite: 7.0.5(@types/node@20.19.4)(jiti@2.4.2)(lightningcss@1.30.1)(sass@1.90.0)(terser@5.43.1)(tsx@4.20.4)(yaml@2.8.0)
- '@vitest/mocker@3.2.4(msw@2.10.4(@types/node@24.0.10)(typescript@5.9.2))(vite@7.0.5(@types/node@20.19.4)(jiti@2.4.2)(lightningcss@1.30.1)(sass@1.90.0)(terser@5.43.1)(tsx@4.20.4)(yaml@2.8.0))':
+ '@vitest/mocker@3.2.4(msw@2.10.4(@types/node@24.0.10)(typescript@5.9.2))(vite@7.0.5(@types/node@24.0.10)(jiti@2.4.2)(lightningcss@1.30.1)(sass@1.90.0)(terser@5.43.1)(tsx@4.20.4)(yaml@2.8.0))':
dependencies:
'@vitest/spy': 3.2.4
estree-walker: 3.0.3
magic-string: 0.30.17
optionalDependencies:
msw: 2.10.4(@types/node@24.0.10)(typescript@5.9.2)
- vite: 7.0.5(@types/node@20.19.4)(jiti@2.4.2)(lightningcss@1.30.1)(sass@1.90.0)(terser@5.43.1)(tsx@4.20.4)(yaml@2.8.0)
+ vite: 7.0.5(@types/node@24.0.10)(jiti@2.4.2)(lightningcss@1.30.1)(sass@1.90.0)(terser@5.43.1)(tsx@4.20.4)(yaml@2.8.0)
'@vitest/mocker@3.2.4(msw@2.10.4(@types/node@24.0.10)(typescript@5.9.2))(vite@7.1.3(@types/node@24.0.10)(jiti@2.4.2)(lightningcss@1.30.1)(sass@1.90.0)(terser@5.43.1)(tsx@4.20.4)(yaml@2.8.0))':
dependencies:
@@ -20420,6 +20452,8 @@
eventsource-parser@3.0.3: {}
+ eventsource-parser@3.0.6: {}
eventsource@3.0.7:
dependencies:
eventsource-parser: 3.0.3
@@ -25525,7 +25559,7 @@
dependencies:
'@types/chai': 5.2.2
'@vitest/expect': 3.2.4
- '@vitest/mocker': 3.2.4(msw@2.10.4(@types/node@24.0.10)(typescript@5.9.2))(vite@7.0.5(@types/node@20.19.4)(jiti@2.4.2)(lightningcss@1.30.1)(sass@1.90.0)(terser@5.43.1)(tsx@4.20.4)(yaml@2.8.0))
+ '@vitest/mocker': 3.2.4(msw@2.10.4(@types/node@24.0.10)(typescript@5.9.2))(vite@7.0.5(@types/node@24.0.10)(jiti@2.4.2)(lightningcss@1.30.1)(sass@1.90.0)(terser@5.43.1)(tsx@4.20.4)(yaml@2.8.0))
'@vitest/pretty-format': 3.2.4
'@vitest/runner': 3.2.4
'@vitest/snapshot': 3.2.4