Update AI SDK dependencies and refactor model initialization to use AI Gateway. Remove the now-superseded fallback logic and related tests for improved performance and maintainability.

dal 2025-09-02 17:20:39 -06:00
parent 497598afe1
commit d524e33996
14 changed files with 125 additions and 2065 deletions
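
In short: each model export that previously assembled a credential-gated fallback chain now resolves to a single AI Gateway model. A minimal before/after sketch, condensed from the diffs below (imports and error handling elided):

// Before: lazily build a fallback chain from whichever providers have credentials
const models: LanguageModelV2[] = [];
if (process.env.OPENAI_API_KEY) {
  models.push(openaiModel('gpt-5-2025-08-07'));
}
export const GPT5 = createFallback({ models, modelResetInterval: 60000, retryAfterOutput: true });

// After: one wrapped model routed through the gateway
export const GPT5 = gatewayModel('openai/gpt-5-2025-08-07');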

View File

@@ -38,10 +38,7 @@
"braintrust:push:tools:staged": "npx braintrust push evals/agents/analyst-agent/tool-order/staged-scorers.ts"
},
"dependencies": {
"@ai-sdk/anthropic": "^2.0.0",
"@ai-sdk/google-vertex": "^3.0.0",
"@ai-sdk/openai": "^2.0.0",
"@ai-sdk/provider": "^2.0.0",
"@ai-sdk/gateway": "^1.0.15",
"@buster/access-controls": "workspace:*",
"@buster/data-source": "workspace:*",
"@buster/database": "workspace:*",

File diff suppressed because it is too large

View File

@@ -1,345 +0,0 @@
import type {
LanguageModelV2,
LanguageModelV2CallOptions,
LanguageModelV2CallWarning,
LanguageModelV2Content,
LanguageModelV2FinishReason,
LanguageModelV2StreamPart,
LanguageModelV2Usage,
SharedV2ProviderMetadata,
} from '@ai-sdk/provider';
interface RetryableError extends Error {
statusCode?: number;
}
interface Settings {
models: LanguageModelV2[];
retryAfterOutput?: boolean;
modelResetInterval?: number;
maxRetriesPerModel?: number;
shouldRetryThisError?: (error: RetryableError) => boolean;
onError?: (error: RetryableError, modelId: string) => void | Promise<void>;
}
export function createFallback(settings: Settings): FallbackModel {
return new FallbackModel(settings);
}
const retryableStatusCodes = [
401, // wrong API key
403, // permission error, e.g. no access to the model or the region
408, // request timeout
409, // conflict
413, // payload too large
429, // too many requests/rate limits
500, // server error (and above)
];
// Common error messages/codes that indicate server overload or temporary issues
const retryableErrors = [
'overloaded',
'service unavailable',
'bad gateway',
'too many requests',
'internal server error',
'gateway timeout',
'rate_limit',
'wrong-key',
'unexpected',
'capacity',
'timeout',
'server_error',
'429', // Too Many Requests
'500', // Internal Server Error
'502', // Bad Gateway
'503', // Service Unavailable
'504', // Gateway Timeout
];
function defaultShouldRetryThisError(error: RetryableError): boolean {
// Handle null/undefined errors
if (!error) return false;
const statusCode = error.statusCode;
if (statusCode && (retryableStatusCodes.includes(statusCode) || statusCode >= 500)) {
return true;
}
if (error.message) {
const errorString = error.message.toLowerCase();
return retryableErrors.some((errType) => errorString.includes(errType));
}
// Check error object properties for retryable patterns
if (typeof error === 'object') {
try {
const errorString = JSON.stringify(error).toLowerCase();
return retryableErrors.some((errType) => errorString.includes(errType));
} catch {
// JSON.stringify can throw on circular references
return false;
}
}
return false;
}
function simpleBackoff(attempt: number): number {
return Math.min(1000 * 2 ** attempt, 10000); // 1s, 2s, 4s, 8s, max 10s
}
export class FallbackModel implements LanguageModelV2 {
readonly specificationVersion = 'v2';
get supportedUrls(): Record<string, RegExp[]> | PromiseLike<Record<string, RegExp[]>> {
return this.getCurrentModel().supportedUrls;
}
get modelId(): string {
return this.getCurrentModel().modelId;
}
readonly settings: Settings;
private _currentModelIndex = 0;
private lastModelReset: number = Date.now();
private readonly modelResetInterval: number;
retryAfterOutput: boolean;
get currentModelIndex(): number {
return this._currentModelIndex;
}
private set currentModelIndex(value: number) {
this._currentModelIndex = value;
}
constructor(settings: Settings) {
this.settings = settings;
this.modelResetInterval = settings.modelResetInterval ?? 3 * 60 * 1000; // Default 3 minutes in ms
this.retryAfterOutput = settings.retryAfterOutput ?? true;
if (!this.settings.models[this._currentModelIndex]) {
throw new Error('No models available in settings');
}
}
get provider(): string {
return this.getCurrentModel().provider;
}
private getCurrentModel(): LanguageModelV2 {
const model = this.settings.models[this._currentModelIndex];
if (!model) {
throw new Error(`No model available at index ${this._currentModelIndex}`);
}
console.info(
`[Fallback] Using model: ${model.modelId} (index: ${this._currentModelIndex}/${this.settings.models.length - 1})`
);
return model;
}
private checkAndResetModel() {
// Only reset if we're not already on the primary model
if (this.currentModelIndex === 0) return;
const now = Date.now();
if (now - this.lastModelReset >= this.modelResetInterval) {
// Reset to primary model
console.info(
`[Fallback] Resetting to primary model after ${this.modelResetInterval}ms timeout`
);
this.currentModelIndex = 0;
this.lastModelReset = now;
}
}
private switchToNextModel() {
const previousModel = this.settings.models[this.currentModelIndex]?.modelId || 'unknown';
this.currentModelIndex = (this.currentModelIndex + 1) % this.settings.models.length;
const nextModel = this.settings.models[this.currentModelIndex]?.modelId || 'unknown';
console.warn(`Switching from model ${previousModel} to ${nextModel} due to error`);
}
private async retry<T>(fn: () => PromiseLike<T>): Promise<T> {
let lastError: RetryableError | undefined;
const initialModel = this.currentModelIndex;
const maxRetriesPerModel = this.settings.maxRetriesPerModel ?? 2;
do {
let modelRetryCount = 0;
// Retry current model up to maxRetriesPerModel times
while (modelRetryCount < maxRetriesPerModel) {
try {
const result = await fn();
if (modelRetryCount > 0 || this.currentModelIndex !== initialModel) {
console.info(
`[Fallback] Request succeeded on model ${this.modelId} after ${modelRetryCount} retries`
);
}
return result;
} catch (error) {
lastError = error as RetryableError;
const shouldRetry = this.settings.shouldRetryThisError || defaultShouldRetryThisError;
if (!shouldRetry(lastError)) {
throw lastError; // Non-retryable error
}
if (this.settings.onError) {
try {
await this.settings.onError(lastError, this.modelId);
} catch {
// Don't let onError callback failures break the retry logic
}
}
modelRetryCount++;
if (modelRetryCount < maxRetriesPerModel) {
// Wait before retrying same model
await new Promise((resolve) => setTimeout(resolve, simpleBackoff(modelRetryCount - 1)));
}
}
}
// All retries for this model exhausted, switch to next model
console.warn(
`Model ${this.modelId} exhausted ${maxRetriesPerModel} retries, switching to next model`
);
this.switchToNextModel();
if (this.currentModelIndex === initialModel) {
throw lastError; // Tried all models
}
} while (this.currentModelIndex !== initialModel);
// This should never be reached, but TypeScript requires it
throw lastError || new Error('Retry failed');
}
doGenerate(options: LanguageModelV2CallOptions): PromiseLike<{
content: LanguageModelV2Content[];
finishReason: LanguageModelV2FinishReason;
usage: LanguageModelV2Usage;
providerMetadata?: SharedV2ProviderMetadata;
request?: { body?: unknown };
response?: {
headers?: Record<string, string>;
id?: string;
timestamp?: Date;
modelId?: string;
};
warnings: LanguageModelV2CallWarning[];
}> {
this.checkAndResetModel();
return this.retry(() => this.getCurrentModel().doGenerate(options));
}
doStream(options: LanguageModelV2CallOptions): PromiseLike<{
stream: ReadableStream<LanguageModelV2StreamPart>;
request?: { body?: unknown };
response?: { headers?: Record<string, string> };
}> {
this.checkAndResetModel();
const self = this;
const shouldRetry = this.settings.shouldRetryThisError || defaultShouldRetryThisError;
console.info(`[Fallback] Starting stream request...`);
return this.retry(async () => {
const result = await self.getCurrentModel().doStream(options);
let hasStreamedAny = false;
// Wrap the stream to handle errors and switch providers if needed
const wrappedStream = new ReadableStream<LanguageModelV2StreamPart>({
async start(controller) {
try {
const reader = result.stream.getReader();
let streamedChunks = 0;
while (true) {
const result = await reader.read();
const { done, value } = result;
if (!hasStreamedAny && value && typeof value === 'object' && 'error' in value) {
const error = value.error as RetryableError;
if (shouldRetry(error)) {
throw error;
}
}
if (done) {
console.info(
`[Fallback] Stream completed successfully. Streamed ${streamedChunks} chunks from ${self.modelId}`
);
break;
}
controller.enqueue(value);
streamedChunks++;
if (value?.type !== 'stream-start') {
hasStreamedAny = true;
}
}
controller.close();
} catch (error) {
// Check if this is a normal stream termination
const errorMessage = error instanceof Error ? error.message : String(error);
const isNormalTermination =
errorMessage === 'terminated' ||
errorMessage.includes('terminated') ||
errorMessage === 'aborted' ||
errorMessage.includes('aborted');
// If it's a normal termination and we've already streamed content, just close normally
if (isNormalTermination && hasStreamedAny) {
controller.close();
return;
}
if (self.settings.onError) {
try {
await self.settings.onError(error as RetryableError, self.modelId);
} catch {
// Don't let onError callback failures break the retry logic
}
}
if (!hasStreamedAny || self.retryAfterOutput) {
// If nothing was streamed yet, switch models and retry
console.warn(`Stream error on ${self.modelId}, attempting fallback...`);
self.switchToNextModel();
// Prevent infinite recursion - if we've tried all models, fail
if (self.currentModelIndex === 0) {
console.error('All models exhausted, failing request');
controller.error(error);
return;
}
try {
// Get the next model directly instead of recursive call
const nextModel = self.getCurrentModel();
const nextResult = await nextModel.doStream(options);
const nextReader = nextResult.stream.getReader();
while (true) {
const { done, value } = await nextReader.read();
if (done) break;
controller.enqueue(value);
}
controller.close();
} catch (nextError) {
controller.error(nextError);
}
return;
}
controller.error(error);
}
},
});
return {
stream: wrappedStream,
...(result.request && { request: result.request }),
...(result.response && { response: result.response }),
};
});
}
}
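
For reference, the removed createFallback wrapped an ordered list of LanguageModelV2 instances behind a single model interface: each model was retried up to maxRetriesPerModel times with exponential backoff before rotating to the next, and a backup model was abandoned for the primary again after modelResetInterval. A minimal usage sketch, assuming primary and backup are already-constructed LanguageModelV2 instances:

const model = createFallback({
  models: [primary, backup], // tried in order
  maxRetriesPerModel: 2, // per-model attempts before switching (the default)
  modelResetInterval: 60000, // ms before snapping back to the primary model
  retryAfterOutput: true, // also fall back mid-stream after partial output
});
// The result implements LanguageModelV2, so doGenerate/doStream work as usual.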

View File

@@ -1,83 +1,4 @@
- import type { LanguageModelV2 } from '@ai-sdk/provider';
- import { createFallback } from './ai-fallback';
- import { openaiModel } from './providers/openai';
+ import { gatewayModel } from './providers/gateway';
- // Lazy initialization to allow mocking in tests
- let _gpt5Instance: ReturnType<typeof createFallback> | null = null;
- function initializeGPT5() {
- if (_gpt5Instance) {
- return _gpt5Instance;
- }
- // Build models array based on available credentials
- const models: LanguageModelV2[] = [];
- // Only include OpenAI if API key is available
- if (process.env.OPENAI_API_KEY) {
- try {
- models.push(openaiModel('gpt-5-mini-2025-08-07'));
- console.info('GPT5: OpenAI model added to fallback chain');
- } catch (error) {
- console.warn('GPT5: Failed to initialize OpenAI model:', error);
- }
- }
- // Ensure we have at least one model
- if (models.length === 0) {
- throw new Error('No AI models available. Please set OPENAI_API_KEY environment variable.');
- }
- console.info(`GPT5: Initialized with ${models.length} model(s) in fallback chain`);
- _gpt5Instance = createFallback({
- models,
- modelResetInterval: 60000,
- retryAfterOutput: true,
- onError: (err, modelId) => {
- // Handle various error formats
- let errorMessage = 'Unknown error';
- if (err instanceof Error) {
- errorMessage = err.message;
- } else if (err && typeof err === 'object') {
- const errObj = err as Record<string, unknown>;
- if ('message' in errObj) {
- errorMessage = String(errObj.message);
- }
- if ('type' in errObj) {
- errorMessage = `${errObj.type}: ${errObj.message || 'No message'}`;
- }
- } else {
- errorMessage = String(err);
- }
- const errorDetails =
- err instanceof Error && err.stack ? err.stack : JSON.stringify(err, null, 2);
- console.error(`FALLBACK from model ${modelId}. Error: ${errorMessage}`);
- console.error('Error details:', errorDetails);
- },
- });
- return _gpt5Instance;
- }
- // Export a proxy that initializes on first use
- export const GPT5Mini = new Proxy({} as ReturnType<typeof createFallback>, {
- get(_target, prop) {
- const instance = initializeGPT5();
- // Direct property access without receiver to avoid proxy conflicts
- return instance[prop as keyof typeof instance];
- },
- has(_target, prop) {
- const instance = initializeGPT5();
- return prop in instance;
- },
- ownKeys(_target) {
- const instance = initializeGPT5();
- return Reflect.ownKeys(instance);
- },
- getOwnPropertyDescriptor(_target, prop) {
- const instance = initializeGPT5();
- return Reflect.getOwnPropertyDescriptor(instance, prop);
- },
- });
+ // Export GPT-5 Mini model using AI Gateway
+ export const GPT5Mini = gatewayModel('openai/gpt-5-mini-2025-08-07');
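
Call sites should be unaffected: gatewayModel returns a wrapLanguageModel result, i.e. still a LanguageModelV2, so the export drops in wherever the proxy did. An illustrative call (the import path is hypothetical):

import { streamText } from 'ai';
import { GPT5Mini } from './gpt-5-mini'; // hypothetical path to the file above

const result = streamText({ model: GPT5Mini, prompt: 'Summarize the release notes.' });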

View File

@@ -1,83 +1,4 @@
- import type { LanguageModelV2 } from '@ai-sdk/provider';
- import { createFallback } from './ai-fallback';
- import { openaiModel } from './providers/openai';
+ import { gatewayModel } from './providers/gateway';
- // Lazy initialization to allow mocking in tests
- let _gpt5Instance: ReturnType<typeof createFallback> | null = null;
- function initializeGPT5() {
- if (_gpt5Instance) {
- return _gpt5Instance;
- }
- // Build models array based on available credentials
- const models: LanguageModelV2[] = [];
- // Only include OpenAI if API key is available
- if (process.env.OPENAI_API_KEY) {
- try {
- models.push(openaiModel('gpt-5-nano-2025-08-07'));
- console.info('GPT5: OpenAI model added to fallback chain');
- } catch (error) {
- console.warn('GPT5: Failed to initialize OpenAI model:', error);
- }
- }
- // Ensure we have at least one model
- if (models.length === 0) {
- throw new Error('No AI models available. Please set OPENAI_API_KEY environment variable.');
- }
- console.info(`GPT5: Initialized with ${models.length} model(s) in fallback chain`);
- _gpt5Instance = createFallback({
- models,
- modelResetInterval: 60000,
- retryAfterOutput: true,
- onError: (err, modelId) => {
- // Handle various error formats
- let errorMessage = 'Unknown error';
- if (err instanceof Error) {
- errorMessage = err.message;
- } else if (err && typeof err === 'object') {
- const errObj = err as Record<string, unknown>;
- if ('message' in errObj) {
- errorMessage = String(errObj.message);
- }
- if ('type' in errObj) {
- errorMessage = `${errObj.type}: ${errObj.message || 'No message'}`;
- }
- } else {
- errorMessage = String(err);
- }
- const errorDetails =
- err instanceof Error && err.stack ? err.stack : JSON.stringify(err, null, 2);
- console.error(`FALLBACK from model ${modelId}. Error: ${errorMessage}`);
- console.error('Error details:', errorDetails);
- },
- });
- return _gpt5Instance;
- }
- // Export a proxy that initializes on first use
- export const GPT5Nano = new Proxy({} as ReturnType<typeof createFallback>, {
- get(_target, prop) {
- const instance = initializeGPT5();
- // Direct property access without receiver to avoid proxy conflicts
- return instance[prop as keyof typeof instance];
- },
- has(_target, prop) {
- const instance = initializeGPT5();
- return prop in instance;
- },
- ownKeys(_target) {
- const instance = initializeGPT5();
- return Reflect.ownKeys(instance);
- },
- getOwnPropertyDescriptor(_target, prop) {
- const instance = initializeGPT5();
- return Reflect.getOwnPropertyDescriptor(instance, prop);
- },
- });
+ // Export GPT-5 Nano model using AI Gateway
+ export const GPT5Nano = gatewayModel('openai/gpt-5-nano-2025-08-07');

View File

@@ -1,83 +1,4 @@
- import type { LanguageModelV2 } from '@ai-sdk/provider';
- import { createFallback } from './ai-fallback';
- import { openaiModel } from './providers/openai';
+ import { gatewayModel } from './providers/gateway';
- // Lazy initialization to allow mocking in tests
- let _gpt5Instance: ReturnType<typeof createFallback> | null = null;
- function initializeGPT5() {
- if (_gpt5Instance) {
- return _gpt5Instance;
- }
- // Build models array based on available credentials
- const models: LanguageModelV2[] = [];
- // Only include OpenAI if API key is available
- if (process.env.OPENAI_API_KEY) {
- try {
- models.push(openaiModel('gpt-5-2025-08-07'));
- console.info('GPT5: OpenAI model added to fallback chain');
- } catch (error) {
- console.warn('GPT5: Failed to initialize OpenAI model:', error);
- }
- }
- // Ensure we have at least one model
- if (models.length === 0) {
- throw new Error('No AI models available. Please set OPENAI_API_KEY environment variable.');
- }
- console.info(`GPT5: Initialized with ${models.length} model(s) in fallback chain`);
- _gpt5Instance = createFallback({
- models,
- modelResetInterval: 60000,
- retryAfterOutput: true,
- onError: (err, modelId) => {
- // Handle various error formats
- let errorMessage = 'Unknown error';
- if (err instanceof Error) {
- errorMessage = err.message;
- } else if (err && typeof err === 'object') {
- const errObj = err as Record<string, unknown>;
- if ('message' in errObj) {
- errorMessage = String(errObj.message);
- }
- if ('type' in errObj) {
- errorMessage = `${errObj.type}: ${errObj.message || 'No message'}`;
- }
- } else {
- errorMessage = String(err);
- }
- const errorDetails =
- err instanceof Error && err.stack ? err.stack : JSON.stringify(err, null, 2);
- console.error(`FALLBACK from model ${modelId}. Error: ${errorMessage}`);
- console.error('Error details:', errorDetails);
- },
- });
- return _gpt5Instance;
- }
- // Export a proxy that initializes on first use
- export const GPT5 = new Proxy({} as ReturnType<typeof createFallback>, {
- get(_target, prop) {
- const instance = initializeGPT5();
- // Direct property access without receiver to avoid proxy conflicts
- return instance[prop as keyof typeof instance];
- },
- has(_target, prop) {
- const instance = initializeGPT5();
- return prop in instance;
- },
- ownKeys(_target) {
- const instance = initializeGPT5();
- return Reflect.ownKeys(instance);
- },
- getOwnPropertyDescriptor(_target, prop) {
- const instance = initializeGPT5();
- return Reflect.getOwnPropertyDescriptor(instance, prop);
- },
- });
+ // Export GPT-5 model using AI Gateway
+ export const GPT5 = gatewayModel('openai/gpt-5-2025-08-07');

View File

@@ -1,108 +1,4 @@
- import type { LanguageModelV2 } from '@ai-sdk/provider';
- import { createFallback } from './ai-fallback';
- import { anthropicModel } from './providers/anthropic';
- import { vertexModel } from './providers/vertex';
+ import { gatewayModel } from './providers/gateway';
- // Lazy initialization to allow mocking in tests
- let _haiku35Instance: ReturnType<typeof createFallback> | null = null;
- function initializeHaiku35() {
- if (_haiku35Instance) {
- return _haiku35Instance;
- }
- // Build models array based on available credentials
- const models: LanguageModelV2[] = [];
- // Only include Anthropic if API key is available
- if (process.env.ANTHROPIC_API_KEY) {
- try {
- models.push(anthropicModel('claude-3-5-haiku-20241022'));
- console.info('Haiku35: Anthropic model added to fallback chain');
- } catch (error) {
- console.warn('Haiku35: Failed to initialize Anthropic model:', error);
- }
- }
- // Only include Vertex if all required credentials are available
- if (
- process.env.VERTEX_CLIENT_EMAIL &&
- process.env.VERTEX_PRIVATE_KEY &&
- process.env.VERTEX_PROJECT
- ) {
- try {
- models.push(vertexModel('claude-3-5-haiku@20241022'));
- console.info('Haiku35: Vertex AI model added to fallback chain (fallback)');
- } catch (error) {
- console.warn('Haiku35: Failed to initialize Vertex AI model:', error);
- }
- } else {
- const missing = [];
- if (!process.env.VERTEX_CLIENT_EMAIL) missing.push('VERTEX_CLIENT_EMAIL');
- if (!process.env.VERTEX_PRIVATE_KEY) missing.push('VERTEX_PRIVATE_KEY');
- if (!process.env.VERTEX_PROJECT) missing.push('VERTEX_PROJECT');
- console.info(
- `Haiku35: Missing Vertex credentials (${missing.join(', ')}), skipping Vertex model`
- );
- }
- // Ensure we have at least one model
- if (models.length === 0) {
- throw new Error(
- 'No AI models available. Please set either Vertex AI (VERTEX_CLIENT_EMAIL and VERTEX_PRIVATE_KEY) or Anthropic (ANTHROPIC_API_KEY) credentials.'
- );
- }
- console.info(`Haiku35: Initialized with ${models.length} model(s) in fallback chain`);
- _haiku35Instance = createFallback({
- models,
- modelResetInterval: 60000,
- retryAfterOutput: true,
- onError: (err, modelId) => {
- // Handle various error formats
- let errorMessage = 'Unknown error';
- if (err instanceof Error) {
- errorMessage = err.message;
- } else if (err && typeof err === 'object') {
- const errObj = err as Record<string, unknown>;
- if ('message' in errObj) {
- errorMessage = String(errObj.message);
- }
- if ('type' in errObj) {
- errorMessage = `${errObj.type}: ${errObj.message || 'No message'}`;
- }
- } else {
- errorMessage = String(err);
- }
- const errorDetails =
- err instanceof Error && err.stack ? err.stack : JSON.stringify(err, null, 2);
- console.error(`FALLBACK from model ${modelId}. Error: ${errorMessage}`);
- console.error('Error details:', errorDetails);
- },
- });
- return _haiku35Instance;
- }
- // Export a proxy that initializes on first use
- export const Haiku35 = new Proxy({} as ReturnType<typeof createFallback>, {
- get(_target, prop) {
- const instance = initializeHaiku35();
- // Direct property access without receiver to avoid proxy conflicts
- return instance[prop as keyof typeof instance];
- },
- has(_target, prop) {
- const instance = initializeHaiku35();
- return prop in instance;
- },
- ownKeys(_target) {
- const instance = initializeHaiku35();
- return Reflect.ownKeys(instance);
- },
- getOwnPropertyDescriptor(_target, prop) {
- const instance = initializeHaiku35();
- return Reflect.getOwnPropertyDescriptor(instance, prop);
- },
- });
+ // Export Haiku 3.5 model using AI Gateway
+ export const Haiku35 = gatewayModel('anthropic/claude-3-5-haiku-20241022');

View File

@@ -1,65 +1,4 @@
- import type { LanguageModelV2 } from '@ai-sdk/provider';
- import { createFallback } from './ai-fallback';
- import { anthropicModel } from './providers/anthropic';
- import { vertexModel } from './providers/vertex';
+ import { gatewayModel } from './providers/gateway';
- // Lazy initialization to allow mocking in tests
- let _opus41Instance: ReturnType<typeof createFallback> | null = null;
- function initializeOpus41() {
- if (_opus41Instance) {
- return _opus41Instance;
- }
- // Build models array based on available credentials
- const models: LanguageModelV2[] = [];
- // Only include Anthropic if API key is available
- if (process.env.ANTHROPIC_API_KEY) {
- try {
- models.push(anthropicModel('claude-opus-4-1-20250805'));
- console.info('Opus41: Anthropic model added to fallback chain');
- } catch (error) {
- console.warn('Opus41: Failed to initialize Anthropic model:', error);
- }
- }
- // Ensure we have at least one model
- if (models.length === 0) {
- throw new Error(
- 'No AI models available. Please set either Vertex AI (VERTEX_CLIENT_EMAIL and VERTEX_PRIVATE_KEY) or Anthropic (ANTHROPIC_API_KEY) credentials.'
- );
- }
- console.info(`Opus41: Initialized with ${models.length} model(s) in fallback chain`);
- _opus41Instance = createFallback({
- models,
- modelResetInterval: 60000,
- retryAfterOutput: true,
- onError: (err) => console.error(`FALLBACK. Here is the error: ${err}`),
- });
- return _opus41Instance;
- }
- // Export a proxy that initializes on first use
- export const Opus41 = new Proxy({} as ReturnType<typeof createFallback>, {
- get(_target, prop) {
- const instance = initializeOpus41();
- // Direct property access without receiver to avoid proxy conflicts
- return instance[prop as keyof typeof instance];
- },
- has(_target, prop) {
- const instance = initializeOpus41();
- return prop in instance;
- },
- ownKeys(_target) {
- const instance = initializeOpus41();
- return Reflect.ownKeys(instance);
- },
- getOwnPropertyDescriptor(_target, prop) {
- const instance = initializeOpus41();
- return Reflect.getOwnPropertyDescriptor(instance, prop);
- },
- });
+ // Export Opus 4.1 model using AI Gateway
+ export const Opus41 = gatewayModel('anthropic/claude-opus-4-1-20250805');

View File

@@ -1,51 +0,0 @@
import { createAnthropic } from '@ai-sdk/anthropic';
import { wrapLanguageModel } from 'ai';
import { BraintrustMiddleware } from 'braintrust';
export const anthropicModel = (modelId: string) => {
const anthropic = createAnthropic({
headers: {
'anthropic-beta': 'fine-grained-tool-streaming-2025-05-14,extended-cache-ttl-2025-04-11',
},
fetch: ((url, options) => {
if (options?.body) {
try {
// Parse existing body if it's a string
const existingBody =
typeof options.body === 'string' ? JSON.parse(options.body) : options.body;
// Append disable_parallel_tool_use if tool_choice is present
const modifiedBody = {
...existingBody,
};
if (modifiedBody.tool_choice) {
modifiedBody.tool_choice = {
...modifiedBody.tool_choice,
disable_parallel_tool_use: true,
};
}
// Return modified options
return fetch(url, {
...options,
body: JSON.stringify(modifiedBody),
});
} catch (error) {
console.error('Failed to parse request body:', error);
// If body parsing fails, fall back to original request
return fetch(url, options);
}
}
// For requests without body, pass through unchanged
return fetch(url, options);
}) as typeof fetch,
});
// Wrap the model with Braintrust middleware
return wrapLanguageModel({
model: anthropic(modelId),
middleware: BraintrustMiddleware({ debug: true }),
});
};

View File

@@ -0,0 +1,69 @@
import { createGateway } from '@ai-sdk/gateway';
import { wrapLanguageModel } from 'ai';
import { BraintrustMiddleware } from 'braintrust';
// Create gateway instance with custom fetch for Anthropic headers
const gateway = createGateway({
...(process.env.AI_GATEWAY_API_KEY && { apiKey: process.env.AI_GATEWAY_API_KEY }),
// Custom fetch to inject Anthropic beta headers when needed
fetch: ((url, options) => {
// Check if this is an Anthropic request
if (typeof url === 'string' && url.includes('anthropic')) {
// Parse and modify the request body to add disable_parallel_tool_use
if (options?.body) {
try {
const existingBody =
typeof options.body === 'string' ? JSON.parse(options.body) : options.body;
const modifiedBody = { ...existingBody };
if (modifiedBody.tool_choice) {
modifiedBody.tool_choice = {
...modifiedBody.tool_choice,
disable_parallel_tool_use: true,
};
}
// Add Anthropic beta headers
const headers = {
...options.headers,
'anthropic-beta':
'fine-grained-tool-streaming-2025-05-14,extended-cache-ttl-2025-04-11',
};
return fetch(url, {
...options,
headers,
body: JSON.stringify(modifiedBody),
});
} catch (error) {
console.error('Failed to parse request body:', error);
// If parsing fails, still add headers but don't modify body
const headers = {
...options.headers,
'anthropic-beta':
'fine-grained-tool-streaming-2025-05-14,extended-cache-ttl-2025-04-11',
};
return fetch(url, { ...options, headers });
}
}
// For requests without body, just add headers
const headers = {
...(options?.headers || {}),
'anthropic-beta': 'fine-grained-tool-streaming-2025-05-14,extended-cache-ttl-2025-04-11',
};
return fetch(url, { ...options, headers });
}
// For non-Anthropic requests, pass through unchanged
return fetch(url, options);
}) as typeof fetch,
});
// Export a function that creates wrapped models with Braintrust middleware
export const gatewayModel = (modelId: string) => {
return wrapLanguageModel({
model: gateway(modelId),
middleware: BraintrustMiddleware({ debug: true }),
});
};
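
To make the body rewrite above concrete: for an Anthropic-bound request the wrapper leaves the payload intact except for tool_choice, and adds the beta headers alongside. A hypothetical body before and after the custom fetch (tool name invented for illustration):

// before
{ "tool_choice": { "type": "tool", "name": "submitAnswer" }, ... }
// after: parallel tool use disabled on the forced tool call
{ "tool_choice": { "type": "tool", "name": "submitAnswer", "disable_parallel_tool_use": true }, ... }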

View File

@@ -1,15 +0,0 @@
import { createOpenAI } from '@ai-sdk/openai';
import { wrapLanguageModel } from 'ai';
import { BraintrustMiddleware } from 'braintrust';
export const openaiModel = (modelId: string) => {
const openai = createOpenAI({
apiKey: process.env.OPENAI_API_KEY,
});
// Wrap the model with Braintrust middleware
return wrapLanguageModel({
model: openai(modelId),
middleware: BraintrustMiddleware({ debug: true }),
});
};

View File

@@ -1,90 +0,0 @@
import { createVertexAnthropic } from '@ai-sdk/google-vertex/anthropic';
import type { LanguageModelV2 } from '@ai-sdk/provider';
import { wrapLanguageModel } from 'ai';
import { BraintrustMiddleware } from 'braintrust';
export const vertexModel = (modelId: string): LanguageModelV2 => {
// Create a proxy that validates credentials on first use
let actualModel: LanguageModelV2 | null = null;
const getActualModel = () => {
if (!actualModel) {
const clientEmail = process.env.VERTEX_CLIENT_EMAIL;
let privateKey = process.env.VERTEX_PRIVATE_KEY;
const project = process.env.VERTEX_PROJECT;
if (!clientEmail || !privateKey || !project) {
throw new Error(
'Missing required environment variables: VERTEX_CLIENT_EMAIL or VERTEX_PRIVATE_KEY'
);
}
// Handle escaped newlines in private key
privateKey = privateKey.replace(/\\n/g, '\n');
const vertex = createVertexAnthropic({
baseURL: `https://aiplatform.googleapis.com/v1/projects/${project}/locations/global/publishers/anthropic/models`,
location: 'global',
project,
googleAuthOptions: {
credentials: {
client_email: clientEmail,
private_key: privateKey,
},
},
headers: {
'anthropic-beta': 'fine-grained-tool-streaming-2025-05-14,extended-cache-ttl-2025-04-11',
},
fetch: ((url, options) => {
if (options?.body) {
try {
// Parse existing body if it's a string
const existingBody =
typeof options.body === 'string' ? JSON.parse(options.body) : options.body;
// Append disable_parallel_tool_use if tool_choice is present
const modifiedBody = {
...existingBody,
};
if (modifiedBody.tool_choice) {
modifiedBody.tool_choice = {
...modifiedBody.tool_choice,
disable_parallel_tool_use: true,
};
}
// Return modified options
return fetch(url, {
...options,
body: JSON.stringify(modifiedBody),
});
} catch (error) {
console.error('Failed to parse request body:', error);
// If body parsing fails, fall back to original request
return fetch(url, options);
}
}
// For requests without body, pass through unchanged
return fetch(url, options);
}) as typeof fetch,
});
// Wrap the model with Braintrust middleware
actualModel = wrapLanguageModel({
model: vertex(modelId),
middleware: BraintrustMiddleware({ debug: true }),
});
}
return actualModel;
};
// Create a proxy that delegates all calls to the actual model
return new Proxy({} as LanguageModelV2, {
get(_target, prop) {
const model = getActualModel();
return Reflect.get(model, prop);
},
});
};

View File

@@ -1,99 +1,4 @@
- import type { LanguageModelV2 } from '@ai-sdk/provider';
- import { createFallback } from './ai-fallback';
- import { anthropicModel } from './providers/anthropic';
- import { openaiModel } from './providers/openai';
- import { vertexModel } from './providers/vertex';
+ import { gatewayModel } from './providers/gateway';
- // Lazy initialization to allow mocking in tests
- let _sonnet4Instance: ReturnType<typeof createFallback> | null = null;
- function initializeSonnet4(): ReturnType<typeof createFallback> {
- if (_sonnet4Instance) {
- return _sonnet4Instance;
- }
- // Build models array based on available credentials
- const models: LanguageModelV2[] = [];
- // Only include Anthropic if API key is available
- if (process.env.ANTHROPIC_API_KEY) {
- try {
- models.push(anthropicModel('claude-4-sonnet-20250514'));
- console.info('Sonnet4: Anthropic model added to fallback chain (primary)');
- } catch (error) {
- console.warn('Sonnet4: Failed to initialize Anthropic model:', error);
- }
- } else {
- console.info('Sonnet4: No ANTHROPIC_API_KEY found, skipping Anthropic model');
- }
- if (process.env.OPENAI_API_KEY) {
- try {
- models.push(openaiModel('gpt-5'));
- console.info('Sonnet4: OpenAI model added to fallback chain');
- } catch (error) {
- console.warn('Sonnet4: Failed to initialize OpenAI model:', error);
- }
- }
- // Ensure we have at least one model
- if (models.length === 0) {
- throw new Error(
- 'No AI models available. Please set either Vertex AI (VERTEX_CLIENT_EMAIL and VERTEX_PRIVATE_KEY) or Anthropic (ANTHROPIC_API_KEY) credentials.'
- );
- }
- console.info(`Sonnet4: Initialized with ${models.length} model(s) in fallback chain`);
- _sonnet4Instance = createFallback({
- models,
- modelResetInterval: 60000,
- retryAfterOutput: true,
- onError: (err, modelId) => {
- // Handle various error formats
- let errorMessage = 'Unknown error';
- if (err instanceof Error) {
- errorMessage = err.message;
- } else if (err && typeof err === 'object') {
- const errObj = err as Record<string, unknown>;
- if ('message' in errObj) {
- errorMessage = String(errObj.message);
- }
- if ('type' in errObj) {
- errorMessage = `${errObj.type}: ${errObj.message || 'No message'}`;
- }
- } else {
- errorMessage = String(err);
- }
- const errorDetails =
- err instanceof Error && err.stack ? err.stack : JSON.stringify(err, null, 2);
- console.error(`FALLBACK from model ${modelId}. Error: ${errorMessage}`);
- console.error('Error details:', errorDetails);
- },
- });
- return _sonnet4Instance;
- }
- // Export a proxy that initializes on first use
- export const Sonnet4 = new Proxy({} as ReturnType<typeof createFallback>, {
- get(_target, prop) {
- const instance = initializeSonnet4();
- // Direct property access without receiver to avoid proxy conflicts
- return instance[prop as keyof typeof instance];
- },
- has(_target, prop) {
- const instance = initializeSonnet4();
- return prop in instance;
- },
- ownKeys(_target) {
- const instance = initializeSonnet4();
- return Reflect.ownKeys(instance);
- },
- getOwnPropertyDescriptor(_target, prop) {
- const instance = initializeSonnet4();
- return Reflect.getOwnPropertyDescriptor(instance, prop);
- },
- });
+ // Export Sonnet 4 model using AI Gateway
+ export const Sonnet4 = gatewayModel('anthropic/claude-4-sonnet-20250514');

View File

@@ -877,6 +877,9 @@ importers:
'@ai-sdk/anthropic':
specifier: ^2.0.0
version: 2.0.1(zod@3.25.76)
+ '@ai-sdk/gateway':
+ specifier: ^1.0.15
+ version: 1.0.15(zod@3.25.76)
'@ai-sdk/google-vertex':
specifier: ^3.0.0
version: 3.0.3(zod@3.25.76)
@@ -1248,6 +1251,12 @@
peerDependencies:
zod: ^3.25.76 || ^4
+ '@ai-sdk/gateway@1.0.15':
+ resolution: {integrity: sha512-xySXoQ29+KbGuGfmDnABx+O6vc7Gj7qugmj1kGpn0rW0rQNn6UKUuvscKMzWyv1Uv05GyC1vqHq8ZhEOLfXscQ==}
+ engines: {node: '>=18'}
+ peerDependencies:
+ zod: ^3.25.76 || ^4
'@ai-sdk/gateway@1.0.3':
resolution: {integrity: sha512-QRGz2vH1WR9NvCv8gWocoebAKiXcuqj22mug6i8COeVsp33x5K5cK2DT4TwiQx5SfYbqJbVoBT+UqnHF7A3PHA==}
engines: {node: '>=18'}
@@ -1290,6 +1299,12 @@
peerDependencies:
zod: ^3.25.76 || ^4
+ '@ai-sdk/provider-utils@3.0.7':
+ resolution: {integrity: sha512-o3BS5/t8KnBL3ubP8k3w77AByOypLm+pkIL/DCw0qKkhDbvhCy+L3hRTGPikpdb8WHcylAeKsjgwOxhj4cqTUA==}
+ engines: {node: '>=18'}
+ peerDependencies:
+ zod: ^3.25.76 || ^4
'@ai-sdk/provider@1.1.3':
resolution: {integrity: sha512-qZMxYJ0qqX/RfnuIaab+zp8UAeJn/ygXXAffR5I4N0n1IrvA6qBsjc8hXLmBiMV2zoXlifkacF7sEFnYnjBcqg==}
engines: {node: '>=18'}
@@ -7620,6 +7635,10 @@
resolution: {integrity: sha512-nVpZkTMM9rF6AQ9gPJpFsNAMt48wIzB5TQgiTLdHiuO8XEDhUgZEhqKlZWXbIzo9VmJ/HvysHqEaVeD5v9TPvA==}
engines: {node: '>=20.0.0'}
+ eventsource-parser@3.0.6:
+ resolution: {integrity: sha512-Vo1ab+QXPzZ4tCa8SwIHJFaSzy4R6SHf7BY79rFBDf0idraZWAkYrDjDj8uWaSm3S2TK+hJ7/t1CEmZ7jXw+pg==}
+ engines: {node: '>=18.0.0'}
eventsource@3.0.7:
resolution: {integrity: sha512-CRT1WTyuQoD771GW56XEZFQ/ZoSfWid1alKGDYMmkt2yl8UXrVR4pspqWNEcqKvVIzg6PAltWjxcSSPrboA4iA==}
engines: {node: '>=18.0.0'}
@@ -12325,6 +12344,12 @@
'@ai-sdk/provider-utils': 3.0.1(zod@3.25.76)
zod: 3.25.76
+ '@ai-sdk/gateway@1.0.15(zod@3.25.76)':
+ dependencies:
+ '@ai-sdk/provider': 2.0.0
+ '@ai-sdk/provider-utils': 3.0.7(zod@3.25.76)
+ zod: 3.25.76
'@ai-sdk/gateway@1.0.3(zod@3.25.1)':
dependencies:
'@ai-sdk/provider': 2.0.0
@@ -12391,6 +12416,13 @@
zod: 3.25.76
zod-to-json-schema: 3.24.6(zod@3.25.76)
+ '@ai-sdk/provider-utils@3.0.7(zod@3.25.76)':
+ dependencies:
+ '@ai-sdk/provider': 2.0.0
+ '@standard-schema/spec': 1.0.0
+ eventsource-parser: 3.0.6
+ zod: 3.25.76
'@ai-sdk/provider@1.1.3':
dependencies:
json-schema: 0.4.0
@@ -18323,14 +18355,14 @@
msw: 2.10.4(@types/node@20.19.4)(typescript@5.9.2)
vite: 7.0.5(@types/node@20.19.4)(jiti@2.4.2)(lightningcss@1.30.1)(sass@1.90.0)(terser@5.43.1)(tsx@4.20.4)(yaml@2.8.0)
- '@vitest/mocker@3.2.4(msw@2.10.4(@types/node@24.0.10)(typescript@5.9.2))(vite@7.0.5(@types/node@20.19.4)(jiti@2.4.2)(lightningcss@1.30.1)(sass@1.90.0)(terser@5.43.1)(tsx@4.20.4)(yaml@2.8.0))':
+ '@vitest/mocker@3.2.4(msw@2.10.4(@types/node@24.0.10)(typescript@5.9.2))(vite@7.0.5(@types/node@24.0.10)(jiti@2.4.2)(lightningcss@1.30.1)(sass@1.90.0)(terser@5.43.1)(tsx@4.20.4)(yaml@2.8.0))':
dependencies:
'@vitest/spy': 3.2.4
estree-walker: 3.0.3
magic-string: 0.30.17
optionalDependencies:
msw: 2.10.4(@types/node@24.0.10)(typescript@5.9.2)
- vite: 7.0.5(@types/node@20.19.4)(jiti@2.4.2)(lightningcss@1.30.1)(sass@1.90.0)(terser@5.43.1)(tsx@4.20.4)(yaml@2.8.0)
+ vite: 7.0.5(@types/node@24.0.10)(jiti@2.4.2)(lightningcss@1.30.1)(sass@1.90.0)(terser@5.43.1)(tsx@4.20.4)(yaml@2.8.0)
'@vitest/mocker@3.2.4(msw@2.10.4(@types/node@24.0.10)(typescript@5.9.2))(vite@7.1.3(@types/node@24.0.10)(jiti@2.4.2)(lightningcss@1.30.1)(sass@1.90.0)(terser@5.43.1)(tsx@4.20.4)(yaml@2.8.0))':
dependencies:
@@ -20420,6 +20452,8 @@
eventsource-parser@3.0.3: {}
+ eventsource-parser@3.0.6: {}
eventsource@3.0.7:
dependencies:
eventsource-parser: 3.0.3
@@ -25525,7 +25559,7 @@
dependencies:
'@types/chai': 5.2.2
'@vitest/expect': 3.2.4
- '@vitest/mocker': 3.2.4(msw@2.10.4(@types/node@24.0.10)(typescript@5.9.2))(vite@7.0.5(@types/node@20.19.4)(jiti@2.4.2)(lightningcss@1.30.1)(sass@1.90.0)(terser@5.43.1)(tsx@4.20.4)(yaml@2.8.0))
+ '@vitest/mocker': 3.2.4(msw@2.10.4(@types/node@24.0.10)(typescript@5.9.2))(vite@7.0.5(@types/node@24.0.10)(jiti@2.4.2)(lightningcss@1.30.1)(sass@1.90.0)(terser@5.43.1)(tsx@4.20.4)(yaml@2.8.0))
'@vitest/pretty-format': 3.2.4
'@vitest/runner': 3.2.4
'@vitest/snapshot': 3.2.4