Merge pull request #673 from buster-so/gpt-5-eval

Gpt 5
This commit is contained in:
dal 2025-08-07 12:51:42 -06:00 committed by GitHub
commit d27691b619
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 107 additions and 23 deletions

View File

@ -39,6 +39,7 @@
"dependencies": {
"@ai-sdk/anthropic": "^1.2.12",
"@ai-sdk/google-vertex": "^2.2.27",
"@ai-sdk/openai": "^1.3.23",
"@ai-sdk/provider": "^1.1.3",
"@buster/access-controls": "workspace:*",
"@buster/data-source": "workspace:*",

View File

@ -7,23 +7,27 @@ import {
modifyDashboards,
modifyMetrics,
} from '../../tools';
import { GPT5 } from '../../utils';
import { Sonnet4 } from '../../utils/models/sonnet-4';
const DEFAULT_OPTIONS = {
maxSteps: 18,
temperature: 0,
maxTokens: 10000,
temperature: 1,
providerOptions: {
anthropic: {
disableParallelToolCalls: true,
},
openai: {
parallelToolCalls: false,
reasoningEffort: 'minimal',
},
},
};
export const analystAgent = new Agent({
name: 'Analyst Agent',
instructions: '', // We control the system messages in the step at stream instantiation
model: Sonnet4,
model: GPT5,
tools: {
createMetrics,
modifyMetrics,

View File

@ -6,12 +6,12 @@ import {
sequentialThinking,
submitThoughts,
} from '../../tools';
import { GPT5 } from '../../utils';
import { Sonnet4 } from '../../utils/models/sonnet-4';
const DEFAULT_OPTIONS = {
maxSteps: 18,
temperature: 0,
maxTokens: 10000,
temperature: 1,
providerOptions: {
anthropic: {
disableParallelToolCalls: true,
@ -22,7 +22,7 @@ const DEFAULT_OPTIONS = {
export const thinkAndPrepAgent = new Agent({
name: 'Think and Prep Agent',
instructions: '', // We control the system messages in the step at stream instantiation
model: Sonnet4,
model: GPT5,
tools: {
sequentialThinking,
executeSql,

View File

@ -14,8 +14,10 @@ export * from './models/ai-fallback';
export * from './models/providers/anthropic';
export * from './models/anthropic-cached';
export * from './models/providers/vertex';
export * from './models/providers/openai';
export * from './models/sonnet-4';
export * from './models/haiku-3-5';
export * from './models/gpt-5';
// Streaming utilities
export * from './streaming';

View File

@ -0,0 +1,62 @@
import type { LanguageModelV1 } from '@ai-sdk/provider';
import { createFallback } from './ai-fallback';
import { openaiModel } from './providers/openai';
// The fallback model is built on demand rather than at import time, so tests
// can install mocks before anything is constructed. Holds null until the
// first successful initialization.
let _gpt5Instance: ReturnType<typeof createFallback> | null = null;

/**
 * Returns the shared GPT-5 fallback model, building and caching it on the
 * first successful call.
 *
 * The OpenAI model is only added to the chain when `OPENAI_API_KEY` is set;
 * a failure while constructing it is logged as a warning and otherwise
 * ignored.
 *
 * @returns The cached result of `createFallback`.
 * @throws Error when no model ended up in the chain (API key missing, or
 *   model construction failed).
 */
function initializeGPT5() {
  // Reuse the cached instance once it exists.
  if (_gpt5Instance) {
    return _gpt5Instance;
  }

  // Collect every model we are able to construct with the current credentials.
  const candidates: LanguageModelV1[] = [];
  const hasOpenAIKey = Boolean(process.env.OPENAI_API_KEY);

  if (hasOpenAIKey) {
    try {
      const gpt5Model = openaiModel('gpt-5-2025-08-07');
      candidates.push(gpt5Model);
      console.info('GPT5: OpenAI model added to fallback chain');
    } catch (initError) {
      console.warn('GPT5: Failed to initialize OpenAI model:', initError);
    }
  }

  // A fallback chain with nothing in it is unusable.
  if (candidates.length < 1) {
    throw new Error('No AI models available. Please set OPENAI_API_KEY environment variable.');
  }

  console.info(`GPT5: Initialized with ${candidates.length} model(s) in fallback chain`);

  const fallback = createFallback({
    models: candidates,
    modelResetInterval: 60000,
    retryAfterOutput: true,
    onError: (err) => {
      console.error(`FALLBACK. Here is the error: ${err}`);
    },
  });
  _gpt5Instance = fallback;

  return fallback;
}
/**
 * Lazily-initialized GPT-5 model handle.
 *
 * The exported value is a Proxy around an empty placeholder object. Every
 * trap first calls `initializeGPT5()` and then delegates to the instance it
 * returns, so nothing is constructed until the export is first used. Because
 * each trap calls `initializeGPT5()`, any error it throws surfaces at the
 * first property access rather than at import time.
 */
export const GPT5 = new Proxy({} as ReturnType<typeof createFallback>, {
  get(_target, prop) {
    const instance = initializeGPT5();
    // Direct property access without receiver to avoid proxy conflicts
    return instance[prop as keyof typeof instance];
  },
  set(_target, prop, value) {
    const instance = initializeGPT5();
    // Forward writes to the real instance. Without this trap an assignment
    // made through the proxy (including `this.x = ...` inside a method that
    // was invoked as `GPT5.method()`) would be stored on the empty
    // placeholder target, while every read is served from the instance, so
    // the written value would never be observed again.
    return Reflect.set(instance, prop, value);
  },
  has(_target, prop) {
    const instance = initializeGPT5();
    return prop in instance;
  },
  ownKeys(_target) {
    const instance = initializeGPT5();
    return Reflect.ownKeys(instance);
  },
  getOwnPropertyDescriptor(_target, prop) {
    const instance = initializeGPT5();
    return Reflect.getOwnPropertyDescriptor(instance, prop);
  },
});

View File

@ -0,0 +1,11 @@
import { createOpenAI } from '@ai-sdk/openai';
import { wrapAISDKModel } from 'braintrust';
/**
 * Builds a Braintrust-traced OpenAI language model.
 *
 * A new OpenAI provider is created on every call, using the API key read
 * from `process.env.OPENAI_API_KEY` at call time.
 *
 * @param modelId - Model identifier passed to the OpenAI provider.
 * @returns The provider's model wrapped by `wrapAISDKModel`.
 */
export const openaiModel = (modelId: string) => {
  const provider = createOpenAI({ apiKey: process.env.OPENAI_API_KEY });
  const baseModel = provider(modelId);

  // Braintrust wrapping adds tracing around the underlying model.
  return wrapAISDKModel(baseModel);
};

View File

@ -308,10 +308,10 @@ importers:
version: 49.2.4(platejs@49.2.4(@types/react@18.3.23)(immer@10.1.1)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(scheduler@0.23.2)(slate-dom@0.116.0(slate@0.117.0))(slate@0.117.0)(use-sync-external-store@1.5.0(react@18.3.1)))(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(typescript@5.8.3)
'@platejs/autoformat':
specifier: 'catalog:'
version: 49.0.0(platejs@49.2.4(immer@10.1.1)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(scheduler@0.23.2)(slate-dom@0.116.0(slate@0.117.0))(slate@0.117.0)(use-sync-external-store@1.5.0(react@18.3.1)))(react-dom@18.3.1(react@18.3.1))(react@18.3.1)
version: 49.0.0(platejs@49.2.4(@types/react@18.3.23)(immer@10.1.1)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(scheduler@0.23.2)(slate-dom@0.116.0(slate@0.117.0))(slate@0.117.0)(use-sync-external-store@1.5.0(react@18.3.1)))(react-dom@18.3.1(react@18.3.1))(react@18.3.1)
'@platejs/basic-nodes':
specifier: 'catalog:'
version: 49.0.0(platejs@49.2.4(immer@10.1.1)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(scheduler@0.23.2)(slate-dom@0.116.0(slate@0.117.0))(slate@0.117.0)(use-sync-external-store@1.5.0(react@18.3.1)))(react-dom@18.3.1(react@18.3.1))(react@18.3.1)
version: 49.0.0(platejs@49.2.4(@types/react@18.3.23)(immer@10.1.1)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(scheduler@0.23.2)(slate-dom@0.116.0(slate@0.117.0))(slate@0.117.0)(use-sync-external-store@1.5.0(react@18.3.1)))(react-dom@18.3.1(react@18.3.1))(react@18.3.1)
'@platejs/basic-styles':
specifier: ^49.0.0
version: 49.0.0(platejs@49.2.4(@types/react@18.3.23)(immer@10.1.1)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(scheduler@0.23.2)(slate-dom@0.116.0(slate@0.117.0))(slate@0.117.0)(use-sync-external-store@1.5.0(react@18.3.1)))(react-dom@18.3.1(react@18.3.1))(react@18.3.1)
@ -362,7 +362,7 @@ importers:
version: 49.2.0(platejs@49.2.4(@types/react@18.3.23)(immer@10.1.1)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(scheduler@0.23.2)(slate-dom@0.116.0(slate@0.117.0))(slate@0.117.0)(use-sync-external-store@1.5.0(react@18.3.1)))(react-dom@18.3.1(react@18.3.1))(react@18.3.1)
'@platejs/markdown':
specifier: 'catalog:'
version: 49.2.1(platejs@49.2.4(immer@10.1.1)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(scheduler@0.23.2)(slate-dom@0.116.0(slate@0.117.0))(slate@0.117.0)(use-sync-external-store@1.5.0(react@18.3.1)))(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(typescript@5.8.3)
version: 49.2.1(platejs@49.2.4(@types/react@18.3.23)(immer@10.1.1)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(scheduler@0.23.2)(slate-dom@0.116.0(slate@0.117.0))(slate@0.117.0)(use-sync-external-store@1.5.0(react@18.3.1)))(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(typescript@5.8.3)
'@platejs/math':
specifier: ^49.0.0
version: 49.0.0(platejs@49.2.4(@types/react@18.3.23)(immer@10.1.1)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(scheduler@0.23.2)(slate-dom@0.116.0(slate@0.117.0))(slate@0.117.0)(use-sync-external-store@1.5.0(react@18.3.1)))(react-dom@18.3.1(react@18.3.1))(react@18.3.1)
@ -847,6 +847,9 @@ importers:
'@ai-sdk/google-vertex':
specifier: ^2.2.27
version: 2.2.27(zod@3.25.1)
'@ai-sdk/openai':
specifier: ^1.3.23
version: 1.3.23(zod@3.25.1)
'@ai-sdk/provider':
specifier: ^1.1.3
version: 1.1.3
@ -1073,13 +1076,13 @@ importers:
version: link:../vitest-config
'@platejs/autoformat':
specifier: 'catalog:'
version: 49.0.0(platejs@49.2.4(immer@10.1.1)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(scheduler@0.23.2)(slate-dom@0.116.0(slate@0.117.0))(slate@0.117.0)(use-sync-external-store@1.5.0(react@18.3.1)))(react-dom@18.3.1(react@18.3.1))(react@18.3.1)
version: 49.0.0(platejs@49.2.4(@types/react@18.3.23)(immer@10.1.1)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(scheduler@0.23.2)(slate-dom@0.116.0(slate@0.117.0))(slate@0.117.0)(use-sync-external-store@1.5.0(react@18.3.1)))(react-dom@18.3.1(react@18.3.1))(react@18.3.1)
'@platejs/basic-nodes':
specifier: 'catalog:'
version: 49.0.0(platejs@49.2.4(immer@10.1.1)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(scheduler@0.23.2)(slate-dom@0.116.0(slate@0.117.0))(slate@0.117.0)(use-sync-external-store@1.5.0(react@18.3.1)))(react-dom@18.3.1(react@18.3.1))(react@18.3.1)
version: 49.0.0(platejs@49.2.4(@types/react@18.3.23)(immer@10.1.1)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(scheduler@0.23.2)(slate-dom@0.116.0(slate@0.117.0))(slate@0.117.0)(use-sync-external-store@1.5.0(react@18.3.1)))(react-dom@18.3.1(react@18.3.1))(react@18.3.1)
'@platejs/markdown':
specifier: 'catalog:'
version: 49.2.1(platejs@49.2.4(immer@10.1.1)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(scheduler@0.23.2)(slate-dom@0.116.0(slate@0.117.0))(slate@0.117.0)(use-sync-external-store@1.5.0(react@18.3.1)))(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(typescript@5.8.3)
version: 49.2.1(platejs@49.2.4(@types/react@18.3.23)(immer@10.1.1)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(scheduler@0.23.2)(slate-dom@0.116.0(slate@0.117.0))(slate@0.117.0)(use-sync-external-store@1.5.0(react@18.3.1)))(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(typescript@5.8.3)
platejs:
specifier: 'catalog:'
version: 49.2.4(@types/react@18.3.23)(immer@10.1.1)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(scheduler@0.23.2)(slate-dom@0.116.0(slate@0.117.0))(slate@0.117.0)(use-sync-external-store@1.5.0(react@18.3.1))
@ -6804,6 +6807,7 @@ packages:
bun@1.2.18:
resolution: {integrity: sha512-OR+EpNckoJN4tHMVZPaTPxDj2RgpJgJwLruTIFYbO3bQMguLd0YrmkWKYqsiihcLgm2ehIjF/H1RLfZiRa7+qQ==}
cpu: [arm64, x64, aarch64]
os: [darwin, linux, win32]
hasBin: true
@ -16510,7 +16514,7 @@ snapshots:
'@platejs/ai@49.2.4(platejs@49.2.4(@types/react@18.3.23)(immer@10.1.1)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(scheduler@0.23.2)(slate-dom@0.116.0(slate@0.117.0))(slate@0.117.0)(use-sync-external-store@1.5.0(react@18.3.1)))(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(typescript@5.8.3)':
dependencies:
'@platejs/markdown': 49.2.1(platejs@49.2.4(immer@10.1.1)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(scheduler@0.23.2)(slate-dom@0.116.0(slate@0.117.0))(slate@0.117.0)(use-sync-external-store@1.5.0(react@18.3.1)))(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(typescript@5.8.3)
'@platejs/markdown': 49.2.1(platejs@49.2.4(@types/react@18.3.23)(immer@10.1.1)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(scheduler@0.23.2)(slate-dom@0.116.0(slate@0.117.0))(slate@0.117.0)(use-sync-external-store@1.5.0(react@18.3.1)))(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(typescript@5.8.3)
'@platejs/selection': 49.2.4(platejs@49.2.4(@types/react@18.3.23)(immer@10.1.1)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(scheduler@0.23.2)(slate-dom@0.116.0(slate@0.117.0))(slate@0.117.0)(use-sync-external-store@1.5.0(react@18.3.1)))(react-dom@18.3.1(react@18.3.1))(react@18.3.1)
lodash: 4.17.21
platejs: 49.2.4(@types/react@18.3.23)(immer@10.1.1)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(scheduler@0.23.2)(slate-dom@0.116.0(slate@0.117.0))(slate@0.117.0)(use-sync-external-store@1.5.0(react@18.3.1))
@ -16520,14 +16524,14 @@ snapshots:
- supports-color
- typescript
'@platejs/autoformat@49.0.0(platejs@49.2.4(immer@10.1.1)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(scheduler@0.23.2)(slate-dom@0.116.0(slate@0.117.0))(slate@0.117.0)(use-sync-external-store@1.5.0(react@18.3.1)))(react-dom@18.3.1(react@18.3.1))(react@18.3.1)':
'@platejs/autoformat@49.0.0(platejs@49.2.4(@types/react@18.3.23)(immer@10.1.1)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(scheduler@0.23.2)(slate-dom@0.116.0(slate@0.117.0))(slate@0.117.0)(use-sync-external-store@1.5.0(react@18.3.1)))(react-dom@18.3.1(react@18.3.1))(react@18.3.1)':
dependencies:
lodash: 4.17.21
platejs: 49.2.4(@types/react@18.3.23)(immer@10.1.1)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(scheduler@0.23.2)(slate-dom@0.116.0(slate@0.117.0))(slate@0.117.0)(use-sync-external-store@1.5.0(react@18.3.1))
react: 18.3.1
react-dom: 18.3.1(react@18.3.1)
'@platejs/basic-nodes@49.0.0(platejs@49.2.4(immer@10.1.1)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(scheduler@0.23.2)(slate-dom@0.116.0(slate@0.117.0))(slate@0.117.0)(use-sync-external-store@1.5.0(react@18.3.1)))(react-dom@18.3.1(react@18.3.1))(react@18.3.1)':
'@platejs/basic-nodes@49.0.0(platejs@49.2.4(@types/react@18.3.23)(immer@10.1.1)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(scheduler@0.23.2)(slate-dom@0.116.0(slate@0.117.0))(slate@0.117.0)(use-sync-external-store@1.5.0(react@18.3.1)))(react-dom@18.3.1(react@18.3.1))(react@18.3.1)':
dependencies:
platejs: 49.2.4(@types/react@18.3.23)(immer@10.1.1)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(scheduler@0.23.2)(slate-dom@0.116.0(slate@0.117.0))(slate@0.117.0)(use-sync-external-store@1.5.0(react@18.3.1))
react: 18.3.1
@ -16583,7 +16587,7 @@ snapshots:
html-entities: 2.6.0
is-hotkey: 0.2.0
jotai: 2.8.4(@types/react@18.3.23)(react@18.3.1)
jotai-optics: 0.4.0(jotai@2.8.4(react@18.3.1))(optics-ts@2.4.1)
jotai-optics: 0.4.0(jotai@2.8.4(@types/react@18.3.23)(react@18.3.1))(optics-ts@2.4.1)
jotai-x: 2.3.3(@types/react@18.3.23)(jotai@2.8.4(@types/react@18.3.23)(react@18.3.1))(react@18.3.1)
lodash: 4.17.21
nanoid: 5.1.5
@ -16594,7 +16598,7 @@ snapshots:
slate-react: 0.117.1(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(slate-dom@0.116.0(slate@0.117.0))(slate@0.117.0)
use-deep-compare: 1.3.0(react@18.3.1)
zustand: 5.0.7(@types/react@18.3.23)(immer@10.1.1)(react@18.3.1)(use-sync-external-store@1.5.0(react@18.3.1))
zustand-x: 6.1.0(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(scheduler@0.23.2)(zustand@5.0.7(immer@10.1.1)(react@18.3.1)(use-sync-external-store@1.5.0(react@18.3.1)))
zustand-x: 6.1.0(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(scheduler@0.23.2)(zustand@5.0.7(@types/react@18.3.23)(immer@10.1.1)(react@18.3.1)(use-sync-external-store@1.5.0(react@18.3.1)))
transitivePeerDependencies:
- '@types/react'
- immer
@ -16685,7 +16689,7 @@ snapshots:
react: 18.3.1
react-dom: 18.3.1(react@18.3.1)
'@platejs/markdown@49.2.1(platejs@49.2.4(immer@10.1.1)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(scheduler@0.23.2)(slate-dom@0.116.0(slate@0.117.0))(slate@0.117.0)(use-sync-external-store@1.5.0(react@18.3.1)))(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(typescript@5.8.3)':
'@platejs/markdown@49.2.1(platejs@49.2.4(@types/react@18.3.23)(immer@10.1.1)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(scheduler@0.23.2)(slate-dom@0.116.0(slate@0.117.0))(slate@0.117.0)(use-sync-external-store@1.5.0(react@18.3.1)))(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(typescript@5.8.3)':
dependencies:
marked: 15.0.12
mdast-util-math: 3.0.0
@ -19242,14 +19246,14 @@ snapshots:
msw: 2.10.4(@types/node@20.19.4)(typescript@5.8.3)
vite: 6.3.5(@types/node@20.19.4)(jiti@2.4.2)(lightningcss@1.30.1)(sass@1.89.2)(terser@5.43.1)(tsx@4.20.3)(yaml@2.8.0)
'@vitest/mocker@3.2.4(msw@2.10.4(@types/node@24.0.10)(typescript@5.8.3))(vite@6.3.5(@types/node@20.19.4)(jiti@2.4.2)(lightningcss@1.30.1)(sass@1.89.2)(terser@5.43.1)(tsx@4.20.3)(yaml@2.8.0))':
'@vitest/mocker@3.2.4(msw@2.10.4(@types/node@24.0.10)(typescript@5.8.3))(vite@6.3.5(@types/node@24.0.10)(jiti@2.4.2)(lightningcss@1.30.1)(sass@1.89.2)(terser@5.43.1)(tsx@4.20.3)(yaml@2.8.0))':
dependencies:
'@vitest/spy': 3.2.4
estree-walker: 3.0.3
magic-string: 0.30.17
optionalDependencies:
msw: 2.10.4(@types/node@24.0.10)(typescript@5.8.3)
vite: 6.3.5(@types/node@20.19.4)(jiti@2.4.2)(lightningcss@1.30.1)(sass@1.89.2)(terser@5.43.1)(tsx@4.20.3)(yaml@2.8.0)
vite: 6.3.5(@types/node@24.0.10)(jiti@2.4.2)(lightningcss@1.30.1)(sass@1.89.2)(terser@5.43.1)(tsx@4.20.3)(yaml@2.8.0)
'@vitest/pretty-format@2.0.5':
dependencies:
@ -22554,7 +22558,7 @@ snapshots:
jose@5.10.0: {}
jotai-optics@0.4.0(jotai@2.8.4(react@18.3.1))(optics-ts@2.4.1):
jotai-optics@0.4.0(jotai@2.8.4(@types/react@18.3.23)(react@18.3.1))(optics-ts@2.4.1):
dependencies:
jotai: 2.8.4(@types/react@18.3.23)(react@18.3.1)
optics-ts: 2.4.1
@ -26578,7 +26582,7 @@ snapshots:
dependencies:
'@types/chai': 5.2.2
'@vitest/expect': 3.2.4
'@vitest/mocker': 3.2.4(msw@2.10.4(@types/node@24.0.10)(typescript@5.8.3))(vite@6.3.5(@types/node@20.19.4)(jiti@2.4.2)(lightningcss@1.30.1)(sass@1.89.2)(terser@5.43.1)(tsx@4.20.3)(yaml@2.8.0))
'@vitest/mocker': 3.2.4(msw@2.10.4(@types/node@24.0.10)(typescript@5.8.3))(vite@6.3.5(@types/node@24.0.10)(jiti@2.4.2)(lightningcss@1.30.1)(sass@1.89.2)(terser@5.43.1)(tsx@4.20.3)(yaml@2.8.0))
'@vitest/pretty-format': 3.2.4
'@vitest/runner': 3.2.4
'@vitest/snapshot': 3.2.4
@ -26985,7 +26989,7 @@ snapshots:
zod@3.25.1: {}
zustand-x@6.1.0(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(scheduler@0.23.2)(zustand@5.0.7(immer@10.1.1)(react@18.3.1)(use-sync-external-store@1.5.0(react@18.3.1))):
zustand-x@6.1.0(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(scheduler@0.23.2)(zustand@5.0.7(@types/react@18.3.23)(immer@10.1.1)(react@18.3.1)(use-sync-external-store@1.5.0(react@18.3.1))):
dependencies:
immer: 10.1.1
lodash.mapvalues: 4.6.0