diff --git a/apps/cli/package.json b/apps/cli/package.json
index d867dcf7e..30b202dcc 100644
--- a/apps/cli/package.json
+++ b/apps/cli/package.json
@@ -22,6 +22,7 @@
     "clean": "rm -rf dist"
   },
   "dependencies": {
+    "@buster/ai": "workspace:*",
     "@buster/sdk": "workspace:*",
     "@buster/server-shared": "workspace:*",
     "chalk": "^5.6.0",
diff --git a/apps/cli/src/commands/main.tsx b/apps/cli/src/commands/main.tsx
index 7a0b49839..4d222d3e5 100644
--- a/apps/cli/src/commands/main.tsx
+++ b/apps/cli/src/commands/main.tsx
@@ -228,7 +228,7 @@ export function Main() {
     return responses;
   };
 
-  const handleSubmit = useCallback(() => {
+  const handleSubmit = useCallback(async () => {
     const trimmed = input.trim();
     if (!trimmed) {
       setInput('');
@@ -242,10 +242,28 @@
       content: trimmed,
     };
 
-    const mockResponses = getMockResponse(trimmed);
-
-    setMessages((prev) => [...prev, userMessage, ...mockResponses]);
+    setMessages((prev) => [...prev, userMessage]);
     setInput('');
+
+    // Import and run the docs agent
+    const { runDocsAgent } = await import('../services/docs-agent-handler');
+
+    await runDocsAgent({
+      userMessage: trimmed,
+      onMessage: (agentMessage) => {
+        messageCounter.current += 1;
+        setMessages((prev) => [
+          ...prev,
+          {
+            id: messageCounter.current,
+            type: agentMessage.type,
+            content: agentMessage.content,
+            messageType: agentMessage.messageType,
+            metadata: agentMessage.metadata,
+          },
+        ]);
+      },
+    });
   }, [input]);
 
   const handleCommandExecute = useCallback(
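The handler introduced next exposes a deliberately small contract: runDocsAgent never returns a transcript; it pushes each agent event through the onMessage callback, which is what lets the Ink component above append messages as they stream in. A minimal sketch of driving it outside the UI (the script, import path, and log formatting are illustrative, not part of this diff):

```ts
// Hypothetical smoke-test script, assumed to run from apps/cli/src via tsx.
import { runDocsAgent } from './services/docs-agent-handler';

await runDocsAgent({
  userMessage: 'Document the orders table',
  onMessage: (msg) => {
    // messageType is only set for tool calls (PLAN, EXECUTE, WEB_SEARCH, ...)
    const tag = msg.messageType ? `[${msg.messageType}] ` : '';
    process.stdout.write(`${tag}${msg.content}`);
  },
});
```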
diff --git a/apps/cli/src/services/docs-agent-handler.ts b/apps/cli/src/services/docs-agent-handler.ts
new file mode 100644
index 000000000..bbc1a5b44
--- /dev/null
+++ b/apps/cli/src/services/docs-agent-handler.ts
@@ -0,0 +1,118 @@
+import { createDocsAgent } from '@buster/ai/agents/docs-agent/docs-agent';
+import { createProxyModel } from '@buster/ai/llm/providers/proxy-model';
+import type { ModelMessage } from 'ai';
+import { randomUUID } from 'node:crypto';
+import { getProxyConfig } from '../utils/ai-proxy';
+
+export interface DocsAgentMessage {
+  id: number;
+  type: 'user' | 'assistant';
+  content: string;
+  messageType?: 'PLAN' | 'EDIT' | 'EXECUTE' | 'WRITE' | 'WEB_SEARCH';
+  metadata?: string;
+}
+
+export interface RunDocsAgentParams {
+  userMessage: string;
+  onMessage: (message: DocsAgentMessage) => void;
+}
+
+/**
+ * Runs the docs agent in the CLI without a sandbox.
+ * The agent runs locally but uses the proxy model to route LLM calls through the server.
+ */
+export async function runDocsAgent(params: RunDocsAgentParams): Promise<void> {
+  const { userMessage, onMessage } = params;
+
+  let messageId = 1;
+
+  // Get proxy configuration
+  const proxyConfig = await getProxyConfig();
+
+  // Create proxy model that routes through server
+  const proxyModel = createProxyModel({
+    baseURL: proxyConfig.baseURL,
+    modelId: 'anthropic/claude-4-sonnet-20250514',
+  });
+
+  // Create the docs agent with proxy model
+  // Tools are handled locally, only model calls go through proxy
+  const docsAgent = createDocsAgent({
+    folder_structure: 'CLI mode - limited file access',
+    userId: 'cli-user',
+    chatId: randomUUID(),
+    dataSourceId: '',
+    organizationId: 'cli',
+    messageId: randomUUID(),
+    model: proxyModel,
+  });
+
+  const messages: ModelMessage[] = [
+    {
+      role: 'user',
+      content: userMessage,
+    },
+  ];
+
+  try {
+    // Execute the docs agent
+    const result = await docsAgent.stream({ messages });
+
+    // Stream the response
+    for await (const part of result.fullStream) {
+      // Handle different stream part types
+      if (part.type === 'text-delta') {
+        onMessage({
+          id: messageId++,
+          type: 'assistant',
+          content: part.delta,
+        });
+      } else if (part.type === 'tool-call') {
+        // Map tool calls to message types
+        let messageType: DocsAgentMessage['messageType'];
+        let content = '';
+        let metadata = '';
+
+        switch (part.toolName) {
+          case 'sequentialThinking':
+            messageType = 'PLAN';
+            content = 'Planning next steps...';
+            break;
+          case 'bashExecute':
+            messageType = 'EXECUTE';
+            content = 'Executing command...';
+            break;
+          case 'webSearch':
+            messageType = 'WEB_SEARCH';
+            content = 'Searching the web...';
+            break;
+          case 'grepSearch':
+            messageType = 'EXECUTE';
+            content = 'Searching files...';
+            break;
+          case 'idleTool':
+            messageType = 'EXECUTE';
+            content = 'Entering idle state...';
+            break;
+          default:
+            content = `Using tool: ${part.toolName}`;
+        }
+
+        onMessage({
+          id: messageId++,
+          type: 'assistant',
+          content,
+          messageType,
+          metadata,
+        });
+      }
+      // Ignore other stream part types (start, finish, etc.)
+    }
+  } catch (error) {
+    onMessage({
+      id: messageId++,
+      type: 'assistant',
+      content: `Error: ${error instanceof Error ? error.message : 'Unknown error'}`,
+    });
+  }
+}
diff --git a/apps/cli/src/utils/ai-proxy.ts b/apps/cli/src/utils/ai-proxy.ts
new file mode 100644
index 000000000..5c3e20866
--- /dev/null
+++ b/apps/cli/src/utils/ai-proxy.ts
@@ -0,0 +1,35 @@
+import { z } from 'zod';
+
+const ProxyConfigSchema = z.object({
+  baseURL: z.string().url().describe('Base URL for the AI proxy endpoint'),
+});
+
+export type ProxyConfig = z.infer<typeof ProxyConfigSchema>;
+
+/**
+ * Gets the AI proxy configuration for the CLI.
+ *
+ * Priority order:
+ * 1. BUSTER_AI_PROXY_URL environment variable
+ * 2. Saved credentials apiUrl from ~/.buster/credentials.json
+ * 3. Default to localhost:3002 for local development
+ */
+export async function getProxyConfig(): Promise<ProxyConfig> {
+  const { getCredentials } = await import('./credentials');
+  const creds = await getCredentials();
+
+  // Check for AI proxy-specific URL (highest priority)
+  const proxyUrl = process.env.BUSTER_AI_PROXY_URL;
+
+  if (proxyUrl) {
+    return ProxyConfigSchema.parse({ baseURL: proxyUrl });
+  }
+
+  // Fall back to regular API URL from credentials
+  if (creds?.apiUrl) {
+    return ProxyConfigSchema.parse({ baseURL: creds.apiUrl });
+  }
+
+  // Default to localhost for development
+  return ProxyConfigSchema.parse({ baseURL: 'http://localhost:3002' });
+}
diff --git a/apps/server/src/api/v2/index.ts b/apps/server/src/api/v2/index.ts
index 192879d62..9116418df 100644
--- a/apps/server/src/api/v2/index.ts
+++ b/apps/server/src/api/v2/index.ts
@@ -10,6 +10,7 @@ import dictionariesRoutes from './dictionaries';
 import docsRoutes from './docs';
 import electricShapeRoutes from './electric-shape';
 import githubRoutes from './github';
+import { llm } from './llm';
 import metricFilesRoutes from './metric_files';
 import organizationRoutes from './organization';
 import publicRoutes from './public';
@@ -33,6 +34,7 @@ const app = new Hono()
   .route('/electric-shape', electricShapeRoutes)
   .route('/healthcheck', healthcheckRoutes)
   .route('/chats', chatsRoutes)
+  .route('/llm', llm)
   .route('/metric_files', metricFilesRoutes)
   .route('/github', githubRoutes)
   .route('/slack', slackRoutes)
diff --git a/apps/server/src/api/v2/llm/index.ts b/apps/server/src/api/v2/llm/index.ts
new file mode 100644
index 000000000..fdf72d2c7
--- /dev/null
+++ b/apps/server/src/api/v2/llm/index.ts
@@ -0,0 +1,4 @@
+import { Hono } from 'hono';
+import { proxy } from './proxy';
+
+export const llm = new Hono().route('/proxy', proxy);
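The proxy route wired up above is implemented in the next two files. Its wire protocol is intentionally minimal: one JSON body in ({ model, options }), newline-delimited JSON LanguageModelV2StreamPart objects out. A sketch of a raw client, assuming a local server on port 3002 and no auth middleware on the route (the prompt shape follows the AI SDK's LanguageModelV2 prompt format):

```ts
// Illustrative NDJSON consumer for POST /api/v2/llm/proxy.
const response = await fetch('http://localhost:3002/api/v2/llm/proxy', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    model: 'anthropic/claude-4-sonnet-20250514',
    options: {
      prompt: [{ role: 'user', content: [{ type: 'text', text: 'hello' }] }],
    },
  }),
});
if (!response.ok || !response.body) throw new Error(`Proxy request failed: ${response.status}`);

const reader = response.body.getReader();
const decoder = new TextDecoder();
let buffer = '';
while (true) {
  const { done, value } = await reader.read();
  if (done) break;
  buffer += decoder.decode(value, { stream: true });
  const lines = buffer.split('\n');
  buffer = lines.pop() || '';
  for (const line of lines) {
    if (line.trim()) console.log(JSON.parse(line)); // one stream part per line
  }
}
```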
diff --git a/apps/server/src/api/v2/llm/proxy/POST.ts b/apps/server/src/api/v2/llm/proxy/POST.ts
new file mode 100644
index 000000000..631cc6139
--- /dev/null
+++ b/apps/server/src/api/v2/llm/proxy/POST.ts
@@ -0,0 +1,42 @@
+import { gatewayModel } from '@buster/ai/llm/providers/gateway';
+import { zValidator } from '@hono/zod-validator';
+import { Hono } from 'hono';
+import { stream } from 'hono/streaming';
+import { z } from 'zod';
+
+const ProxyRequestSchema = z.object({
+  model: z.string().describe('Model ID to use'),
+  options: z.any().describe('LanguageModelV2CallOptions from AI SDK'),
+});
+
+export const POST = new Hono().post('/', zValidator('json', ProxyRequestSchema), async (c) => {
+  try {
+    const { model, options } = c.req.valid('json');
+
+    console.info('[PROXY] Request received', { model });
+
+    // Get the gateway model
+    const modelInstance = gatewayModel(model);
+
+    // Call the model's doStream method directly (this is a model-level operation)
+    const result = await modelInstance.doStream(options);
+
+    // Stream the LanguageModelV2StreamPart objects
+    return stream(c, async (stream) => {
+      try {
+        const reader = result.stream.getReader();
+        while (true) {
+          const { done, value } = await reader.read();
+          if (done) break;
+          await stream.write(`${JSON.stringify(value)}\n`);
+        }
+      } catch (streamError) {
+        console.error('[PROXY] Stream error:', streamError);
+        throw streamError;
+      }
+    });
+  } catch (error) {
+    console.error('[PROXY] Endpoint error:', error);
+    return c.json({ error: error instanceof Error ? error.message : 'Unknown error' }, 500);
+  }
+});
diff --git a/apps/server/src/api/v2/llm/proxy/index.ts b/apps/server/src/api/v2/llm/proxy/index.ts
new file mode 100644
index 000000000..d981eb11a
--- /dev/null
+++ b/apps/server/src/api/v2/llm/proxy/index.ts
@@ -0,0 +1,4 @@
+import { Hono } from 'hono';
+import { POST } from './POST';
+
+export const proxy = new Hono().route('/', POST);
diff --git a/packages/ai/src/agents/docs-agent/docs-agent.ts b/packages/ai/src/agents/docs-agent/docs-agent.ts
index 5a2bbee70..e7b8c597d 100644
--- a/packages/ai/src/agents/docs-agent/docs-agent.ts
+++ b/packages/ai/src/agents/docs-agent/docs-agent.ts
@@ -1,23 +1,13 @@
 import type { Sandbox } from '@buster/sandbox';
+import type { LanguageModelV2 } from '@ai-sdk/provider';
 import { type ModelMessage, hasToolCall, stepCountIs, streamText } from 'ai';
 import { wrapTraced } from 'braintrust';
 import z from 'zod';
 import { DEFAULT_ANTHROPIC_OPTIONS } from '../../llm/providers/gateway';
 import { Sonnet4 } from '../../llm/sonnet-4';
 import {
-  createBashTool,
-  createCheckOffTodoListTool,
-  createCreateFilesTool,
-  createDeleteFilesTool,
-  createEditFilesTool,
-  createGrepSearchTool,
+  bashExecute,
   createIdleTool,
-  createListFilesTool,
-  createReadFilesTool,
-  createSequentialThinkingTool,
-  createUpdateClarificationsFileTool,
-  createWebSearchTool,
-  executeSqlDocsAgent,
 } from '../../tools';
 import { type AgentContext, repairToolCall } from '../../utils/tool-call-repair';
 import { getDocsAgentSystemPrompt } from './get-docs-agent-system-prompt';
@@ -41,6 +31,7 @@ const DocsAgentOptionsSchema = z.object({
       { message: 'Invalid Sandbox instance' }
     )
     .optional(),
+  model: z.custom<LanguageModelV2>().optional().describe('Custom language model to use (defaults to Sonnet4)'),
 });
 
 const DocsStreamOptionsSchema = z.object({
@@ -62,73 +53,10 @@ export function createDocsAgent(docsAgentOptions: DocsAgentOptions) {
 
   const idleTool = createIdleTool();
 
-  // Create tool context with messageId and sandbox if available
-  // Create file tools with context (only if sandbox is available)
-  const listFiles = docsAgentOptions.sandbox
-    ? createListFilesTool({
-        messageId: docsAgentOptions.messageId || 'default',
-        sandbox: docsAgentOptions.sandbox,
-      })
-    : undefined;
-  const readFiles = docsAgentOptions.sandbox
-    ? createReadFilesTool({
-        messageId: docsAgentOptions.messageId || 'default',
-        sandbox: docsAgentOptions.sandbox,
-      })
-    : undefined;
-  const createFiles = docsAgentOptions.sandbox
-    ? createCreateFilesTool({
-        messageId: docsAgentOptions.messageId || 'default',
-        sandbox: docsAgentOptions.sandbox,
-      })
-    : undefined;
-  const editFiles = docsAgentOptions.sandbox
-    ? createEditFilesTool({
-        messageId: docsAgentOptions.messageId || 'default',
-        sandbox: docsAgentOptions.sandbox,
-      })
-    : undefined;
-  const deleteFiles = docsAgentOptions.sandbox
-    ? createDeleteFilesTool({
-        messageId: docsAgentOptions.messageId || 'default',
-        sandbox: docsAgentOptions.sandbox,
-      })
-    : undefined;
-  const bashExecute = docsAgentOptions.sandbox
-    ? createBashTool({
-        messageId: docsAgentOptions.messageId || 'default',
-        sandbox: docsAgentOptions.sandbox,
-      })
-    : undefined;
-  const grepSearch = docsAgentOptions.sandbox
-    ? createGrepSearchTool({
-        messageId: docsAgentOptions.messageId || 'default',
-        sandbox: docsAgentOptions.sandbox,
-      })
-    : undefined;
-
-  const webSearch = createWebSearchTool();
-
-  // Create planning tools with simple context
-  const checkOffTodoList = createCheckOffTodoListTool({
-    todoList: '',
-    updateTodoList: () => {},
-  });
-
-  const updateClarificationsFile = createUpdateClarificationsFileTool({
-    clarifications: [],
-    updateClarifications: () => {},
-  });
-
   async function stream({ messages }: DocsStreamOptions) {
     // Collect available tools dynamically based on what's enabled
     const availableTools: string[] = ['sequentialThinking'];
-    if (grepSearch) availableTools.push('grepSearch');
-    if (readFiles) availableTools.push('readFiles');
-    if (editFiles) availableTools.push('editFiles');
-    if (createFiles) availableTools.push('createFiles');
-    if (deleteFiles) availableTools.push('deleteFiles');
-    if (listFiles) availableTools.push('listFiles');
     availableTools.push('executeSql');
     if (bashExecute) availableTools.push('bashExecute');
     availableTools.push('updateClarificationsFile', 'checkOffTodoList', 'idleTool', 'webSearch');
@@ -141,24 +69,10 @@ export function createDocsAgent(docsAgentOptions: DocsAgentOptions) {
     return wrapTraced(
       () =>
        streamText({
-          model: Sonnet4,
+          model: docsAgentOptions.model || Sonnet4,
           providerOptions: DEFAULT_ANTHROPIC_OPTIONS,
           tools: {
-            sequentialThinking: createSequentialThinkingTool({
-              messageId: docsAgentOptions.messageId,
-            }),
-            ...(grepSearch && { grepSearch }),
-            ...(readFiles && { readFiles }),
-            ...(editFiles && { editFiles }),
-            ...(createFiles && { createFiles }),
-            ...(deleteFiles && { deleteFiles }),
-            ...(listFiles && { listFiles }),
-            executeSql: executeSqlDocsAgent,
-            ...(bashExecute && { bashExecute }),
-            updateClarificationsFile,
-            checkOffTodoList,
             idleTool,
-            webSearch,
           },
           messages: [systemMessage, ...messages],
           stopWhen: STOP_CONDITIONS,
diff --git a/packages/ai/src/llm/index.ts b/packages/ai/src/llm/index.ts
index 05b080fdc..b9a99d4f7 100644
--- a/packages/ai/src/llm/index.ts
+++ b/packages/ai/src/llm/index.ts
@@ -3,3 +3,4 @@ export * from './sonnet-4';
 export * from './gpt-5-nano';
 export * from './gpt-5';
 export * from './gpt-5-mini';
+export * from './providers/proxy-model';
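With the optional model field in place, any LanguageModelV2 can be slotted into the docs agent; the CLI passes the proxy model, and omitting the field keeps the old Sonnet4 default. A sketch of the override point, using placeholder IDs rather than real ones:

```ts
import { createDocsAgent } from '@buster/ai/agents/docs-agent/docs-agent';
import { createProxyModel } from '@buster/ai/llm/providers/proxy-model';

const agent = createDocsAgent({
  folder_structure: '',
  userId: 'example-user', // placeholder values
  chatId: 'example-chat',
  dataSourceId: '',
  organizationId: 'example-org',
  messageId: 'example-message',
  // Any LanguageModelV2 works here; leave it out to fall back to Sonnet4.
  model: createProxyModel({
    baseURL: 'http://localhost:3002',
    modelId: 'anthropic/claude-4-sonnet-20250514',
  }),
});
```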
diff --git a/packages/ai/src/llm/providers/proxy-model.ts b/packages/ai/src/llm/providers/proxy-model.ts
new file mode 100644
index 000000000..987b657a2
--- /dev/null
+++ b/packages/ai/src/llm/providers/proxy-model.ts
@@ -0,0 +1,193 @@
+import type {
+  LanguageModelV2,
+  LanguageModelV2CallOptions,
+  LanguageModelV2StreamPart,
+} from '@ai-sdk/provider';
+import { z } from 'zod';
+
+const ProxyModelConfigSchema = z.object({
+  baseURL: z.string().describe('Base URL of the proxy server'),
+  modelId: z.string().describe('Model ID to proxy requests to'),
+});
+
+type ProxyModelConfig = z.infer<typeof ProxyModelConfigSchema>;
+
+/**
+ * Creates a LanguageModelV2-compatible proxy that routes requests through a server endpoint.
+ *
+ * The server endpoint is expected to:
+ * 1. Accept POST requests with { model: string, options: LanguageModelV2CallOptions }
+ * 2. Return a stream of LanguageModelV2StreamPart objects as newline-delimited JSON
+ *
+ * This allows CLI agents to proxy through the server's gateway logic while maintaining
+ * full compatibility with the AI SDK's streaming and tool-calling features.
+ */
+export function createProxyModel(config: ProxyModelConfig): LanguageModelV2 {
+  const validated = ProxyModelConfigSchema.parse(config);
+
+  return {
+    specificationVersion: 'v2',
+    modelId: validated.modelId,
+    provider: 'proxy',
+    supportedUrls: {},
+
+    async doGenerate(options: LanguageModelV2CallOptions) {
+      const response = await fetch(`${validated.baseURL}/api/v2/llm/proxy`, {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({
+          model: validated.modelId,
+          options,
+        }),
+      });
+
+      if (!response.ok) {
+        throw new Error(`Proxy request failed: ${response.status} ${response.statusText}`);
+      }
+
+      // Collect all stream parts
+      const parts: LanguageModelV2StreamPart[] = [];
+      if (!response.body) {
+        throw new Error('Response body is null');
+      }
+      const reader = response.body.getReader();
+      const decoder = new TextDecoder();
+      let buffer = '';
+
+      while (true) {
+        const { done, value } = await reader.read();
+        if (done) break;
+
+        buffer += decoder.decode(value, { stream: true });
+        const lines = buffer.split('\n');
+        buffer = lines.pop() || '';
+
+        for (const line of lines) {
+          if (line.trim()) {
+            parts.push(JSON.parse(line));
+          }
+        }
+      }
+
+      // Extract final result from stream parts
+      const textDeltas = parts.filter((p) => p.type === 'text-delta');
+      const toolCallParts = parts.filter(
+        (p) =>
+          p.type === 'tool-input-start' ||
+          p.type === 'tool-input-delta' ||
+          p.type === 'tool-input-end'
+      );
+      const finishPart = parts.find((p) => p.type === 'finish');
+
+      if (!finishPart || finishPart.type !== 'finish') {
+        throw new Error('Stream did not include finish part');
+      }
+
+      const content = [];
+
+      // Add text content if present
+      if (textDeltas.length > 0) {
+        content.push({
+          type: 'text' as const,
+          text: textDeltas.map((p) => (p.type === 'text-delta' ? p.delta : '')).join(''),
+        });
+      }
+
+      // Add tool calls if present
+      if (toolCallParts.length > 0) {
+        // Group tool call inputs by ID
+        const toolCallsById = new Map<
+          string,
+          { toolCallId: string; toolName: string; input: string }
+        >();
+
+        for (const part of toolCallParts) {
+          if (part.type === 'tool-input-start') {
+            toolCallsById.set(part.id, {
+              toolCallId: part.id,
+              toolName: part.toolName,
+              input: '',
+            });
+          } else if (part.type === 'tool-input-delta') {
+            const call = toolCallsById.get(part.id);
+            if (call) {
+              call.input += part.delta;
+            }
+          }
+        }
+
+        // Add tool calls to content
+        for (const call of toolCallsById.values()) {
+          content.push({
+            type: 'tool-call' as const,
+            toolCallId: call.toolCallId,
+            toolName: call.toolName,
+            input: call.input,
+          });
+        }
+      }
+
+      return {
+        content,
+        finishReason: finishPart.finishReason,
+        usage: finishPart.usage,
+        warnings: [],
+        rawCall: { rawPrompt: '', rawSettings: {} },
+      };
+    },
+
+    async doStream(options: LanguageModelV2CallOptions) {
+      const response = await fetch(`${validated.baseURL}/api/v2/llm/proxy`, {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({
+          model: validated.modelId,
+          options,
+        }),
+      });
+
+      if (!response.ok) {
+        throw new Error(`Proxy request failed: ${response.status} ${response.statusText}`);
+      }
+
+      const stream = new ReadableStream({
+        async start(controller) {
+          if (!response.body) {
+            controller.error(new Error('Response body is null'));
+            return;
+          }
+          const reader = response.body.getReader();
+          const decoder = new TextDecoder();
+          let buffer = '';
+
+          try {
+            while (true) {
+              const { done, value } = await reader.read();
+              if (done) break;
+
+              buffer += decoder.decode(value, { stream: true });
+              const lines = buffer.split('\n');
+              buffer = lines.pop() || '';
+
+              for (const line of lines) {
+                if (line.trim()) {
+                  const part = JSON.parse(line) as LanguageModelV2StreamPart;
+                  controller.enqueue(part);
+                }
+              }
+            }
+
+            controller.close();
+          } catch (error) {
+            controller.error(error);
+          }
+        },
+      });
+
+      return {
+        stream,
+        rawCall: { rawPrompt: '', rawSettings: {} },
+      };
+    },
+  };
+}
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 6dbd03272..b02a49ed9 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -120,6 +120,9 @@ importers:
 
   apps/cli:
     dependencies:
+      '@buster/ai':
+        specifier: workspace:*
+        version: link:../../packages/ai
       '@buster/sdk':
         specifier: workspace:*
         version: link:../../packages/sdk
@@ -19602,7 +19605,7 @@ snapshots:
       sirv: 3.0.1
       tinyglobby: 0.2.14
      tinyrainbow: 2.0.0
-      vitest: 3.2.4(@edge-runtime/vm@3.2.0)(@types/debug@4.1.12)(@types/node@24.3.1)(@vitest/browser@3.2.4)(@vitest/ui@3.2.4)(jiti@2.5.1)(jsdom@27.0.0(postcss@8.5.6))(lightningcss@1.30.1)(msw@2.11.3(@types/node@24.3.1)(typescript@5.9.2))(sass@1.93.1)(terser@5.43.1)(tsx@4.20.5)(yaml@2.8.1)
+      vitest: 3.2.4(@edge-runtime/vm@3.2.0)(@types/debug@4.1.12)(@types/node@22.18.1)(@vitest/browser@3.2.4)(@vitest/ui@3.2.4)(jiti@2.5.1)(jsdom@27.0.0(postcss@8.5.6))(lightningcss@1.30.1)(msw@2.11.3(@types/node@22.18.1)(typescript@5.9.2))(sass@1.93.1)(terser@5.43.1)(tsx@4.20.5)(yaml@2.8.1)
 
   '@vitest/utils@3.2.4':
     dependencies:
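Because createProxyModel returns a plain LanguageModelV2, it is not tied to the docs agent: the AI SDK can drive it directly, with only the transport differing from a first-party provider. A minimal end-to-end sketch, assuming the server from this diff is running locally:

```ts
import { streamText } from 'ai';
import { createProxyModel } from '@buster/ai/llm/providers/proxy-model';

const result = streamText({
  model: createProxyModel({
    baseURL: 'http://localhost:3002',
    modelId: 'anthropic/claude-4-sonnet-20250514',
  }),
  prompt: 'Summarize the proxy protocol in one sentence.',
});

// Text deltas arrive exactly as if the model were called in-process.
for await (const delta of result.textStream) {
  process.stdout.write(delta);
}
```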