buster/packages/ai/src/agents/analytics-engineer-agent/analytics-engineer-agent.ts

154 lines
6.0 KiB
TypeScript
Raw Normal View History

2025-10-01 10:49:25 +08:00
import type { LanguageModelV2 } from '@ai-sdk/provider';
import type { Sandbox } from '@buster/sandbox';
2025-08-06 12:11:48 +08:00
import { type ModelMessage, hasToolCall, stepCountIs, streamText } from 'ai';
import { wrapTraced } from 'braintrust';
import z from 'zod';
2025-09-03 11:21:33 +08:00
import { DEFAULT_ANTHROPIC_OPTIONS } from '../../llm/providers/gateway';
2025-08-08 22:03:22 +08:00
import { Sonnet4 } from '../../llm/sonnet-4';
2025-10-01 12:15:38 +08:00
import { createIdleTool } from '../../tools';
2025-10-01 12:44:15 +08:00
import { createEditFileTool, createLsTool, createMultiEditFileTool, createWriteFileTool } from '../../tools/file-tools';
2025-10-01 12:15:38 +08:00
import { createBashTool } from '../../tools/file-tools/bash-tool/bash-tool';
import { createGrepTool } from '../../tools/file-tools/grep-tool/grep-tool';
import { createReadFileTool } from '../../tools/file-tools/read-file-tool/read-file-tool';
import { type AgentContext, repairToolCall } from '../../utils/tool-call-repair';
import { getDocsAgentSystemPrompt as getAnalyticsEngineerAgentSystemPrompt } from './get-analytics-engineer-agent-system-prompt';
import type { ToolEventCallback } from './tool-events';
/** Identifier for this agent; passed as `agentName` in the tool-call repair context. */
export const ANALYST_ENGINEER_AGENT_NAME = 'analyticsEngineerAgent';

/**
 * Stop conditions handed to `streamText` via `stopWhen`: a 100-step ceiling,
 * and a call to the tool registered under the key `idleTool`.
 */
const STOP_CONDITIONS = [
  stepCountIs(100),
  hasToolCall('idleTool'),
];
2025-08-06 12:11:48 +08:00
/**
 * Shape of the options accepted by `createAnalyticsEngineerAgent`.
 *
 * - The identifier fields and `folder_structure` are plain strings.
 * - `sandbox` is optional; when present it must be a truthy object exposing
 *   both an `id` and an `fs` property.
 * - `model` is optional and has no runtime check attached to it.
 */
const AnalyticsEngineerAgentOptionsSchema = z.object({
  folder_structure: z.string().describe('The file structure of the dbt repository'),
  userId: z.string(),
  chatId: z.string(),
  dataSourceId: z.string(),
  organizationId: z.string(),
  messageId: z.string(),
  sandbox: z
    .custom<Sandbox>(
      // Structural check only: a non-null object carrying `id` and `fs` keys.
      (candidate) =>
        candidate && typeof candidate === 'object' && 'id' in candidate && 'fs' in candidate,
      { message: 'Invalid Sandbox instance' }
    )
    .optional(),
  model: z
    .custom<LanguageModelV2>()
    .optional()
    .describe('Custom language model to use (defaults to Sonnet4)'),
});
/**
 * Shape of the argument accepted by the agent's `stream` function: the
 * conversation messages to send. Individual messages are typed as
 * `ModelMessage` but carry no runtime check.
 */
const AnalyticsEngineerAgentStreamOptionsSchema = z.object({
  messages: z
    .array(z.custom<ModelMessage>())
    .describe('The messages to send to the docs agent'),
});
2025-08-06 12:11:48 +08:00
/**
 * Options for creating the agent: everything described by
 * `AnalyticsEngineerAgentOptionsSchema`, plus an optional `onToolEvent`
 * callback that lives outside the schema.
 */
export type AnalyticsEngineerAgentOptions =
  z.infer<typeof AnalyticsEngineerAgentOptionsSchema> & {
    onToolEvent?: ToolEventCallback;
  };

/** Argument type of the agent's `stream` function, inferred from its schema. */
export type AnalyticsEngineerAgentStreamOptions =
  z.infer<typeof AnalyticsEngineerAgentStreamOptionsSchema>;
2025-08-06 12:11:48 +08:00
/**
 * Agent options in which `sandbox` is required rather than optional;
 * intended as the context type handed to tools that need a sandbox.
 */
export type DocsAgentContextWithSandbox =
  AnalyticsEngineerAgentOptions & {
    sandbox: Sandbox;
  };
/**
 * Creates the analytics engineer agent.
 *
 * Builds the system message and the file/shell tools once, then returns an
 * object whose `stream` function runs a traced `streamText` call with those
 * tools.
 *
 * @param analyticsEngineerAgentOptions - Agent configuration. `folder_structure`
 *   feeds the system prompt and is also passed to the tools as their
 *   `projectDirectory`; `messageId` and `onToolEvent` are forwarded to the tools;
 *   `model` overrides the default `Sonnet4` model. The whole object is exposed to
 *   tools through `experimental_context`.
 * @returns An object with a single `stream` function.
 */
export function createAnalyticsEngineerAgent(analyticsEngineerAgentOptions: AnalyticsEngineerAgentOptions) {
  const { messageId, onToolEvent } = analyticsEngineerAgentOptions;
  // NOTE(review): `folder_structure` is described in the schema as the repo's
  // file structure, yet it is what every tool receives as `projectDirectory`.
  // Confirm callers really pass a directory path here.
  const projectDirectory = analyticsEngineerAgentOptions.folder_structure;

  const systemMessage = {
    role: 'system',
    content: getAnalyticsEngineerAgentSystemPrompt(analyticsEngineerAgentOptions.folder_structure),
    providerOptions: DEFAULT_ANTHROPIC_OPTIONS,
  } as ModelMessage;

  const idleTool = createIdleTool({ onToolEvent });
  const writeFileTool = createWriteFileTool({ messageId, projectDirectory, onToolEvent });
  const grepTool = createGrepTool({ messageId, projectDirectory, onToolEvent });
  // NOTE(review): unlike its siblings, the read-file tool is not given
  // `onToolEvent`. Kept as-is; confirm whether that is intentional.
  const readFileTool = createReadFileTool({ messageId, projectDirectory });
  const bashTool = createBashTool({ messageId, projectDirectory, onToolEvent });
  const editFileTool = createEditFileTool({ messageId, projectDirectory, onToolEvent });
  const multiEditFileTool = createMultiEditFileTool({ messageId, projectDirectory, onToolEvent });
  const lsTool = createLsTool({ messageId, projectDirectory, onToolEvent });

  /**
   * Streams a model response for the given conversation.
   *
   * The system message is prepended to `messages`, a tool call is required on
   * every step (`toolChoice: 'required'`), and the run ends according to
   * `STOP_CONDITIONS`. Malformed tool calls are routed through `repairToolCall`.
   *
   * @param options - Stream options; `messages` is the conversation so far.
   * @returns The result of `streamText`, produced inside a Braintrust trace span.
   */
  async function stream({ messages }: AnalyticsEngineerAgentStreamOptions) {
    const tools = {
      idleTool,
      grepTool,
      writeFileTool,
      readFileTool,
      bashTool,
      editFileTool,
      multiEditFileTool,
      lsTool,
    };

    // Derive the tool names from the map actually registered with the model so
    // the repair context can never advertise tools that are not callable here.
    const agentContext: AgentContext = {
      agentName: ANALYST_ENGINEER_AGENT_NAME,
      availableTools: Object.keys(tools),
    };

    return wrapTraced(
      () =>
        streamText({
          model: analyticsEngineerAgentOptions.model ?? Sonnet4,
          providerOptions: DEFAULT_ANTHROPIC_OPTIONS,
          tools,
          messages: [systemMessage, ...messages],
          stopWhen: STOP_CONDITIONS,
          toolChoice: 'required',
          maxOutputTokens: 10000,
          temperature: 0,
          experimental_context: analyticsEngineerAgentOptions,
          experimental_repairToolCall: async (repairContext) => {
            return repairToolCall({
              toolCall: repairContext.toolCall,
              tools: repairContext.tools,
              error: repairContext.error,
              messages: repairContext.messages,
              // Only forward optional fields when they are present.
              ...(repairContext.system && { system: repairContext.system }),
              ...(repairContext.inputSchema && { inputSchema: repairContext.inputSchema }),
              agentContext,
            });
          },
        }),
      {
        // NOTE(review): span name still says 'Docs Agent'; left unchanged because
        // existing traces/dashboards may key on it. Confirm before renaming.
        name: 'Docs Agent',
      }
    )();
  }

  return {
    stream,
  };
}