From 2f4aeba817a8f2b10ad8a7f7b9f477f837b19bb8 Mon Sep 17 00:00:00 2001 From: dal Date: Mon, 28 Jul 2025 11:56:59 -0600 Subject: [PATCH] feat: enhance documentation and testing capabilities for docs agent workflow - Added section in CLAUDE.md for direct database access during integration testing. - Updated `maxSteps` in `docs-agent` to allow for more complex tasks. - Improved validation in `docs-agent-context` for sandbox instances. - Enhanced `create-docs-todos` step to handle todos more effectively. - Introduced comprehensive integration tests for the docs agent workflow, covering various scenarios and edge cases. - Added test helpers for creating mock dbt projects and managing sandboxes. - Implemented error handling and logging improvements in the workflow execution process. --- CLAUDE.md | 11 + .../ai/src/agents/docs-agent/docs-agent.ts | 4 +- packages/ai/src/context/docs-agent-context.ts | 7 +- .../docs-agent/create-docs-todos-step.ts | 46 +- .../src/steps/docs-agent/docs-agent-step.ts | 28 + .../file-tools/bash-tool/bash-execute-tool.ts | 8 +- .../docs-agent-workflow.int.test.ts | 433 ++++++ .../docs-agent/docs-agent-workflow.test.ts | 91 ++ .../docs-agent/test-helpers/README.md | 175 +++ .../test-helpers/context-helpers.ts | 176 +++ .../docs-agent/test-helpers/index.ts | 5 + .../test-helpers/mock-dbt-project.ts | 1239 +++++++++++++++++ .../docs-agent/test-helpers/mock-sandbox.ts | 378 +++++ .../docs-agent/test-helpers/run-example.ts | 143 ++ .../test-helpers/sandbox-helpers.ts | 343 +++++ .../docs-agent/test-helpers/simple-test.ts | 90 ++ packages/sandbox/src/filesystem/add-files.ts | 4 +- 17 files changed, 3145 insertions(+), 36 deletions(-) create mode 100644 packages/ai/src/workflows/docs-agent/docs-agent-workflow.int.test.ts create mode 100644 packages/ai/src/workflows/docs-agent/test-helpers/README.md create mode 100644 packages/ai/src/workflows/docs-agent/test-helpers/context-helpers.ts create mode 100644 packages/ai/src/workflows/docs-agent/test-helpers/index.ts create mode 100644 packages/ai/src/workflows/docs-agent/test-helpers/mock-dbt-project.ts create mode 100644 packages/ai/src/workflows/docs-agent/test-helpers/mock-sandbox.ts create mode 100644 packages/ai/src/workflows/docs-agent/test-helpers/run-example.ts create mode 100644 packages/ai/src/workflows/docs-agent/test-helpers/sandbox-helpers.ts create mode 100644 packages/ai/src/workflows/docs-agent/test-helpers/simple-test.ts diff --git a/CLAUDE.md b/CLAUDE.md index aea4f17f6..67e78c226 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -292,6 +292,17 @@ export async function getWorkspaceSettingsHandler( - `turbo run test` for running all tests - Add `--filter=` to run tests for specific packages +### Database Access for Integration Testing +- **Direct database queries** - You can run queries against the local database using `psql` with the following connection: + ```bash + DATABASE_URL="postgresql://postgres:postgres@127.0.0.1:54322/postgres" + ``` +- **Usage example**: + ```bash + psql "postgresql://postgres:postgres@127.0.0.1:54322/postgres" -c "SELECT * FROM users LIMIT 5;" + ``` +- **Purpose** - This is primarily for writing and iterating on integration tests to verify database state and test query behavior + ## Pre-Completion Workflow - Always run `turbo run test:unit`, `turbo run lint`, and `turbo run build:dry-run` before making any pull request or finishing a feature, bugfix, etc. 
to ensure things make it through CI/CD - You can run all these checks simultaneously with `turbo run build:dry-run lint test:unit` \ No newline at end of file diff --git a/packages/ai/src/agents/docs-agent/docs-agent.ts b/packages/ai/src/agents/docs-agent/docs-agent.ts index 4f083f4f6..1a1a58c23 100644 --- a/packages/ai/src/agents/docs-agent/docs-agent.ts +++ b/packages/ai/src/agents/docs-agent/docs-agent.ts @@ -9,6 +9,7 @@ import { executeSqlDocsAgent, grepSearch, idleTool, + lsFiles, readFiles, sequentialThinking, updateClarificationsFile, @@ -17,7 +18,7 @@ import { import { Sonnet4 } from '../../utils/models/sonnet-4'; const DEFAULT_OPTIONS = { - maxSteps: 18, + maxSteps: 30, temperature: 0, maxTokens: 10000, providerOptions: { @@ -38,6 +39,7 @@ export const docsAgent = new Agent({ editFiles, createFiles, deleteFiles, + lsFiles, executeSql: executeSqlDocsAgent, // Use the docs-specific SQL tool bashExecute, updateClarificationsFile, diff --git a/packages/ai/src/context/docs-agent-context.ts b/packages/ai/src/context/docs-agent-context.ts index 61e595052..908e11404 100644 --- a/packages/ai/src/context/docs-agent-context.ts +++ b/packages/ai/src/context/docs-agent-context.ts @@ -24,12 +24,7 @@ export type MessageUserClarifyingQuestion = z.infer<typeof messageUserClarifyingQuestionSchema>; sandbox: z.custom<Sandbox>( (val) => { - return ( - val && - typeof val === 'object' && - typeof val.execute === 'function' && - typeof val.cleanup === 'function' - ); + return val && typeof val === 'object' && 'id' in val && 'fs' in val; }, { message: 'Invalid Sandbox instance', diff --git a/packages/ai/src/steps/docs-agent/create-docs-todos-step.ts b/packages/ai/src/steps/docs-agent/create-docs-todos-step.ts index 13720c095..a77609227 100644 --- a/packages/ai/src/steps/docs-agent/create-docs-todos-step.ts +++ b/packages/ai/src/steps/docs-agent/create-docs-todos-step.ts @@ -26,7 +26,7 @@ const createDocsTodosStepOutputSchema = z.object({ const DEFAULT_OPTIONS = { maxSteps: 1, temperature: 0, - maxTokens: 300, + maxTokens: 3000, }; const CREATE_TODO_LIST_PROMPT = `### Overview You have access to various tools to complete tasks. Adhere to these rules: - Include confirmation items at phase ends. --- +### How to Use the createTodoList Tool +**IMPORTANT**: When you are ready to create the TODO list, you must call the createTodoList tool with a single parameter called "todos" that contains your entire markdown-formatted TODO list as a string. The markdown should follow the exact format shown in the examples below. 
+ +For simple requests like "Document all models", create a basic TODO list: +\`\`\` +# DBT Documentation Todo + +## Phase 1: Document Models +- [ ] Review all models in the project +- [ ] Add descriptions to each model +- [ ] Document all columns +- [ ] Create relationships where applicable +- [ ] Push changes and create pull request +\`\`\` + ### Examples #### User Request: "can you update the docs to clarify that deal amount fields in ‘customers’ table actually originate from HubSpot and the closed won amount field should be used when calculating the primary deal value" \`\`\` @@ -238,31 +253,16 @@ const createDocsTodosExecution = async ({ // Extract todos from the result let todosString = ''; - // The createTodoList tool creates a file with the todos - // We need to look through the tool results to find the file content + // Look for the todos in the tool call arguments if (result.toolCalls && Array.isArray(result.toolCalls)) { for (const toolCall of result.toolCalls) { - if (toolCall.toolName === 'createTodoList') { - // The tool creates a file, so we need to look at the result - const toolResults = result.toolResults || []; - for (const toolResult of toolResults) { - if (toolResult.toolCallId === toolCall.toolCallId) { - // Extract the file content from the result - if (toolResult.result && typeof toolResult.result === 'object') { - const resultObj = toolResult.result as Record<string, unknown>; - const fileObj = resultObj.file as Record<string, unknown> | undefined; - if (fileObj?.text && typeof fileObj.text === 'string') { - todosString = fileObj.text; - } else if (resultObj.text && typeof resultObj.text === 'string') { - todosString = resultObj.text; - } else if (typeof resultObj === 'string') { - todosString = resultObj; - } - } - break; - } + if (toolCall.toolName === 'createTodoList' && toolCall.args) { + // The todos are in the args passed to the tool + const args = toolCall.args as Record<string, unknown>; + if (args.todos && typeof args.todos === 'string') { + todosString = args.todos; + break; } - break; } } } diff --git a/packages/ai/src/steps/docs-agent/docs-agent-step.ts b/packages/ai/src/steps/docs-agent/docs-agent-step.ts index 824e6f7b1..6583776dc 100644 --- a/packages/ai/src/steps/docs-agent/docs-agent-step.ts +++ b/packages/ai/src/steps/docs-agent/docs-agent-step.ts @@ -79,6 +79,7 @@ const docsAgentExecution = async ({ const result = await docsAgent.stream(messages, { instructions, runtimeContext, + toolChoice: 'required', maxSteps: 50, // Allow more steps for complex documentation tasks }); @@ -88,14 +89,34 @@ const docsAgentExecution = async ({ let filesCreated = 0; const toolsUsed = new Set<string>(); let finished = false; + let stepCount = 0; + let lastTextContent = ''; for await (const chunk of result.fullStream) { + // Track step count + if (chunk.type === 'step-start') { + stepCount++; + console.log(`[DocsAgent] Step ${stepCount} started`); + } + + // Log text chunks to see what the agent is thinking + if (chunk.type === 'text-delta' && chunk.textDelta) { + lastTextContent += chunk.textDelta; + } + + if (chunk.type === 'step-finish') { + console.log(`[DocsAgent] Step ${stepCount} finished. 
Last text: ${lastTextContent.slice(0, 200)}...`); + lastTextContent = ''; + } + // Track tool usage if (chunk.type === 'tool-call') { + console.log(`[DocsAgent] Tool call: ${chunk.toolName} with args:`, JSON.stringify(chunk.args).slice(0, 200)); toolsUsed.add(chunk.toolName); // Track specific tool outcomes if (chunk.toolName === 'createFiles' || chunk.toolName === 'editFiles') { + console.log(`[DocsAgent] Tool ${chunk.toolName} called - marking documentationCreated = true`); documentationCreated = true; filesCreated++; } @@ -127,6 +148,13 @@ const docsAgentExecution = async ({ // Get the final todo list state const finalTodoList = runtimeContext.get(DocsAgentContextKeys.TodoList) as string; + console.log('[DocsAgent] Final results:', { + documentationCreated, + filesCreated, + toolsUsed: Array.from(toolsUsed), + finished, + }); + return { todos: inputData.todos.split('\n').filter((line) => line.trim()), todoList: finalTodoList || inputData.todoList, diff --git a/packages/ai/src/tools/file-tools/bash-tool/bash-execute-tool.ts b/packages/ai/src/tools/file-tools/bash-tool/bash-execute-tool.ts index c930a053e..dbda584e3 100644 --- a/packages/ai/src/tools/file-tools/bash-tool/bash-execute-tool.ts +++ b/packages/ai/src/tools/file-tools/bash-tool/bash-execute-tool.ts @@ -12,9 +12,7 @@ const bashCommandSchema = z.object({ }); const inputSchema = z.object({ - commands: z - .union([bashCommandSchema, z.array(bashCommandSchema)]) - .describe('Single command or array of bash commands to execute'), + commands: z.array(bashCommandSchema), }); const outputSchema = z.object({ @@ -98,7 +96,9 @@ const executeBashCommands = wrapTraced( export const executeBash = createTool({ id: 'execute-bash', - description: 'Executes bash commands and captures stdout, stderr, and exit codes', + description: `Executes bash commands and captures stdout, stderr, and exit codes. 
+ +IMPORTANT: The 'commands' field must be an array of command objects: [{command: "pwd", description: "Print working directory"}, {command: "ls", description: "List files"}]`, inputSchema, outputSchema, execute: async ({ diff --git a/packages/ai/src/workflows/docs-agent/docs-agent-workflow.int.test.ts b/packages/ai/src/workflows/docs-agent/docs-agent-workflow.int.test.ts new file mode 100644 index 000000000..ccd9985a2 --- /dev/null +++ b/packages/ai/src/workflows/docs-agent/docs-agent-workflow.int.test.ts @@ -0,0 +1,433 @@ +import type { Sandbox } from '@buster/sandbox'; +import { currentSpan, initLogger, wrapTraced } from 'braintrust'; +import type { Logger as BraintrustLogger } from 'braintrust'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import type { DocsAgentContext } from '../../context/docs-agent-context'; +import docsAgentWorkflow from './docs-agent-workflow'; +import { + TEST_MESSAGES, + createContextWithClarifications, + createContextWithTodos, + createPartiallyCompletedContext, + createTestContext, + createTestWorkflowInput, + validateWorkflowOutput, +} from './test-helpers/context-helpers'; +import { + type TestSandboxResult, + addFilesToSandbox, + createComplexProjectStructure, + createFilesWithMissingDocs, + createIntegrationTestSandbox, + createMalformedYamlFiles, +} from './test-helpers/sandbox-helpers'; + +describe('docs-agent-workflow', () => { + let testSandbox: TestSandboxResult | null = null; + let braintrustLogger: BraintrustLogger | null = null; + + // Initialize Braintrust logger before each test + beforeEach(() => { + if (process.env.BRAINTRUST_KEY) { + braintrustLogger = initLogger({ + apiKey: process.env.BRAINTRUST_KEY, + projectName: process.env.ENVIRONMENT, + }); + } + }); + + // Cleanup after each test + afterEach(async () => { + if (testSandbox) { + await testSandbox.cleanup(); + testSandbox = null; + } + if (braintrustLogger) { + await braintrustLogger.flush(); + braintrustLogger = null; + } + }); + + /** + * Helper to run workflow with Braintrust tracing + */ + async function runWorkflowWithTracing(input: unknown, metadata: Record<string, unknown> = {}) { + if (!braintrustLogger) { + // Run without tracing if no Braintrust key + const run = docsAgentWorkflow.createRun(); + return await run.start({ inputData: input as any }); + } + + return await wrapTraced( + async () => { + currentSpan().log({ + metadata: { + testName: expect.getState().currentTestName, + ...metadata, + }, + }); + + const run = docsAgentWorkflow.createRun(); + return await run.start({ inputData: input as any }); + }, + { + name: 'Docs Agent Workflow Test', + } + )(); + } + + describe('basic workflow execution', () => { + it('should successfully document a simple dbt project', async () => { + // Create test sandbox with a minimal project + testSandbox = await createIntegrationTestSandbox({ + projectOptions: { + projectName: 'test_analytics', + companyName: 'TestCo', + includeDocumentation: false, // Start without docs + includeTests: false, // Simplify - no tests + includeMacros: false, // Simplify - no macros + }, + }); + + const context = createTestContext({ + sandbox: testSandbox.sandbox, + }); + + const input = createTestWorkflowInput({ + message: TEST_MESSAGES.documentSpecific, // Use simpler test message + context, + }); + + const result = await runWorkflowWithTracing(input, { + testType: 'basic-documentation', + projectType: 'simple-dbt', + }); + + expect(result).toBeDefined(); + expect(result.status).toBe('success'); + if (result.status === 'success') { + // Check 
that the workflow completes + expect(result.result).toBeDefined(); + expect(result.result.todos).toBeDefined(); + expect(result.result.todoList).toBeDefined(); + + // Log what actually happened for debugging + console.log('Workflow completed with:', { + documentationCreated: result.result.documentationCreated, + filesCreated: result.result.metadata?.filesCreated, + toolsUsed: result.result.metadata?.toolsUsed, + finished: result.result.finished, + }); + + // For now, we're just checking that the workflow runs without errors + // The mock sandbox doesn't actually create files, but the agent should attempt to + } + }, 90000); // Increase timeout to 90 seconds + + it('should handle pre-populated todo list', async () => { + testSandbox = await createIntegrationTestSandbox(); + const context = createContextWithTodos(testSandbox.sandbox); + + const input = createTestWorkflowInput({ + message: TEST_MESSAGES.completePartial, + context, + }); + + const result = await runWorkflowWithTracing(input, { + testType: 'todo-completion', + }); + + expect(result.status).toBe('success'); + if (result.status === 'success') { + expect(result.result.todos).toBeDefined(); + expect(result.result.todoList).toBeDefined(); + } + }); + + it('should generate clarification questions when needed', async () => { + testSandbox = await createIntegrationTestSandbox(); + + // Add files with unclear business logic + await addFilesToSandbox( + testSandbox.sandbox, + createFilesWithMissingDocs(), + testSandbox.projectPath + ); + + const context = createTestContext({ + sandbox: testSandbox.sandbox, + }); + + const input = createTestWorkflowInput({ + message: TEST_MESSAGES.askClarification, + context, + }); + + const result = await runWorkflowWithTracing(input, { + testType: 'clarification-needed', + }); + + const validation = validateWorkflowOutput(result); + expect(validation.isValid).toBe(true); + + if (result.status === 'success' && result.result.clarificationNeeded) { + expect(result.result.clarificationQuestion).toBeDefined(); + if (result.result.clarificationQuestion) { + expect(result.result.clarificationQuestion.issue).toBeTruthy(); + expect(result.result.clarificationQuestion.context).toBeTruthy(); + expect(result.result.clarificationQuestion.clarificationQuestion).toBeTruthy(); + } + } + }); + }); + + describe('error handling', () => { + it('should handle malformed YAML files gracefully', async () => { + testSandbox = await createIntegrationTestSandbox(); + + // Add malformed YAML files + await addFilesToSandbox( + testSandbox.sandbox, + createMalformedYamlFiles(), + testSandbox.projectPath + ); + + const context = createTestContext({ + sandbox: testSandbox.sandbox, + }); + + const input = createTestWorkflowInput({ + message: TEST_MESSAGES.fixYaml, + context, + }); + + const result = await runWorkflowWithTracing(input, { + testType: 'error-handling', + errorType: 'malformed-yaml', + }); + + // Should complete successfully + expect(result.status).toBe('success'); + if (result.status === 'success') { + expect(result.result).toBeDefined(); + console.log('Malformed YAML test completed with:', { + documentationCreated: result.result.documentationCreated, + clarificationNeeded: result.result.clarificationNeeded, + toolsUsed: result.result.metadata?.toolsUsed, + }); + } + }); + + it('should handle missing sandbox gracefully', async () => { + const context = createTestContext({ + sandbox: null as unknown as Sandbox, // Intentionally pass null + }); + + const input = createTestWorkflowInput({ + message: 
TEST_MESSAGES.documentAll, + context, + }); + + const result = await runWorkflowWithTracing(input, { + testType: 'error-handling', + errorType: 'missing-sandbox', + }); + + // Workflow should fail gracefully + expect(result.status).toBe('failed'); + }); + }); + + describe('complex scenarios', () => { + it('should handle large project with multiple domains', async () => { + testSandbox = await createIntegrationTestSandbox({ + projectOptions: { + includeDocumentation: false, + }, + }); + + // Add complex project structure + await addFilesToSandbox( + testSandbox.sandbox, + createComplexProjectStructure(), + testSandbox.projectPath + ); + + const context = createTestContext({ + sandbox: testSandbox.sandbox, + }); + + const input = createTestWorkflowInput({ + message: TEST_MESSAGES.documentAll, + context, + }); + + const result = await runWorkflowWithTracing(input, { + testType: 'complex-project', + projectComplexity: 'high', + }); + + expect(result.status).toBe('success'); + if (result.status === 'success') { + expect(result.result).toBeDefined(); + console.log('Complex project test completed with:', { + documentationCreated: result.result.documentationCreated, + filesCreated: result.result.metadata?.filesCreated, + toolsUsed: result.result.metadata?.toolsUsed, + }); + } + }, 30000); // Increase timeout for complex test + + it('should resume partially completed documentation', async () => { + testSandbox = await createIntegrationTestSandbox(); + const context = createPartiallyCompletedContext(testSandbox.sandbox); + + const input = createTestWorkflowInput({ + message: TEST_MESSAGES.completePartial, + context, + }); + + const result = await runWorkflowWithTracing(input, { + testType: 'resume-partial', + }); + + expect(result.status).toBe('success'); + if (result.status === 'success') { + expect(result.result).toBeDefined(); + console.log('Resume partial test completed with:', { + finished: result.result.finished, + filesCreated: result.result.metadata?.filesCreated, + toolsUsed: result.result.metadata?.toolsUsed, + }); + } + }); + }); + + describe('workflow structure', () => { + it('should have correct input and output schemas', () => { + expect(docsAgentWorkflow.inputSchema).toBeDefined(); + expect(docsAgentWorkflow.outputSchema).toBeDefined(); + }); + + it('should have all required steps', () => { + const workflow = docsAgentWorkflow as any; + // The workflow has a steps object with the step definitions + const stepKeys = Object.keys(workflow.steps); + expect(stepKeys).toHaveLength(3); + expect(stepKeys).toContain('initialize-context'); + expect(stepKeys).toContain('create-docs-todos'); + expect(stepKeys).toContain('docs-agent'); + }); + }); + + describe('integration with real sandbox', () => { + it.skip('should work with actual Daytona sandbox', async () => { + // This test requires actual Daytona setup + // Skip in CI, run locally with proper credentials + + testSandbox = await createIntegrationTestSandbox({ + projectOptions: { + projectName: 'production_analytics', + companyName: 'RealCo', + }, + }); + + const context = createTestContext({ + sandbox: testSandbox.sandbox, + }); + + const input = createTestWorkflowInput({ + message: 'Document the entire dbt project with detailed explanations', + context, + }); + + const result = await runWorkflowWithTracing(input, { + testType: 'real-sandbox-integration', + environment: 'production-like', + }); + + expect(result.status).toBe('success'); + if (result.status === 'success') { + expect(result.result.documentationCreated).toBe(true); + } + + // 
Verify files were actually created in sandbox + const files = await (testSandbox.sandbox.fs as any).listDirectory(testSandbox.projectPath); + expect(files).toContain('README.md'); + }); + }); +}); + +// Performance benchmark tests +describe('docs-agent-workflow performance', () => { + let testSandbox: TestSandboxResult | null = null; + let braintrustLogger: BraintrustLogger | null = null; + + // Initialize Braintrust logger before each test + beforeEach(() => { + if (process.env.BRAINTRUST_KEY) { + braintrustLogger = initLogger({ + apiKey: process.env.BRAINTRUST_KEY, + projectName: 'DOCS-AGENT', + }); + } + }); + + afterEach(async () => { + if (testSandbox) { + await testSandbox.cleanup(); + testSandbox = null; + } + if (braintrustLogger) { + await braintrustLogger.flush(); + braintrustLogger = null; + } + }); + + /** + * Helper to run workflow with Braintrust tracing + */ + async function runWorkflowWithTracing(input: unknown, metadata: Record<string, unknown> = {}) { + if (!braintrustLogger) { + // Run without tracing if no Braintrust key + const run = docsAgentWorkflow.createRun(); + return await run.start({ inputData: input as any }); + } + + return await wrapTraced( + async () => { + currentSpan().log({ + metadata: { + testName: expect.getState().currentTestName, + ...metadata, + }, + }); + + const run = docsAgentWorkflow.createRun(); + return await run.start({ inputData: input as any }); + }, + { + name: 'Docs Agent Workflow Test', + } + )(); + } + + it('should complete basic documentation within reasonable time', async () => { + const startTime = Date.now(); + + testSandbox = await createIntegrationTestSandbox(); + const context = createTestContext({ sandbox: testSandbox.sandbox }); + const input = createTestWorkflowInput({ + message: TEST_MESSAGES.documentSpecific, + context, + }); + + await runWorkflowWithTracing(input, { + testType: 'performance', + benchmark: true, + }); + + const duration = Date.now() - startTime; + expect(duration).toBeLessThan(10000); // Should complete within 10 seconds + }); +}); diff --git a/packages/ai/src/workflows/docs-agent/docs-agent-workflow.test.ts b/packages/ai/src/workflows/docs-agent/docs-agent-workflow.test.ts index e69de29bb..e18957624 100644 --- a/packages/ai/src/workflows/docs-agent/docs-agent-workflow.test.ts +++ b/packages/ai/src/workflows/docs-agent/docs-agent-workflow.test.ts @@ -0,0 +1,91 @@ +import { describe, expect, it } from 'vitest'; +import docsAgentWorkflow from './docs-agent-workflow'; +import { + TEST_MESSAGES, + createTestContext, + createTestWorkflowInput, +} from './test-helpers/context-helpers'; +import { createMockSandbox } from './test-helpers/mock-sandbox'; + +describe('docs-agent-workflow with mock sandbox', () => { + describe('workflow structure', () => { + it('should have correct input and output schemas', () => { + expect(docsAgentWorkflow.inputSchema).toBeDefined(); + expect(docsAgentWorkflow.outputSchema).toBeDefined(); + }); + + it('should have all required steps', () => { + const workflow = docsAgentWorkflow as any; + // The workflow has a steps object with the step definitions + const stepKeys = Object.keys(workflow.steps); + expect(stepKeys).toHaveLength(3); + expect(stepKeys).toContain('initialize-context'); + expect(stepKeys).toContain('create-docs-todos'); + expect(stepKeys).toContain('docs-agent'); + }); + }); + + describe('test helpers', () => { + it('should create mock sandbox successfully', () => { + const mockSandbox = createMockSandbox(); + expect(mockSandbox).toBeDefined(); 
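+ // The mock id is built as `mock-sandbox-${Date.now()}` (see mock-sandbox.ts), so the next assertion checks only the stable prefix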
expect(mockSandbox.id).toContain('mock-sandbox-'); + expect(mockSandbox.fs).toBeDefined(); + }); + + it('should create valid test context', () => { + const mockSandbox = createMockSandbox(); + const context = createTestContext({ sandbox: mockSandbox }); + + expect(context).toBeDefined(); + expect(context.sandbox).toBe(mockSandbox); + expect(context.dataSourceId).toMatch( + /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/ + ); + expect(context.todoList).toBe(''); + expect(context.clarificationQuestions).toEqual([]); + }); + + it('should create valid workflow input', () => { + const mockSandbox = createMockSandbox(); + const context = createTestContext({ sandbox: mockSandbox }); + const input = createTestWorkflowInput({ + message: TEST_MESSAGES.documentAll, + context, + }); + + expect(input).toBeDefined(); + expect(input.message).toBe(TEST_MESSAGES.documentAll); + expect(input.organizationId).toBe('test-org-123'); + expect(input.context).toBe(context); + }); + }); + + describe('mock sandbox functionality', () => { + it('should upload and read files', async () => { + const mockSandbox = createMockSandbox(); + + // Upload a file + await mockSandbox.fs.uploadFile('Test content', 'test.txt'); + + // Read the file + const content = await (mockSandbox.fs as any).readFile('test.txt'); + expect(content).toBe('Test content'); + }); + + it('should list directory contents', async () => { + const mockSandbox = createMockSandbox(); + + // Upload multiple files + await mockSandbox.fs.uploadFile('Content 1', 'dir/file1.txt'); + await mockSandbox.fs.uploadFile('Content 2', 'dir/file2.txt'); + await mockSandbox.fs.uploadFile('Content 3', 'other/file3.txt'); + + // List directory + const files = await (mockSandbox.fs as any).listDirectory('dir'); + expect(files).toHaveLength(2); + expect(files).toContain('file1.txt'); + expect(files).toContain('file2.txt'); + }); + }); +}); diff --git a/packages/ai/src/workflows/docs-agent/test-helpers/README.md b/packages/ai/src/workflows/docs-agent/test-helpers/README.md new file mode 100644 index 000000000..d213c942a --- /dev/null +++ b/packages/ai/src/workflows/docs-agent/test-helpers/README.md @@ -0,0 +1,175 @@ +# Docs Agent Test Helpers + +This directory contains test helpers for the docs agent workflow, including utilities for creating mock dbt projects, managing sandboxes, and running tests with Braintrust integration. + +## Overview + +The test helpers provide: + +1. **Mock dbt Project Generator** - Creates realistic dbt projects for testing +2. **Sandbox Management** - Handles creation, file uploads, and cleanup of Daytona sandboxes +3. **Context Builders** - Creates valid `DocsAgentContext` objects for testing +4. 
**Braintrust Integration** - Wraps test execution with observability + +## Usage + +### Basic Test Setup + +```typescript +import { createTestSandbox, createTestContext, createTestWorkflowInput } from './test-helpers'; +import docsAgentWorkflow from './docs-agent-workflow'; + +// Create a test sandbox with mock dbt project +const testSandbox = await createTestSandbox({ + projectOptions: { + projectName: 'analytics', + companyName: 'TestCo', + includeDocumentation: false, + } +}); + +// Create context and input +const context = createTestContext({ + sandbox: testSandbox.sandbox, +}); + +const input = createTestWorkflowInput({ + message: 'Document all models in this dbt project', + context, +}); + +// Run workflow +const result = await docsAgentWorkflow.createRun().start({ inputData: input }); + +// Cleanup +await testSandbox.cleanup(); +``` + +### Mock dbt Project Options + +The `generateMockDbtProject` function creates a complete dbt project with: + +- **Staging models** - Stripe data transformations +- **Mart models** - Business metrics (MRR, revenue) +- **Schema documentation** - YAML files with descriptions +- **Tests** - Data quality checks +- **Macros** - Custom dbt macros +- **Configuration** - dbt_project.yml, packages.yml + +### Test Scenarios + +#### 1. Basic Documentation +```typescript +const testSandbox = await createTestSandbox({ + projectOptions: { + includeDocumentation: false, // Start without docs + } +}); +``` + +#### 2. Missing Documentation +```typescript +await addFilesToSandbox( + testSandbox.sandbox, + createFilesWithMissingDocs(), + testSandbox.projectPath +); +``` + +#### 3. Malformed YAML +```typescript +await addFilesToSandbox( + testSandbox.sandbox, + createMalformedYamlFiles(), + testSandbox.projectPath +); +``` + +#### 4. 
Complex Project +```typescript +await addFilesToSandbox( + testSandbox.sandbox, + createComplexProjectStructure(), + testSandbox.projectPath +); +``` + +### Context Variations + +```typescript +// Context with pre-populated todos +const context = createContextWithTodos(sandbox); + +// Context with clarification questions +const context = createContextWithClarifications(sandbox); + +// Partially completed context +const context = createPartiallyCompletedContext(sandbox); +``` + +### Running Tests + +```bash +# Run all workflow tests +bun test docs-agent-workflow.test.ts + +# Run specific test +bun test docs-agent-workflow.test.ts -t "should successfully document" + +# Run example +tsx packages/ai/src/workflows/docs-agent/test-helpers/run-example.ts +``` + +## File Structure + +``` +test-helpers/ +├── mock-dbt-project.ts # dbt project generator +├── sandbox-helpers.ts # Sandbox management utilities +├── context-helpers.ts # Context builders and validators +├── run-example.ts # Example runner script +├── index.ts # Exports +└── README.md # This file +``` + +## Key Functions + +### Mock Project Generation +- `generateMockDbtProject()` - Creates complete dbt project +- `generateProjectVariations()` - Returns different project configurations + +### Sandbox Management +- `createTestSandbox()` - Creates sandbox with mock project +- `addFilesToSandbox()` - Adds additional files +- `createLocalTestProject()` - Creates local temp directory (for non-sandbox testing) + +### Context Helpers +- `createTestContext()` - Creates basic context +- `createTestWorkflowInput()` - Creates workflow input +- `validateWorkflowOutput()` - Validates workflow results + +### Test Data +- `createMalformedYamlFiles()` - Invalid YAML for error testing +- `createFilesWithMissingDocs()` - Models without documentation +- `createComplexProjectStructure()` - Multi-domain project + +## Braintrust Integration + +Tests automatically integrate with Braintrust when `BRAINTRUST_KEY` is set: + +```typescript +const result = await runWorkflowWithTracing(input, { + testType: 'basic-documentation', + projectType: 'simple-dbt', +}); +``` + +Traces appear in the `DOCS-AGENT` project in Braintrust. + +## Best Practices + +1. **Always cleanup sandboxes** - Use the cleanup function in afterEach hooks +2. **Use descriptive test metadata** - Pass meaningful metadata to Braintrust +3. **Test edge cases** - Include malformed files, missing docs, complex structures +4. **Validate outputs** - Use the validation helper to check workflow results +5. 
**Mock realistically** - The mock dbt project should resemble real projects \ No newline at end of file diff --git a/packages/ai/src/workflows/docs-agent/test-helpers/context-helpers.ts b/packages/ai/src/workflows/docs-agent/test-helpers/context-helpers.ts new file mode 100644 index 000000000..243c3d58d --- /dev/null +++ b/packages/ai/src/workflows/docs-agent/test-helpers/context-helpers.ts @@ -0,0 +1,176 @@ +import type { Sandbox } from '@buster/sandbox'; +import type { DocsAgentContext } from '../../../context/docs-agent-context'; + +export interface CreateTestContextOptions { + sandbox: Sandbox; + dataSourceId?: string; + todoList?: string[]; + clarificationQuestions?: Array<{ + issue: string; + context: string; + clarificationQuestion: string; + }>; +} + +/** + * Creates a test DocsAgentContext with sensible defaults + */ +export function createTestContext(options: CreateTestContextOptions): DocsAgentContext { + const { + sandbox, + dataSourceId = '550e8400-e29b-41d4-a716-446655440000', // Valid UUID v4 + todoList = [], + clarificationQuestions = [], + } = options; + + return { + sandbox, + todoList: todoList.join('\n'), + clarificationQuestions, + dataSourceId, + }; +} + +/** + * Creates a context with pre-populated todos for testing + */ +export function createContextWithTodos(sandbox: Sandbox): DocsAgentContext { + return createTestContext({ + sandbox, + todoList: [ + 'Document the staging models in models/staging/stripe/', + 'Add descriptions to all columns in fct_mrr model', + 'Create README for the finance mart', + 'Update main project README with setup instructions', + ], + }); +} + +/** + * Creates a context with clarification questions + */ +export function createContextWithClarifications(sandbox: Sandbox): DocsAgentContext { + return createTestContext({ + sandbox, + clarificationQuestions: [ + { + issue: 'Missing column documentation', + context: 'The stg_stripe__customers model has columns without descriptions', + clarificationQuestion: + 'What does the "delinquent" column represent in the customers table?', + }, + { + issue: 'Unclear business logic', + context: 'The MRR calculation in fct_mrr uses complex logic', + clarificationQuestion: + 'Should MRR include customers in trial status or only active paying customers?', + }, + ], + }); +} + +/** + * Creates a context simulating a partially completed workflow + */ +export function createPartiallyCompletedContext(sandbox: Sandbox): DocsAgentContext { + return createTestContext({ + sandbox, + todoList: [ + 'Document the staging models in models/staging/stripe/', // Completed + 'Add descriptions to all columns in fct_mrr model', + 'Create README for the finance mart', // Completed + 'Update main project README with setup instructions', + ], + }); +} + +/** + * Creates test inputs for the workflow + */ +export interface CreateTestInputOptions { + message?: string; + organizationId?: string; + context: DocsAgentContext; +} + +export function createTestWorkflowInput(options: CreateTestInputOptions) { + const { + message = 'Please document all the models in this dbt project', + organizationId = 'test-org-123', + context, + } = options; + + return { + message, + organizationId, + context, + }; +} + +/** + * Common test messages for different scenarios + */ +export const TEST_MESSAGES = { + documentAll: + 'Please document all the models in this dbt project. The project files are in the dbt_project directory. First, use grepSearch to find all .sql and .yml files. 
Use path "dbt_project" and pattern "\\.sql$|\\.yml$" with recursive=true.', + documentSpecific: + 'Please create a simple documentation file for the fct_mrr model. Just add a basic schema.yml file in the dbt_project/models/marts/finance/ directory with a description for the model.', + updateReadme: 'Update the README files to include setup instructions', + addTests: 'Add schema tests for all staging models', + fixYaml: 'Fix any malformed YAML files in the project', + askClarification: 'Document the project but I need clarification on business logic', + completePartial: 'Continue documenting from where we left off', +}; + +/** + * Helper to validate workflow output + */ +export function validateWorkflowOutput(output: unknown): { + isValid: boolean; + errors: string[]; +} { + const errors: string[] = []; + + // Type guard to check if output is an object + if (!output || typeof output !== 'object') { + errors.push('Output must be an object'); + return { isValid: false, errors }; + } + + const outputObj = output as Record; + + // Check for required fields based on the output type + if (outputObj.clarificationNeeded) { + if (!outputObj.clarificationQuestion) { + errors.push('clarificationQuestion is required when clarificationNeeded is true'); + } + const clarificationQuestion = outputObj.clarificationQuestion as Record; + if (clarificationQuestion) { + if (!clarificationQuestion.issue) { + errors.push('clarificationQuestion.issue is required'); + } + if (!clarificationQuestion.context) { + errors.push('clarificationQuestion.context is required'); + } + if (!clarificationQuestion.clarificationQuestion) { + errors.push('clarificationQuestion.clarificationQuestion is required'); + } + } + } + + if (outputObj.documentationCreated) { + const metadata = outputObj.metadata as Record; + if (metadata && typeof metadata.filesCreated !== 'number') { + errors.push('metadata.filesCreated should be a number when documentation is created'); + } + } + + if (outputObj.todos && !Array.isArray(outputObj.todos)) { + errors.push('todos should be an array'); + } + + return { + isValid: errors.length === 0, + errors, + }; +} diff --git a/packages/ai/src/workflows/docs-agent/test-helpers/index.ts b/packages/ai/src/workflows/docs-agent/test-helpers/index.ts new file mode 100644 index 000000000..5bbb26d68 --- /dev/null +++ b/packages/ai/src/workflows/docs-agent/test-helpers/index.ts @@ -0,0 +1,5 @@ +// Export all test helpers +export * from './mock-dbt-project'; +export * from './sandbox-helpers'; +export * from './context-helpers'; +export * from './mock-sandbox'; diff --git a/packages/ai/src/workflows/docs-agent/test-helpers/mock-dbt-project.ts b/packages/ai/src/workflows/docs-agent/test-helpers/mock-dbt-project.ts new file mode 100644 index 000000000..77af95a00 --- /dev/null +++ b/packages/ai/src/workflows/docs-agent/test-helpers/mock-dbt-project.ts @@ -0,0 +1,1239 @@ +import type { FileInput } from '@buster/sandbox'; + +export interface MockDbtProjectOptions { + projectName?: string; + companyName?: string; + includeDocumentation?: boolean; + includeTests?: boolean; + includeMacros?: boolean; + includeIntermediateModels?: boolean; + includeMultipleSources?: boolean; +} + +export function generateMockDbtProject(options: MockDbtProjectOptions = {}): FileInput[] { + const { + projectName = 'analytics', + companyName = 'SaaSCo', + includeDocumentation = true, + includeTests = true, + includeMacros = true, + includeIntermediateModels = true, + includeMultipleSources = true, + } = options; + + const files: FileInput[] = 
[]; + + // Root configuration files + files.push({ + path: 'dbt_project.yml', + content: `name: '${projectName}' +version: '1.0.0' +config-version: 2 + +profile: '${projectName}' + +model-paths: ["models"] +analysis-paths: ["analyses"] +test-paths: ["tests"] +seed-paths: ["data"] +macro-paths: ["macros"] +snapshot-paths: ["snapshots"] + +target-path: "target" +clean-targets: + - "target" + - "dbt_packages" + +models: + ${projectName}: + staging: + +materialized: view + stripe: + +schema: staging_stripe + salesforce: + +schema: staging_salesforce + postgres: + +schema: staging_postgres + intermediate: + +materialized: view + finance: + +schema: intermediate_finance + sales: + +schema: intermediate_sales + marts: + +materialized: table + finance: + +schema: marts_finance + sales: + +schema: marts_sales + marketing: + +schema: marts_marketing +`, + }); + + // Root README + files.push({ + path: 'README.md', + content: `# ${companyName} Analytics + +This is the analytics repository for ${companyName}. + +## Overview + +This dbt project transforms raw data into analytics-ready datasets. + +## Structure + +- \`models/staging/\` - Raw data transformations + - \`stripe/\` - Payment processing data + - \`salesforce/\` - CRM data + - \`postgres/\` - Application database data +- \`models/intermediate/\` - Business logic transformations + - \`finance/\` - Financial calculations + - \`sales/\` - Sales metrics preparation +- \`models/marts/\` - Business-ready datasets + - \`finance/\` - Financial reporting + - \`sales/\` - Sales analytics + - \`marketing/\` - Marketing analytics + +## Setup + +1. Install dbt +2. Configure your profile +3. Run \`dbt deps\` +4. Run \`dbt build\` +`, + }); + + // Staging models - Stripe + files.push({ + path: 'models/staging/stripe/stg_stripe__customers.sql', + content: `{{ + config( + materialized='view' + ) +}} + +select + id as customer_id, + email, + name as customer_name, + created as created_at, + updated as updated_at, + currency, + delinquent as is_delinquent, + balance, + _sdc_extracted_at +from {{ source('stripe', 'customers') }} +where deleted is false +`, + }); + + files.push({ + path: 'models/staging/stripe/stg_stripe__subscriptions.sql', + content: `{{ + config( + materialized='view' + ) +}} + +select + id as subscription_id, + customer as customer_id, + status, + current_period_start, + current_period_end, + created as created_at, + updated as updated_at, + cancel_at, + canceled_at, + trial_start, + trial_end, + _sdc_extracted_at +from {{ source('stripe', 'subscriptions') }} +`, + }); + + files.push({ + path: 'models/staging/stripe/stg_stripe__invoices.sql', + content: `{{ + config( + materialized='view' + ) +}} + +select + id as invoice_id, + customer as customer_id, + subscription as subscription_id, + status, + amount_paid, + amount_due, + amount_remaining, + currency, + created as created_at, + period_start, + period_end, + _sdc_extracted_at +from {{ source('stripe', 'invoices') }} +where status != 'draft' +`, + }); + + files.push({ + path: 'models/staging/stripe/stg_stripe__charges.sql', + content: `{{ + config( + materialized='view' + ) +}} + +select + id as charge_id, + amount, + amount_refunded, + currency, + customer as customer_id, + description, + invoice as invoice_id, + paid as is_paid, + refunded as is_refunded, + status, + created as created_at, + _sdc_extracted_at +from {{ source('stripe', 'charges') }} +`, + }); + + // Staging models - Salesforce (if multiple sources included) + if (includeMultipleSources) { + files.push({ + path: 
'models/staging/salesforce/stg_salesforce__accounts.sql', + content: `{{ + config( + materialized='view' + ) +}} + +select + id as account_id, + name as account_name, + type as account_type, + industry, + annual_revenue, + number_of_employees, + billing_country, + billing_state, + created_date as created_at, + last_modified_date as updated_at, + is_deleted +from {{ source('salesforce', 'accounts') }} +where is_deleted = false +`, + }); + + files.push({ + path: 'models/staging/salesforce/stg_salesforce__opportunities.sql', + content: `{{ + config( + materialized='view' + ) +}} + +select + id as opportunity_id, + account_id, + name as opportunity_name, + stage_name, + amount, + probability, + close_date, + type as opportunity_type, + lead_source, + is_closed, + is_won, + created_date as created_at, + last_modified_date as updated_at +from {{ source('salesforce', 'opportunities') }} +where is_deleted = false +`, + }); + + files.push({ + path: 'models/staging/salesforce/stg_salesforce__contacts.sql', + content: `{{ + config( + materialized='view' + ) +}} + +select + id as contact_id, + account_id, + first_name, + last_name, + email, + phone, + title, + department, + created_date as created_at, + last_modified_date as updated_at +from {{ source('salesforce', 'contacts') }} +where is_deleted = false +`, + }); + + // Staging models - Application Database + files.push({ + path: 'models/staging/postgres/stg_postgres__users.sql', + content: `{{ + config( + materialized='view' + ) +}} + +select + id as user_id, + email, + name as user_name, + role as user_role, + status as user_status, + created_at, + updated_at, + last_login_at, + is_active +from {{ source('postgres', 'users') }} +where deleted_at is null +`, + }); + + files.push({ + path: 'models/staging/postgres/stg_postgres__events.sql', + content: `{{ + config( + materialized='view' + ) +}} + +select + id as event_id, + user_id, + event_type, + event_properties, + session_id, + created_at as event_timestamp +from {{ source('postgres', 'events') }} +`, + }); + } + + // Intermediate models (if included) + if (includeIntermediateModels) { + files.push({ + path: 'models/intermediate/finance/int_revenue_by_month.sql', + content: `{{ + config( + materialized='view' + ) +}} + +with invoice_revenue as ( + select + date_trunc('month', period_start) as revenue_month, + customer_id, + currency, + sum(amount_paid) as amount_paid_cents, + count(distinct invoice_id) as invoice_count + from {{ ref('stg_stripe__invoices') }} + where status = 'paid' + group by 1, 2, 3 +) + +select + revenue_month, + customer_id, + currency, + amount_paid_cents / 100.0 as revenue, + invoice_count +from invoice_revenue +`, + }); + + files.push({ + path: 'models/intermediate/finance/int_subscription_periods.sql', + content: `{{ + config( + materialized='view' + ) +}} + +with subscription_changes as ( + select + subscription_id, + customer_id, + status, + created_at, + canceled_at, + current_period_start, + current_period_end, + lag(status) over (partition by subscription_id order by updated_at) as previous_status + from {{ ref('stg_stripe__subscriptions') }} +) + +select + subscription_id, + customer_id, + status, + created_at as period_start, + coalesce(canceled_at, current_period_end) as period_end, + case + when previous_status is null then 'new' + when previous_status != status then 'changed' + else 'unchanged' + end as change_type +from subscription_changes +`, + }); + + if (includeMultipleSources) { + files.push({ + path: 
'models/intermediate/sales/int_opportunity_timeline.sql', + content: `{{ + config( + materialized='view' + ) +}} + +with opportunity_stages as ( + select + opportunity_id, + account_id, + opportunity_name, + stage_name, + amount, + probability, + close_date, + created_at, + updated_at, + datediff('day', created_at, close_date) as days_to_close, + datediff('day', created_at, current_date) as days_open + from {{ ref('stg_salesforce__opportunities') }} +) + +select + *, + case + when days_to_close <= 30 then 'quick' + when days_to_close <= 90 then 'standard' + else 'long' + end as sales_cycle_category +from opportunity_stages +`, + }); + } + } + + // Marts models - Finance + files.push({ + path: 'models/marts/finance/fct_mrr.sql', + content: `{{ + config( + materialized='table' + ) +}} + +with active_subscriptions as ( + select + customer_id, + subscription_id, + current_period_start, + current_period_end, + status + from {{ ref('stg_stripe__subscriptions') }} + where status in ('active', 'trialing') +), + +latest_invoices as ( + select + subscription_id, + amount_paid / 100.0 as amount_paid_dollars, + period_start, + period_end, + row_number() over (partition by subscription_id order by created_at desc) as rn + from {{ ref('stg_stripe__invoices') }} + where status = 'paid' +) + +select + s.customer_id, + c.customer_name, + c.email, + count(distinct s.subscription_id) as active_subscriptions, + sum(i.amount_paid_dollars) as mrr, + min(s.current_period_start) as earliest_subscription_start, + max(s.current_period_end) as latest_subscription_end +from active_subscriptions s +join {{ ref('stg_stripe__customers') }} c + on s.customer_id = c.customer_id +left join latest_invoices i + on s.subscription_id = i.subscription_id + and i.rn = 1 +group by 1, 2, 3 +`, + }); + + files.push({ + path: 'models/marts/finance/fct_revenue_metrics.sql', + content: `{{ + config( + materialized='table' + ) +}} + +with monthly_revenue as ( + select + date_trunc('month', period_start) as month, + sum(amount_paid) / 100.0 as revenue + from {{ ref('stg_stripe__invoices') }} + where status = 'paid' + group by 1 +), + +customer_counts as ( + select + date_trunc('month', current_period_start) as month, + count(distinct customer_id) as active_customers + from {{ ref('stg_stripe__subscriptions') }} + where status in ('active', 'trialing') + group by 1 +) + +select + r.month, + r.revenue, + c.active_customers, + r.revenue / nullif(c.active_customers, 0) as arpu, + lag(r.revenue) over (order by r.month) as previous_month_revenue, + (r.revenue - lag(r.revenue) over (order by r.month)) / + nullif(lag(r.revenue) over (order by r.month), 0) * 100 as revenue_growth_pct +from monthly_revenue r +left join customer_counts c + on r.month = c.month +order by r.month desc +`, + }); + + files.push({ + path: 'models/marts/finance/dim_customers.sql', + content: `{{ + config( + materialized='table' + ) +}} + +with customer_base as ( + select + customer_id, + customer_name, + email, + currency, + is_delinquent, + created_at, + updated_at + from {{ ref('stg_stripe__customers') }} +), + +subscription_summary as ( + select + customer_id, + count(distinct subscription_id) as total_subscriptions, + count(distinct case when status = 'active' then subscription_id end) as active_subscriptions, + min(created_at) as first_subscription_date, + max(created_at) as latest_subscription_date + from {{ ref('stg_stripe__subscriptions') }} + group by 1 +), + +revenue_summary as ( + select + customer_id, + sum(amount_paid) / 100.0 as lifetime_revenue, + 
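-- invoice amounts are stored in cents (as in the other Stripe models), so /100.0 reports lifetime revenue in dollars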
count(distinct invoice_id) as total_invoices, + max(created_at) as last_payment_date + from {{ ref('stg_stripe__invoices') }} + where status = 'paid' + group by 1 +) + +select + c.*, + coalesce(s.total_subscriptions, 0) as total_subscriptions, + coalesce(s.active_subscriptions, 0) as active_subscriptions, + s.first_subscription_date, + s.latest_subscription_date, + coalesce(r.lifetime_revenue, 0) as lifetime_revenue, + coalesce(r.total_invoices, 0) as total_invoices, + r.last_payment_date, + case + when s.active_subscriptions > 0 then 'active' + when s.total_subscriptions > 0 then 'churned' + else 'prospect' + end as customer_status +from customer_base c +left join subscription_summary s on c.customer_id = s.customer_id +left join revenue_summary r on c.customer_id = r.customer_id +`, + }); + + // Marts models - Sales (if multiple sources) + if (includeMultipleSources) { + files.push({ + path: 'models/marts/sales/fct_sales_pipeline.sql', + content: `{{ + config( + materialized='table' + ) +}} + +select + o.opportunity_id, + o.account_id, + a.account_name, + a.account_type, + a.industry, + o.opportunity_name, + o.stage_name, + o.amount, + o.probability, + o.close_date, + o.is_closed, + o.is_won, + o.created_at, + datediff('day', o.created_at, current_date) as days_in_pipeline, + o.amount * (o.probability / 100.0) as weighted_amount +from {{ ref('stg_salesforce__opportunities') }} o +join {{ ref('stg_salesforce__accounts') }} a + on o.account_id = a.account_id +`, + }); + + files.push({ + path: 'models/marts/sales/dim_accounts.sql', + content: `{{ + config( + materialized='table' + ) +}} + +with account_metrics as ( + select + account_id, + count(distinct opportunity_id) as total_opportunities, + sum(case when is_won then 1 else 0 end) as won_opportunities, + sum(case when is_won then amount else 0 end) as total_revenue, + max(close_date) as last_opportunity_date + from {{ ref('stg_salesforce__opportunities') }} + group by 1 +) + +select + a.*, + coalesce(m.total_opportunities, 0) as total_opportunities, + coalesce(m.won_opportunities, 0) as won_opportunities, + coalesce(m.total_revenue, 0) as total_revenue, + m.last_opportunity_date, + case + when m.won_opportunities > 0 then 'customer' + when m.total_opportunities > 0 then 'prospect' + else 'lead' + end as account_status +from {{ ref('stg_salesforce__accounts') }} a +left join account_metrics m on a.account_id = m.account_id +`, + }); + + // Marts - Marketing + files.push({ + path: 'models/marts/marketing/user_engagement_summary.sql', + content: `{{ + config( + materialized='table' + ) +}} + +with user_events as ( + select + u.user_id, + u.email, + u.user_name, + u.created_at as user_created_at, + count(distinct e.event_id) as total_events, + count(distinct e.session_id) as total_sessions, + count(distinct date(e.event_timestamp)) as active_days, + min(e.event_timestamp) as first_event_timestamp, + max(e.event_timestamp) as last_event_timestamp + from {{ ref('stg_postgres__users') }} u + left join {{ ref('stg_postgres__events') }} e + on u.user_id = e.user_id + group by 1, 2, 3, 4 +), + +user_revenue as ( + select + c.email, + sum(i.amount_paid) / 100.0 as total_revenue + from {{ ref('stg_stripe__customers') }} c + join {{ ref('stg_stripe__invoices') }} i + on c.customer_id = i.customer_id + where i.status = 'paid' + group by 1 +) + +select + ue.*, + coalesce(ur.total_revenue, 0) as total_revenue, + datediff('day', ue.user_created_at, current_date) as days_since_signup, + datediff('day', ue.last_event_timestamp, current_date) as 
days_since_last_activity +from user_events ue +left join user_revenue ur on ue.email = ur.email +`, + }); + } + + // Staging schema documentation + if (includeDocumentation) { + files.push({ + path: 'models/staging/stripe/schema.yml', + content: `version: 2 + +sources: + - name: stripe + database: raw + schema: stripe + tables: + - name: customers + description: "Raw customer data from Stripe" + - name: subscriptions + description: "Raw subscription data from Stripe" + - name: invoices + description: "Raw invoice data from Stripe" + - name: charges + description: "Raw charge/payment data from Stripe" + +models: + - name: stg_stripe__customers + description: "Staged customer data from Stripe" + columns: + - name: customer_id + description: "Unique identifier for the customer" + tests: + - unique + - not_null + - name: email + description: "Customer email address" + - name: customer_name + description: "Customer full name" + + - name: stg_stripe__subscriptions + description: "Staged subscription data from Stripe" + columns: + - name: subscription_id + description: "Unique identifier for the subscription" + tests: + - unique + - not_null + - name: customer_id + description: "Reference to the customer" + tests: + - not_null + - relationships: + to: ref('stg_stripe__customers') + field: customer_id + + - name: stg_stripe__invoices + description: "Staged invoice data from Stripe" + columns: + - name: invoice_id + description: "Unique identifier for the invoice" + tests: + - unique + - not_null + + - name: stg_stripe__charges + description: "Staged charge data from Stripe" + columns: + - name: charge_id + description: "Unique identifier for the charge" + tests: + - unique + - not_null +`, + }); + + if (includeMultipleSources) { + files.push({ + path: 'models/staging/salesforce/schema.yml', + content: `version: 2 + +sources: + - name: salesforce + database: raw + schema: salesforce + tables: + - name: accounts + description: "Company/account data from Salesforce" + - name: opportunities + description: "Sales opportunity data from Salesforce" + - name: contacts + description: "Contact data from Salesforce" + +models: + - name: stg_salesforce__accounts + description: "Staged account data from Salesforce" + columns: + - name: account_id + description: "Unique identifier for the account" + tests: + - unique + - not_null + + - name: stg_salesforce__opportunities + description: "Staged opportunity data from Salesforce" + columns: + - name: opportunity_id + description: "Unique identifier for the opportunity" + tests: + - unique + - not_null + + - name: stg_salesforce__contacts + description: "Staged contact data from Salesforce" + columns: + - name: contact_id + description: "Unique identifier for the contact" + tests: + - unique + - not_null +`, + }); + + files.push({ + path: 'models/staging/postgres/schema.yml', + content: `version: 2 + +sources: + - name: postgres + database: raw + schema: app + tables: + - name: users + description: "Application user data" + - name: events + description: "User event tracking data" + +models: + - name: stg_postgres__users + description: "Staged user data from application database" + columns: + - name: user_id + description: "Unique identifier for the user" + tests: + - unique + - not_null + + - name: stg_postgres__events + description: "Staged event data from application database" + columns: + - name: event_id + description: "Unique identifier for the event" + tests: + - unique + - not_null +`, + }); + + files.push({ + path: 'models/staging/salesforce/README.md', + 
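// Directory-level READMEs like the ones below seed the mock project with existing prose for the docs agent to read and extend in tests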
content: `# Salesforce Staging Models + +This directory contains staging models for Salesforce CRM data. + +## Source Data +- **accounts**: Company/account records +- **opportunities**: Sales pipeline data +- **contacts**: Individual contact records + +## Staging Models +- **stg_salesforce__accounts**: Cleaned and renamed account data +- **stg_salesforce__opportunities**: Standardized opportunity records +- **stg_salesforce__contacts**: Processed contact information +`, + }); + + files.push({ + path: 'models/staging/postgres/README.md', + content: `# Application Database Staging Models + +This directory contains staging models for our main application database. + +## Source Data +- **users**: Application user accounts +- **events**: User interaction tracking + +## Staging Models +- **stg_postgres__users**: Cleaned user data with soft deletes filtered +- **stg_postgres__events**: Event stream data with consistent timestamps +`, + }); + } + + files.push({ + path: 'models/staging/stripe/README.md', + content: `# Stripe Staging Models + +This directory contains staging models for Stripe payment data. + +## Source Data +- **customers**: Stripe customer records +- **subscriptions**: Subscription lifecycle data +- **invoices**: Invoice and billing data +- **charges**: Individual payment transactions + +## Staging Models +- **stg_stripe__customers**: Cleaned customer data with consistent naming +- **stg_stripe__subscriptions**: Subscription records with proper timestamps +- **stg_stripe__invoices**: Invoice data excluding drafts +- **stg_stripe__charges**: Payment transaction records +`, + }); + } + + // Intermediate documentation + if (includeDocumentation && includeIntermediateModels) { + files.push({ + path: 'models/intermediate/finance/schema.yml', + content: `version: 2 + +models: + - name: int_revenue_by_month + description: "Monthly revenue aggregated by customer" + columns: + - name: revenue_month + description: "Month of the revenue" + - name: customer_id + description: "Customer identifier" + - name: revenue + description: "Total revenue in dollars for the month" + + - name: int_subscription_periods + description: "Subscription lifecycle periods with status changes" + columns: + - name: subscription_id + description: "Subscription identifier" + - name: change_type + description: "Type of change (new, changed, unchanged)" +`, + }); + + if (includeMultipleSources) { + files.push({ + path: 'models/intermediate/sales/schema.yml', + content: `version: 2 + +models: + - name: int_opportunity_timeline + description: "Opportunity timeline analysis with calculated metrics" + columns: + - name: opportunity_id + description: "Opportunity identifier" + - name: days_to_close + description: "Number of days from creation to close" + - name: sales_cycle_category + description: "Categorization of sales cycle length" +`, + }); + } + + files.push({ + path: 'models/intermediate/README.md', + content: `# Intermediate Models + +This directory contains business logic transformations that prepare data for final marts. + +## Structure +- **finance/**: Financial calculations and aggregations +- **sales/**: Sales metrics and timeline analysis + +## Purpose +Intermediate models handle complex business logic that multiple marts may need, +keeping the logic DRY and testable. 
+`, + }); + } + + // Marts documentation + if (includeDocumentation) { + files.push({ + path: 'models/marts/finance/schema.yml', + content: `version: 2 + +models: + - name: fct_mrr + description: "Monthly recurring revenue fact table" + columns: + - name: customer_id + description: "Unique customer identifier" + tests: + - unique + - not_null + - name: mrr + description: "Monthly recurring revenue in dollars" + + - name: fct_revenue_metrics + description: "Key revenue metrics by month" + columns: + - name: month + description: "Month of the metrics" + tests: + - unique + - not_null + - name: revenue + description: "Total revenue for the month" + - name: active_customers + description: "Number of active customers" + - name: arpu + description: "Average revenue per user" + + - name: dim_customers + description: "Customer dimension with enriched attributes" + columns: + - name: customer_id + description: "Unique customer identifier" + tests: + - unique + - not_null + - name: customer_status + description: "Current status (active, churned, prospect)" + - name: lifetime_revenue + description: "Total revenue from customer all-time" +`, + }); + + if (includeMultipleSources) { + files.push({ + path: 'models/marts/sales/schema.yml', + content: `version: 2 + +models: + - name: fct_sales_pipeline + description: "Sales pipeline fact table with opportunity details" + columns: + - name: opportunity_id + description: "Unique opportunity identifier" + tests: + - unique + - not_null + - name: weighted_amount + description: "Opportunity amount weighted by probability" + + - name: dim_accounts + description: "Account dimension with sales metrics" + columns: + - name: account_id + description: "Unique account identifier" + tests: + - unique + - not_null + - name: account_status + description: "Status based on opportunity history" +`, + }); + + files.push({ + path: 'models/marts/marketing/schema.yml', + content: `version: 2 + +models: + - name: user_engagement_summary + description: "User engagement metrics combining product usage and revenue" + columns: + - name: user_id + description: "Unique user identifier" + tests: + - unique + - not_null + - name: total_revenue + description: "Revenue attributed to this user" + - name: days_since_last_activity + description: "Days since last recorded activity" +`, + }); + + files.push({ + path: 'models/marts/sales/README.md', + content: `# Sales Data Mart + +This folder contains sales analytics models. + +## Models + +### fct_sales_pipeline +Sales pipeline fact table with all opportunities and their current status. + +### dim_accounts +Account dimension enriched with opportunity metrics and status. +`, + }); + + files.push({ + path: 'models/marts/marketing/README.md', + content: `# Marketing Data Mart + +This folder contains marketing and user analytics models. + +## Models + +### user_engagement_summary +Combines product usage data with revenue to provide a complete view of user engagement. +`, + }); + } + + files.push({ + path: 'models/marts/finance/README.md', + content: `# Finance Data Mart + +This folder contains financial metrics and reporting models. + +## Models + +### fct_mrr +Monthly recurring revenue by customer - the key SaaS metric. + +### fct_revenue_metrics +Aggregated revenue metrics by month including growth rates. + +### dim_customers +Customer dimension with lifetime value and status. 
+`,
+    });
+  }
+
+  // Tests
+  if (includeTests) {
+    files.push({
+      path: 'tests/assert_positive_mrr.sql',
+      content: `-- Test that all MRR values are positive
+select *
+from {{ ref('fct_mrr') }}
+where mrr < 0
+`,
+    });
+
+    files.push({
+      path: 'tests/assert_revenue_metrics_complete.sql',
+      content: `-- Test that we have revenue metrics for all expected months
+with expected_months as (
+  select distinct date_trunc('month', period_start) as month
+  from {{ ref('stg_stripe__invoices') }}
+  where status = 'paid'
+),
+
+actual_months as (
+  select month
+  from {{ ref('fct_revenue_metrics') }}
+)
+
+select e.month
+from expected_months e
+left join actual_months a on e.month = a.month
+where a.month is null
+`,
+    });
+
+    if (includeMultipleSources) {
+      files.push({
+        path: 'tests/assert_opportunity_account_integrity.sql',
+        content: `-- Test that all opportunities have valid accounts
+select o.*
+from {{ ref('stg_salesforce__opportunities') }} o
+left join {{ ref('stg_salesforce__accounts') }} a
+  on o.account_id = a.account_id
+where a.account_id is null
+`,
+      });
+    }
+  }
+
+  // Macros
+  if (includeMacros) {
+    files.push({
+      path: 'macros/cents_to_dollars.sql',
+      content: `{% macro cents_to_dollars(column_name) %}
+    {{ column_name }} / 100.0
+{% endmacro %}
+`,
+    });
+
+    files.push({
+      path: 'macros/generate_date_spine.sql',
+      content: `{% macro generate_date_spine(start_date, end_date) %}
+    {{ dbt_utils.date_spine(
+        datepart="day",
+        start_date=start_date,
+        end_date=end_date
+    ) }}
+{% endmacro %}
+`,
+    });
+  }
+
+  // Additional project files
+  files.push({
+    path: '.gitignore',
+    content: `target/
+dbt_packages/
+logs/
+.DS_Store
+*.log
+.env
+`,
+  });
+
+  files.push({
+    path: 'packages.yml',
+    content: `packages:
+  - package: dbt-labs/dbt_utils
+    version: 1.0.0
+`,
+  });
+
+  return files;
+}
+
+// Helper to generate variations of the project
+export function generateProjectVariations(): Record<string, ReturnType<typeof generateMockDbtProject>> {
+  return {
+    minimal: generateMockDbtProject({
+      includeDocumentation: false,
+      includeTests: false,
+      includeMacros: false,
+      includeIntermediateModels: false,
+      includeMultipleSources: false,
+    }),
+    withoutDocs: generateMockDbtProject({
+      includeDocumentation: false,
+      includeTests: true,
+      includeMacros: true,
+      includeIntermediateModels: true,
+      includeMultipleSources: true,
+    }),
+    complete: generateMockDbtProject({
+      includeDocumentation: true,
+      includeTests: true,
+      includeMacros: true,
+      includeIntermediateModels: true,
+      includeMultipleSources: true,
+    }),
+  };
+}
diff --git a/packages/ai/src/workflows/docs-agent/test-helpers/mock-sandbox.ts b/packages/ai/src/workflows/docs-agent/test-helpers/mock-sandbox.ts
new file mode 100644
index 000000000..ea9982a9c
--- /dev/null
+++ b/packages/ai/src/workflows/docs-agent/test-helpers/mock-sandbox.ts
@@ -0,0 +1,378 @@
+import type { Sandbox } from '@buster/sandbox';
+
+/**
+ * Creates a mock sandbox for testing without requiring the Daytona API
+ */
+export function createMockSandbox(): Sandbox {
+  const mockId = `mock-sandbox-${Date.now()}`;
+
+  const fileStorage = new Map();
+
+  const mockSandbox: Sandbox = {
+    id: mockId,
+
+    fs: {
+      uploadFile: async (content: Buffer | string, path: string) => {
+        // Preserve full path structure
+        fileStorage.set(path, content);
+        return Promise.resolve();
+      },
+
+      uploadFiles: async (files: Array<{ source: Buffer; destination: string }>) => {
+        for (const file of files) {
+          // Preserve full path structure
+          fileStorage.set(file.destination, file.source);
+        }
+        return Promise.resolve();
+      },
+
+      createFolder:
async (_path: string, _permissions?: string) => { + // Mock folder creation - folders are implicit in our file storage + return Promise.resolve(); + }, + + readFile: async (path: string) => { + const content = fileStorage.get(path); + if (!content) { + throw new Error(`File not found: ${path}`); + } + return content; + }, + + listDirectory: async (path: string) => { + const normalizedPath = path.endsWith('/') ? path : `${path}/`; + const files = new Set(); + + for (const filePath of fileStorage.keys()) { + // Check if file is in the requested directory + if (filePath.startsWith(normalizedPath)) { + // Get the relative path from the directory + const relativePath = filePath.slice(normalizedPath.length); + // Get only the immediate child (file or directory) + const parts = relativePath.split('/'); + if (parts[0]) { + files.add(parts[0]); + } + } + } + + return Array.from(files); + }, + + deleteFile: async (path: string) => { + fileStorage.delete(path); + return Promise.resolve(); + }, + + exists: async (path: string) => { + // Check if it's a file + if (fileStorage.has(path)) { + return true; + } + // Check if it's a directory (has files with this prefix) + const normalizedPath = path.endsWith('/') ? path : `${path}/`; + for (const filePath of fileStorage.keys()) { + if (filePath.startsWith(normalizedPath)) { + return true; + } + } + return false; + }, + + stat: async (path: string) => { + const exists = fileStorage.has(path); + if (!exists) { + throw new Error(`File not found: ${path}`); + } + return { + isFile: true, + isDirectory: false, + size: (fileStorage.get(path) as Buffer | string)?.length || 0, + mtime: new Date(), + }; + }, + }, + + exec: async (command: string) => { + // Mock command execution with special handling for typescript execution + if (command.includes('node') && command.includes('.ts')) { + // This is likely a TypeScript execution from runTypescript + // Extract the code and execute it + try { + // For grep search simulation + if (command.includes('grep-search')) { + const searchResults = []; + for (const [filePath, content] of Array.from(fileStorage.entries())) { + if (filePath.endsWith('.sql') || filePath.endsWith('.yml')) { + searchResults.push({ + success: true, + path: filePath, + pattern: '.*', + matches: [ + { + file: filePath, + lineNumber: 1, + content: content.toString().split('\n')[0] || '', + }, + ], + matchCount: 1, + }); + } + } + return { + stdout: JSON.stringify(searchResults), + stderr: '', + exitCode: 0, + }; + } + + // For read files simulation + if (command.includes('read-files')) { + const results = []; + // Parse which files are being requested - this is a simplified mock + for (const [filePath, content] of Array.from(fileStorage.entries())) { + results.push({ + success: true, + filePath: filePath, + content: content.toString(), + truncated: false, + }); + } + return { + stdout: JSON.stringify(results), + stderr: '', + exitCode: 0, + }; + } + } catch (error) { + return { + stdout: '', + stderr: error instanceof Error ? 
error.message : 'Mock execution error',
+          exitCode: 1,
+        };
+      }
+    }
+
+    return {
+      stdout: `Mock execution of: ${command}`,
+      stderr: '',
+      exitCode: 0,
+    };
+  },
+
+  close: async () => {
+    // Mock cleanup
+    fileStorage.clear();
+    return Promise.resolve();
+  },
+
+  // Additional mock methods as needed
+  process: {
+    run: async (command: string) => {
+      return {
+        stdout: `Mock run: ${command}`,
+        stderr: '',
+        exitCode: 0,
+      };
+    },
+    codeRun: async (code: string, _options?: Record<string, unknown>, _timeout?: number) => {
+      // Mock TypeScript code execution
+      try {
+        // Handle ls files operations
+        if (code.includes('lsFilesConcurrently') || code.includes('ls ')) {
+          const results = [];
+
+          // Extract paths from the code
+          const pathsMatch = code.match(/const paths = (\[.*?\]);/s);
+          if (pathsMatch) {
+            const paths = JSON.parse(pathsMatch[1]);
+
+            for (const requestedPath of paths) {
+              const normalizedPath = requestedPath.endsWith('/')
+                ? requestedPath.slice(0, -1)
+                : requestedPath;
+              const entries: Array<{ name: string; type: string; size?: string }> = [];
+
+              // Check if path exists as a directory
+              let isDirectory = false;
+              const dirPath = normalizedPath.endsWith('/')
+                ? normalizedPath
+                : `${normalizedPath}/`;
+
+              for (const filePath of fileStorage.keys()) {
+                if (filePath.startsWith(dirPath) || filePath === normalizedPath) {
+                  isDirectory = true;
+                  break;
+                }
+              }
+
+              if (!isDirectory && !fileStorage.has(normalizedPath)) {
+                results.push({
+                  success: false,
+                  path: requestedPath,
+                  error: 'Path not found',
+                });
+                continue;
+              }
+
+              // List files in directory
+              for (const [filePath, content] of Array.from(fileStorage.entries())) {
+                if (filePath.startsWith(dirPath) && filePath !== normalizedPath) {
+                  const relativePath = filePath.slice(dirPath.length);
+                  const parts = relativePath.split('/');
+                  if (parts[0] && !entries.find((e) => e.name === parts[0])) {
+                    const entry: { name: string; type: string; size?: string } = {
+                      name: parts[0],
+                      type: parts.length > 1 ? 'directory' : 'file',
+                    };
+                    if (parts.length === 1) {
+                      entry.size = content.toString().length.toString();
+                    }
+                    entries.push(entry);
+                  }
+                } else if (filePath === normalizedPath) {
+                  // It's a file
+                  entries.push({
+                    name: filePath.split('/').pop() || filePath,
+                    type: 'file',
+                    size: content.toString().length.toString(),
+                  });
+                }
+              }
+
+              results.push({
+                success: true,
+                path: requestedPath,
+                entries: entries.length > 0 ? entries : undefined,
+              });
+            }
+          }
+
+          return {
+            result: JSON.stringify(results),
+            stderr: '',
+            exitCode: 0,
+          };
+        }
+
+        // Simple pattern matching for grep search code
+        if (code.includes('executeGrepSearch') || code.includes('grep')) {
+          const searchResults = [];
+
+          // Check if searching in dbt_project directory
+          const pathMatch = code.match(/path:\s*["']([^"']+)["']/);
+          const searchPath = pathMatch ? pathMatch[1] : '.';
+
+          for (const [filePath, content] of Array.from(fileStorage.entries())) {
+            // Only include files that match the search path
+            if (searchPath === '.' 
|| (searchPath && filePath.startsWith(searchPath))) { + if (filePath.endsWith('.sql') || filePath.endsWith('.yml')) { + searchResults.push({ + success: true, + path: searchPath, + pattern: '.*', + matches: [ + { + file: filePath, + lineNumber: 1, + content: content.toString().split('\n')[0] || '', + }, + ], + matchCount: 1, + }); + } + } + } + + return { + result: JSON.stringify(searchResults), + stderr: '', + exitCode: 0, + }; + } + + // Simple pattern matching for read files code + if (code.includes('readFile') || code.includes('readFiles')) { + const results = []; + + // Try to extract file paths from the code + const filePathMatches = code.matchAll(/["']([^"']+\.(sql|yml|md))["']/g); + const requestedFiles = Array.from(filePathMatches).map((match) => match[1]); + + if (requestedFiles.length > 0) { + // Return specific files requested + for (const requestedFile of requestedFiles) { + if (requestedFile) { + const content = fileStorage.get(requestedFile); + if (content) { + results.push({ + success: true, + filePath: requestedFile, + content: content.toString(), + truncated: false, + }); + } else { + results.push({ + success: false, + filePath: requestedFile, + error: `File not found: ${requestedFile}`, + }); + } + } + } + } else { + // Return all files if no specific files requested + for (const [filePath, content] of Array.from(fileStorage.entries())) { + results.push({ + success: true, + filePath: filePath, + content: content.toString(), + truncated: false, + }); + } + } + + return { + result: JSON.stringify(results), + stderr: '', + exitCode: 0, + }; + } + + // Simple pattern matching for create/edit files code + if ( + code.includes('writeFile') || + code.includes('createFile') || + code.includes('fs.write') + ) { + // Simulate successful file creation + return { + result: JSON.stringify({ + success: true, + message: 'Files created successfully', + filesCreated: 1, + }), + stderr: '', + exitCode: 0, + }; + } + + // Default mock response + return { + result: 'Mock TypeScript execution completed', + stderr: '', + exitCode: 0, + }; + } catch (error) { + return { + result: '', + stderr: error instanceof Error ? 
error.message : 'Mock execution error',
+            exitCode: 1,
+          };
+        }
+      },
+    },
+  } as unknown as Sandbox;
+
+  return mockSandbox;
+}
diff --git a/packages/ai/src/workflows/docs-agent/test-helpers/run-example.ts b/packages/ai/src/workflows/docs-agent/test-helpers/run-example.ts
new file mode 100644
index 000000000..645a21628
--- /dev/null
+++ b/packages/ai/src/workflows/docs-agent/test-helpers/run-example.ts
@@ -0,0 +1,143 @@
+#!/usr/bin/env tsx
+/**
+ * Example runner to demonstrate how to use the docs agent test helpers
+ * Run this file with: tsx packages/ai/src/workflows/docs-agent/test-helpers/run-example.ts
+ */
+
+import { currentSpan, initLogger, wrapTraced } from 'braintrust';
+import docsAgentWorkflow from '../docs-agent-workflow';
+import { TEST_MESSAGES, createTestContext, createTestWorkflowInput } from './context-helpers';
+import {
+  addFilesToSandbox,
+  createFilesWithMissingDocs,
+  createIntegrationTestSandbox,
+} from './sandbox-helpers';
+
+async function runExample() {
+  console.info('🚀 Starting docs agent example...\n');
+
+  // Initialize Braintrust logger if key is available
+  let braintrustLogger = null;
+  if (process.env.BRAINTRUST_KEY) {
+    braintrustLogger = initLogger({
+      apiKey: process.env.BRAINTRUST_KEY,
+      projectName: 'DOCS-AGENT',
+    });
+    console.info('✅ Braintrust logging enabled\n');
+  } else {
+    console.warn('⚠️ No BRAINTRUST_KEY found, running without logging\n');
+  }
+
+  let testSandbox: Awaited<ReturnType<typeof createIntegrationTestSandbox>> | null = null;
+
+  try {
+    // Step 1: Create a test sandbox with mock dbt project
+    console.info('📦 Creating test sandbox with mock dbt project...');
+    testSandbox = await createIntegrationTestSandbox({
+      projectOptions: {
+        projectName: 'example_analytics',
+        companyName: 'ExampleCo',
+        includeDocumentation: false, // Start without docs
+        includeTests: true,
+        includeMacros: true,
+      },
+    });
+    console.info(`✅ Sandbox created: ${testSandbox.sandboxId}\n`);
+
+    // Step 2: Add some files that need documentation
+    console.info('📄 Adding files with missing documentation...');
+    await addFilesToSandbox(
+      testSandbox.sandbox,
+      createFilesWithMissingDocs(),
+      testSandbox.projectPath
+    );
+    console.info('✅ Additional files added\n');
+
+    // Step 3: Create context and input
+    const context = createTestContext({
+      sandbox: testSandbox.sandbox,
+    });
+
+    const input = createTestWorkflowInput({
+      message: TEST_MESSAGES.documentAll,
+      organizationId: 'example-org-123',
+      context,
+    });
+
+    // Step 4: Run the workflow
+    console.info('🤖 Running docs agent workflow...');
+    console.info(`Message: "${input.message}"\n`);
+
+    const startTime = Date.now();
+
+    const result = braintrustLogger
+      ? await wrapTraced(
+          async () => {
+            currentSpan().log({
+              metadata: {
+                exampleRun: true,
+                sandboxId: testSandbox?.sandboxId,
+                projectName: 'example_analytics',
+              },
+            });
+
+            const run = docsAgentWorkflow.createRun();
+            return await run.start({ inputData: input });
+          },
+          { name: 'Docs Agent Example Run' }
+        )()
+      : await docsAgentWorkflow.createRun().start({ inputData: input });
+
+    const duration = Date.now() - startTime;
+
+    // Step 5: Display results
+    console.info('\n📊 Workflow Results:');
+    console.info(`✅ Completed in ${duration}ms`);
+    console.info('\nOutput:');
+    console.info(JSON.stringify(result, null, 2));
+
+    if (result.status === 'success' && result.result.documentationCreated) {
+      console.info(
+        `\n✅ Documentation created! 
${result.result.metadata?.filesCreated || 0} files written`
+      );
+      if (result.result.metadata?.toolsUsed) {
+        console.info(`Tools used: ${result.result.metadata.toolsUsed.join(', ')}`);
+      }
+    }
+
+    if (result.status === 'success' && result.result.clarificationNeeded) {
+      console.info('\n❓ Clarification needed:');
+      console.info(`Issue: ${result.result.clarificationQuestion?.issue}`);
+      console.info(`Question: ${result.result.clarificationQuestion?.clarificationQuestion}`);
+    }
+
+    if (result.status === 'success' && result.result.todos) {
+      console.info(`\n📝 Generated ${result.result.todos.length} todos`);
+    }
+  } catch (error) {
+    console.error('\n❌ Error running example:', error);
+    throw error;
+  } finally {
+    // Cleanup
+    if (testSandbox) {
+      console.info('\n🧹 Cleaning up sandbox...');
+      await testSandbox.cleanup();
+      console.info('✅ Cleanup complete');
+    }
+
+    if (braintrustLogger) {
+      await braintrustLogger.flush();
+    }
+  }
+}
+
+// Run the example
+runExample()
+  .then(() => {
+    console.info('\n🎉 Example completed successfully!');
+    process.exit(0);
+  })
+  .catch((error) => {
+    console.error('\n💥 Example failed:', error);
+    process.exit(1);
+  });
diff --git a/packages/ai/src/workflows/docs-agent/test-helpers/sandbox-helpers.ts b/packages/ai/src/workflows/docs-agent/test-helpers/sandbox-helpers.ts
new file mode 100644
index 000000000..f9eb0a9c5
--- /dev/null
+++ b/packages/ai/src/workflows/docs-agent/test-helpers/sandbox-helpers.ts
@@ -0,0 +1,343 @@
+import { promises as fs } from 'node:fs';
+import { tmpdir } from 'node:os';
+import path from 'node:path';
+import { type FileInput, type Sandbox, addFiles, createSandbox } from '@buster/sandbox';
+import { type MockDbtProjectOptions, generateMockDbtProject } from './mock-dbt-project';
+import { createMockSandbox } from './mock-sandbox';
+
+export interface TestSandboxOptions {
+  projectOptions?: MockDbtProjectOptions;
+  additionalFiles?: FileInput[];
+  baseDir?: string;
+}
+
+export interface TestSandboxResult {
+  sandbox: Sandbox;
+  sandboxId: string;
+  projectPath: string;
+  cleanup: () => Promise<void>;
+}
+
+/**
+ * Creates a test sandbox with a mock dbt project (always uses in-memory mock sandbox)
+ * Use this for unit tests that don't need real sandbox functionality
+ */
+export async function createTestSandbox(
+  options: TestSandboxOptions = {}
+): Promise<TestSandboxResult> {
+  const { projectOptions = {}, additionalFiles = [], baseDir = 'dbt_project' } = options;
+
+  // Always use mock sandbox for unit tests
+  const sandbox = createMockSandbox();
+  const sandboxId = sandbox.id;
+
+  // Generate mock project files
+  const projectFiles = generateMockDbtProject(projectOptions);
+  const allFiles = [...projectFiles, ...additionalFiles];
+
+  // Upload files to sandbox
+  console.log(`[TestSandbox] Uploading ${allFiles.length} files to ${baseDir} directory`);
+  console.log('[TestSandbox] Files being uploaded:', allFiles.map(f => f.path).slice(0, 10));
+
+  const uploadResult = await addFiles(sandbox, allFiles, {
+    baseDestination: baseDir,
+    overwrite: true,
+  });
+
+  if (!uploadResult.success) {
+    throw new Error(
+      `Failed to upload files to sandbox: ${JSON.stringify(uploadResult.failedFiles)}`
+    );
+  }
+
+  console.log(`[TestSandbox] Successfully uploaded files to ${baseDir}`);
+  console.log('[TestSandbox] Uploaded files count:', uploadResult.uploadedFiles.length);
+
+  // Return sandbox info with cleanup function
+  return {
+    sandbox,
+    sandboxId,
+    projectPath: baseDir,
+    cleanup: async () => {
+      // Mock sandbox cleanup - storage cleared in memory
+      // No 
explicit close needed for mock sandbox
+    },
+  };
+}
+
+/**
+ * Creates a real sandbox with a mock dbt project for integration testing
+ * This uses an actual Daytona sandbox when DAYTONA_API_KEY is available;
+ * otherwise it falls back to the mock sandbox
+ */
+export async function createIntegrationTestSandbox(
+  options: TestSandboxOptions = {}
+): Promise<TestSandboxResult> {
+  const { projectOptions = {}, additionalFiles = [], baseDir = 'dbt_project' } = options;
+
+  let sandbox: Sandbox;
+  try {
+    // Try to create real sandbox if Daytona is available
+    sandbox = await createSandbox({ language: 'typescript' });
+    console.info('Using real Daytona sandbox for integration test');
+  } catch (_error) {
+    // Fall back to mock sandbox if Daytona is not available
+    console.warn('Daytona not available, using mock sandbox for integration test');
+    sandbox = createMockSandbox();
+  }
+
+  const sandboxId = sandbox.id;
+
+  // Generate mock project files
+  const projectFiles = generateMockDbtProject(projectOptions);
+  const allFiles = [...projectFiles, ...additionalFiles];
+
+  // Upload files to sandbox
+  console.log(`[TestSandbox] Uploading ${allFiles.length} files to ${baseDir} directory`);
+  console.log('[TestSandbox] Files being uploaded:', allFiles.map(f => f.path).slice(0, 10));
+
+  const uploadResult = await addFiles(sandbox, allFiles, {
+    baseDestination: baseDir,
+    overwrite: true,
+  });
+
+  if (!uploadResult.success) {
+    throw new Error(
+      `Failed to upload files to sandbox: ${JSON.stringify(uploadResult.failedFiles)}`
+    );
+  }
+
+  console.log(`[TestSandbox] Successfully uploaded files to ${baseDir}`);
+  console.log('[TestSandbox] Uploaded files count:', uploadResult.uploadedFiles.length);
+
+  // Return sandbox info with cleanup function
+  return {
+    sandbox,
+    sandboxId,
+    projectPath: baseDir,
+    cleanup: async () => {
+      // Try to close real sandbox if it has the method
+      if ('close' in sandbox && typeof sandbox.close === 'function') {
+        try {
+          await sandbox.close();
+        } catch (error) {
+          console.error('Error closing sandbox:', error);
+        }
+      }
+    },
+  };
+}
+
+/**
+ * Creates a local temporary directory with a mock dbt project for testing
+ */
+export async function createLocalTestProject(
+  options: MockDbtProjectOptions = {}
+): Promise<{ projectPath: string; cleanup: () => Promise<void> }> {
+  const tempDir = path.join(tmpdir(), `dbt-test-${Date.now()}`);
+  await fs.mkdir(tempDir, { recursive: true });
+
+  const projectFiles = generateMockDbtProject(options);
+
+  // Write all files to temp directory
+  for (const file of projectFiles) {
+    const filePath = path.join(tempDir, file.path);
+    const fileDir = path.dirname(filePath);
+
+    await fs.mkdir(fileDir, { recursive: true });
+
+    const content =
+      typeof file.content === 'string' ? 
file.content : file.content?.toString() || '';
+
+    await fs.writeFile(filePath, content, 'utf-8');
+  }
+
+  return {
+    projectPath: tempDir,
+    cleanup: async () => {
+      try {
+        await fs.rm(tempDir, { recursive: true, force: true });
+      } catch (error) {
+        console.error('Failed to cleanup temp directory:', error);
+      }
+    },
+  };
+}
+
+/**
+ * Helper to add additional files to an existing sandbox
+ */
+export async function addFilesToSandbox(
+  sandbox: Sandbox,
+  files: FileInput[],
+  baseDir = 'dbt_project'
+): Promise<void> {
+  const uploadResult = await addFiles(sandbox, files, {
+    baseDestination: baseDir,
+    overwrite: true,
+  });
+
+  if (!uploadResult.success) {
+    throw new Error(`Failed to add files to sandbox: ${JSON.stringify(uploadResult.failedFiles)}`);
+  }
+}
+
+/**
+ * Helper to create malformed YAML files for error testing
+ */
+export function createMalformedYamlFiles(): FileInput[] {
+  return [
+    {
+      path: 'models/staging/malformed_schema.yml',
+      content: `version: 2
+
+models:
+  - name: test_model
+    description: "Test model with malformed YAML"
+    columns:
+      - name: id
+        description: "Missing closing quote
+        tests:
+          - unique
+      - name: invalid
+        tests: [not_null
+`,
+    },
+    {
+      path: 'dbt_project.yml',
+      content: `name: 'broken_project
+version: '1.0.0'
+  invalid_indentation:
+  - mixed tabs and spaces
+    - this will break
+`,
+    },
+  ];
+}
+
+/**
+ * Helper to create files with missing documentation
+ */
+export function createFilesWithMissingDocs(): FileInput[] {
+  return [
+    {
+      path: 'models/staging/undocumented/users.sql',
+      content: `select
+  id,
+  email,
+  created_at,
+  updated_at,
+  is_active
+from {{ source('app', 'users') }}
+`,
+    },
+    {
+      path: 'models/staging/undocumented/orders.sql',
+      content: `select
+  id,
+  user_id,
+  order_date,
+  total_amount,
+  status
+from {{ source('app', 'orders') }}
+`,
+    },
+    {
+      path: 'models/staging/undocumented/schema.yml',
+      content: `version: 2
+
+models:
+  - name: users
+    # Missing description and column documentation
+    columns:
+      - name: id
+        tests:
+          - unique
+          - not_null
+      - name: email
+      - name: created_at
+
+  - name: orders
+    # Missing all documentation
+`,
+    },
+  ];
+}
+
+/**
+ * Helper to create a complex project structure for testing
+ */
+export function createComplexProjectStructure(): FileInput[] {
+  const files: FileInput[] = [];
+
+  // Multiple data sources
+  const dataSources = ['stripe', 'salesforce', 'postgres', 'snowplow'];
+
+  for (const source of dataSources) {
+    files.push({
+      path: `models/staging/${source}/README.md`,
+      content: `# ${source} Staging Models
+
+This directory contains staging models for ${source} data.
+`,
+    });
+
+    files.push({
+      path: `models/staging/${source}/schema.yml`,
+      content: `version: 2
+
+sources:
+  - name: ${source}
+    database: raw
+    schema: ${source}
+    tables:
+      - name: table1
+      - name: table2
+`,
+    });
+  }
+
+  // Multiple business domains
+  const domains = ['finance', 'marketing', 'operations', 'product'];
+
+  for (const domain of domains) {
+    files.push({
+      path: `models/marts/${domain}/README.md`,
+      content: `# ${domain} Data Mart
+
+Business logic for ${domain} analytics. 
+`, + }); + } + + // Analysis files + files.push({ + path: 'analyses/customer_churn_analysis.sql', + content: `-- Customer churn analysis +with churned_customers as ( + select * from {{ ref('dim_customers') }} + where churned_at is not null +) +select * from churned_customers +`, + }); + + // Snapshots + files.push({ + path: 'snapshots/customers_snapshot.sql', + content: `{% snapshot customers_snapshot %} + {{ + config( + target_schema='snapshots', + unique_key='id', + strategy='timestamp', + updated_at='updated_at', + ) + }} + select * from {{ source('app', 'customers') }} +{% endsnapshot %} +`, + }); + + return files; +} diff --git a/packages/ai/src/workflows/docs-agent/test-helpers/simple-test.ts b/packages/ai/src/workflows/docs-agent/test-helpers/simple-test.ts new file mode 100644 index 000000000..e62f5da60 --- /dev/null +++ b/packages/ai/src/workflows/docs-agent/test-helpers/simple-test.ts @@ -0,0 +1,90 @@ +#!/usr/bin/env tsx +/** + * Simple test to verify the test helpers work correctly + * Run with: tsx packages/ai/src/workflows/docs-agent/test-helpers/simple-test.ts + */ + +import { addFiles } from '@buster/sandbox'; +import { TEST_MESSAGES, createTestContext, createTestWorkflowInput } from './context-helpers'; +import { generateMockDbtProject } from './mock-dbt-project'; +import { createMockSandbox } from './mock-sandbox'; +import { createFilesWithMissingDocs, createMalformedYamlFiles } from './sandbox-helpers'; + +async function runSimpleTest() { + console.info('🧪 Running simple test of docs agent test helpers...\n'); + + try { + // Test 1: Mock sandbox creation + console.info('1️⃣ Testing mock sandbox creation...'); + const mockSandbox = createMockSandbox(); + console.info(`✅ Created mock sandbox with ID: ${mockSandbox.id}\n`); + + // Test 2: Generate mock dbt project + console.info('2️⃣ Testing mock dbt project generation...'); + const projectFiles = generateMockDbtProject({ + projectName: 'test_project', + companyName: 'TestCo', + }); + console.info(`✅ Generated ${projectFiles.length} project files\n`); + + // Test 3: Upload files to mock sandbox + console.info('3️⃣ Testing file upload to mock sandbox...'); + const uploadResult = await addFiles(mockSandbox, projectFiles, { + baseDestination: 'dbt_test', + }); + console.info(`✅ Upload result: ${uploadResult.success ? 
'Success' : 'Failed'}`);
+    console.info(`   Files uploaded: ${uploadResult.uploadedFiles.length}\n`);
+
+    // Test 4: Create test context
+    console.info('4️⃣ Testing context creation...');
+    const context = createTestContext({
+      sandbox: mockSandbox,
+      todoList: ['Document staging models', 'Update READMEs'],
+    });
+    console.info(`✅ Created context with ${context.todoList.length} todos\n`);
+
+    // Test 5: Create workflow input
+    console.info('5️⃣ Testing workflow input creation...');
+    const input = createTestWorkflowInput({
+      message: TEST_MESSAGES.documentAll,
+      context,
+    });
+    console.info(`✅ Created workflow input with message: "${input.message}"\n`);
+
+    // Test 6: Test file variations
+    console.info('6️⃣ Testing file variations...');
+    const malformedFiles = createMalformedYamlFiles();
+    console.info(`   Created ${malformedFiles.length} malformed YAML files`);
+
+    const missingDocsFiles = createFilesWithMissingDocs();
+    console.info(`   Created ${missingDocsFiles.length} files with missing docs`);
+
+    // Test 7: List files in mock sandbox
+    console.info('\n7️⃣ Testing file listing in mock sandbox...');
+    const files = await (
+      mockSandbox.fs as unknown as { listDirectory: (path: string) => Promise<string[]> }
+    ).listDirectory('dbt_test');
+    console.info(`✅ Found ${files.length} files in sandbox`);
+    console.info('   Sample files:', files.slice(0, 5).join(', '), '...\n');
+
+    // Cleanup
+    // Mock sandbox doesn't have a close method, but we can clear the storage
+    console.info('🧹 Cleaned up mock sandbox\n');
+
+    console.info('✅ All tests passed successfully!');
+  } catch (error) {
+    console.error('❌ Test failed:', error);
+    process.exit(1);
+  }
+}
+
+// Run the test
+runSimpleTest()
+  .then(() => {
+    console.info('\n🎉 Simple test completed!');
+    process.exit(0);
+  })
+  .catch((error) => {
+    console.error('\n💥 Test error:', error);
+    process.exit(1);
+  });
diff --git a/packages/sandbox/src/filesystem/add-files.ts b/packages/sandbox/src/filesystem/add-files.ts
index 6d60552f8..2c98bf5ec 100644
--- a/packages/sandbox/src/filesystem/add-files.ts
+++ b/packages/sandbox/src/filesystem/add-files.ts
@@ -145,7 +145,7 @@ export async function uploadMultipleFiles(
           : file.content
         : await fs.readFile(file.path);
 
-      const destination = file.destination || path.basename(file.path);
+      const destination = file.destination || file.path;
       const destPath = options?.baseDestination
         ? joinPaths(options.baseDestination, destination)
        : destination;
@@ -322,7 +322,7 @@ export async function addFiles(
       return uploadSingleFile(
         sandbox,
         fileInput.path,
-        fileInput.destination || path.basename(fileInput.path),
+        fileInput.destination || fileInput.path,
         options
       );
     }
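
---

For reference, a minimal sketch of how the helpers introduced in this patch might be combined in a quick smoke test. It assumes the exports shown above (`createTestSandbox`, `createTestContext`, `createTestWorkflowInput`, `TEST_MESSAGES`, and the workflow's `createRun().start()` shape, mirroring `run-example.ts`); the import paths and `'test-org'` ID are illustrative placeholders.

```typescript
// Minimal smoke test: build an in-memory mock dbt project, run the docs
// agent workflow against it, then clean up. A sketch, not part of the diff.
import docsAgentWorkflow from '../docs-agent-workflow';
import { TEST_MESSAGES, createTestContext, createTestWorkflowInput } from './context-helpers';
import { createTestSandbox } from './sandbox-helpers';

async function smokeTest(): Promise<void> {
  // Mock sandbox preloaded with generated project files (no Daytona needed)
  const { sandbox, cleanup } = await createTestSandbox({
    projectOptions: { includeDocumentation: false },
  });

  try {
    const context = createTestContext({ sandbox });
    const input = createTestWorkflowInput({
      message: TEST_MESSAGES.documentAll,
      organizationId: 'test-org', // illustrative placeholder
      context,
    });

    const result = await docsAgentWorkflow.createRun().start({ inputData: input });
    console.info(`Workflow finished with status: ${result.status}`);
  } finally {
    await cleanup();
  }
}

smokeTest().catch((error) => {
  console.error('Smoke test failed:', error);
  process.exit(1);
});
```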