diff --git a/packages/ai/package.json b/packages/ai/package.json
index 23df39137..011f1a622 100644
--- a/packages/ai/package.json
+++ b/packages/ai/package.json
@@ -29,8 +29,12 @@
"eval:metrics": "npx braintrust eval evals/agents/analyst-agent/metrics",
"eval:post-processing": "npx braintrust eval evals/agents/post-processing-agent",
"eval:golden": "npx braintrust eval evals/golden-dataset",
+ "eval:charting": "npx braintrust eval evals/agents/analyst-agent/chart-formatting",
+ "eval:tool-order": "npx braintrust eval evals/agents/analyst-agent/tool-order",
"braintrust:push": "npx braintrust push evals/agents/analyst-agent/metrics/test_scorers.ts",
- "braintrust:push:staged": "npx braintrust push evals/agents/analyst-agent/metrics/staged_scorers.ts"
+ "braintrust:push:metric:staged": "npx braintrust push evals/agents/analyst-agent/metrics/staged_scorers.ts",
+ "braintrust:push:chart:staged": "npx braintrust push evals/agents/analyst-agent/chart-formatting/staged-scorers.ts",
+ "braintrust:push:tools:staged": "npx braintrust push evals/agents/analyst-agent/tool-order/staged-scorers.ts"
},
"dependencies": {
"@ai-sdk/anthropic": "^1.2.12",
diff --git a/packages/ai/src/agents/analyst-agent/analyst-agent-instructions.ts b/packages/ai/src/agents/analyst-agent/analyst-agent-instructions.ts
index f857a56d8..534a92c73 100644
--- a/packages/ai/src/agents/analyst-agent/analyst-agent-instructions.ts
+++ b/packages/ai/src/agents/analyst-agent/analyst-agent-instructions.ts
@@ -252,7 +252,9 @@ ${params.sqlDialectGuidance}
- If the user asks for something that hasn't been created yet (like a different chart or a metric you haven't made yet) create a new metric
-- If the user wants to change something you've already built (like switching a chart from monthly to weekly data or adding a filter) just update the existing metric, don't create a new one
+- If the user wants to change something you've already built (like switching a chart from monthly to weekly data or adding a filter), update the existing metric instead of creating a new one, unless the user specifically asks you to recreate it.
+- If the user says, 'Hey Buster. Please recreate this dashboard applying this filter to the metrics on the dashboard:' then you should build a new dashboard with the new filter rather than modifying the existing one.
+- If the user says, 'Hey Buster. Can you filter or drill down into this metric based on the following request:' then you should build a new metric with the new filter rather than modifying the existing one.
diff --git a/packages/ai/src/agents/think-and-prep-agent/think-and-prep-instructions.ts b/packages/ai/src/agents/think-and-prep-agent/think-and-prep-instructions.ts
index e27edee60..f00c27234 100644
--- a/packages/ai/src/agents/think-and-prep-agent/think-and-prep-instructions.ts
+++ b/packages/ai/src/agents/think-and-prep-agent/think-and-prep-instructions.ts
@@ -262,7 +262,7 @@ Once all TODO list items are addressed and submitted for review, the system will
- Document each assumption in your thoughts using the \`sequentialThinking\` tool (e.g., "Assuming 'sales' refers to sales_amount column")
- If requested data isn't in the documentation, conclude that it doesn't exist and the request cannot be fulfilled:
- Do not submit your thoughts for review
- - Inform the user that the data does not exist via \`respondWithoutAssetCreation\`
+ - Use \`respondWithoutAssetCreation\` to inform the user that you do not currently have access to the data, and explain what you do have access to.