buster/packages/ai/evals/agents/analyst-agent/metrics/think-and-prep-updates.ts

import { RuntimeContext } from '@mastra/core/runtime-context';
import { Eval, initDataset } from 'braintrust';
import analystWorkflow, {
  type AnalystRuntimeContext,
} from '../../../../src/workflows/analyst-workflow';

/**
 * Runs the analyst workflow once for a single prompt and returns the messages
 * produced by its `format-output` step.
 *
 * The runtime context is populated with hard-coded user, organization and
 * data source identifiers, so the same values are used on every invocation.
 *
 * @param input - Prompt text passed to the workflow as `inputData.prompt`.
 * @returns The `outputMessages` of the `format-output` step, or an empty array
 *   when that step is absent, did not succeed, or produced no messages.
 * @throws Error when the workflow as a whole, or the `format-output` step,
 *   reports status `'failed'`.
 */
const runAnalystWorkflow = async (input: string) => {
  const runtimeContext = new RuntimeContext<AnalystRuntimeContext>();
  runtimeContext.set('userId', 'c2dd64cd-f7f3-4884-bc91-d46ae431901e');
  runtimeContext.set('organizationId', 'bf58d19a-8bb9-4f1d-a257-2d2105e7f1ce');
  runtimeContext.set('dataSourceId', 'cc3ef3bc-44ec-4a43-8dc4-681cae5c996a');
  runtimeContext.set('dataSourceSyntax', 'postgresql');

  const run = analystWorkflow.createRun();

  const response = await run.start({
    inputData: { prompt: input },
    runtimeContext,
  });

  if (response.status === 'failed') {
    throw new Error(`Workflow failed: ${response.error}`);
  }

  const formatOutputStep = response.steps['format-output'];

  // A non-failed run is not guaranteed to contain a record for this step;
  // without this guard the `.status` reads below would throw an opaque
  // TypeError instead of falling through to the empty result.
  if (!formatOutputStep) {
    return [];
  }

  if (formatOutputStep.status === 'failed') {
    throw new Error(`Format output step failed: ${formatOutputStep.error}`);
  }

  if (formatOutputStep.status === 'success') {
    // `??` only substitutes the default for null/undefined.
    return formatOutputStep.output.outputMessages ?? [];
  }

  // Any other step status yields no messages.
  return [];
};

// Experiment configuration: the project name is shared by the dataset lookup
// and the Eval registration, so it is declared once.
const braintrustProject = 'development';

const randomTestsDataset = initDataset({
  project: braintrustProject,
  dataset: 'Random-Tests',
});

Eval(braintrustProject, {
  experimentName: 'random-think-and-prep-updates',
  data: randomTestsDataset,
  task: runAnalystWorkflow,
  // No scoring functions for now - the experiment is only run, not scored.
  scores: [],
  // Run up to 10 tests at the same time.
  maxConcurrency: 10,
});
Refactor think-and-prep agent tools and update scoring functions - Renamed `respondWithoutAnalysis` to `respondWithoutAssetCreation` across multiple files to better reflect its purpose. - Updated the `think-and-prep-instructions.ts` to clarify the agent's focus on asset creation instead of analysis. - Added a new file `think-and-prep-updates.ts` to implement the analyst workflow with improved error handling and response management. - Commented out legacy code in `example_scorers.ts` and `metric.eval.ts` for clarity and future reference. - Adjusted the `sequential-thinking-tool` to remove unnecessary fields and streamline the thought processing logic. This refactor enhances the clarity and functionality of the think-and-prep agent, aligning it with the current project goals. 2025-07-23 01:56:40 +08:00			`import { RuntimeContext } from '@mastra/core/runtime-context';`
			`import { Eval, initDataset } from 'braintrust';`
			`import analystWorkflow, {`
			`type AnalystRuntimeContext,`
			`} from '../../../../src/workflows/analyst-workflow';`

			`const runAnalystWorkflow = async (input: string) => {`
			`const runtimeContext = new RuntimeContext<AnalystRuntimeContext>();`
			`runtimeContext.set('userId', 'c2dd64cd-f7f3-4884-bc91-d46ae431901e');`
			`runtimeContext.set('organizationId', 'bf58d19a-8bb9-4f1d-a257-2d2105e7f1ce');`
			`runtimeContext.set('dataSourceId', 'cc3ef3bc-44ec-4a43-8dc4-681cae5c996a');`
			`runtimeContext.set('dataSourceSyntax', 'postgresql');`

			`const run = analystWorkflow.createRun();`

			`const response = await run.start({`
			`inputData: { prompt: input },`
			`runtimeContext,`
			`});`

			`if (response.status === 'failed') {`
			throw new Error(`Workflow failed: ${response.error}`);
			`}`

			`const formatOutputStep = response.steps['format-output'];`
			`if (formatOutputStep.status === 'failed') {`
			throw new Error(`Format output step failed: ${formatOutputStep.error}`);
			`}`

			`if (formatOutputStep.status === 'success') {`
			`return formatOutputStep.output.outputMessages \|\| [];`
			`}`

			`return [];`
			`};`

			`// Your experiment configuration`
			`Eval('development', {`
			`experimentName: 'random-think-and-prep-updates',`
			`data: initDataset({`
			`project: 'development',`
			`dataset: 'Random-Tests',`
			`}),`
			`task: runAnalystWorkflow,`
			`scores: [], // No scoring functions for now - just running the experiment`
Update packages/ai/evals/agents/analyst-agent/metrics/think-and-prep-updates.ts Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> 2025-07-23 22:28:38 +08:00			`maxConcurrency: 10, // Run up to 10 tests at the same time`
Refactor think-and-prep agent tools and update scoring functions - Renamed `respondWithoutAnalysis` to `respondWithoutAssetCreation` across multiple files to better reflect its purpose. - Updated the `think-and-prep-instructions.ts` to clarify the agent's focus on asset creation instead of analysis. - Added a new file `think-and-prep-updates.ts` to implement the analyst workflow with improved error handling and response management. - Commented out legacy code in `example_scorers.ts` and `metric.eval.ts` for clarity and future reference. - Adjusted the `sequential-thinking-tool` to remove unnecessary fields and streamline the thought processing logic. This refactor enhances the clarity and functionality of the think-and-prep agent, aligning it with the current project goals. 2025-07-23 01:56:40 +08:00			`});`