salesforcecli · andresrivas-sf · May 5, 2026 · May 5, 2026 · May 5, 2026
diff --git a/command-snapshot.json b/command-snapshot.json
@@ -228,7 +228,7 @@
     "alias": [],
     "command": "agent:test:run-eval",
     "flagAliases": [],
-    "flagChars": ["n", "o", "s", "w"],
+    "flagChars": ["n", "o", "s"],
     "flags": [
       "api-name",
       "api-version",
@@ -238,8 +238,7 @@
       "no-normalize",
       "result-format",
       "spec",
-      "target-org",
-      "wait"
+      "target-org"
     ],
     "plugin": "@salesforce/plugin-agent"
   },

@@ -20,10 +20,6 @@ Path to test spec file (YAML or JSON). Supports reading from stdin when piping c
 
 Agent DeveloperName (also called API name) to resolve agent_id and agent_version_id. Auto-inferred from the YAML spec's subjectName.
 
-# flags.wait.summary
-
-Number of minutes to wait for results.
-
 # flags.result-format.summary
 
 Format of the agent test results.

diff --git a/src/commands/agent/test/run-eval.ts b/src/commands/agent/test/run-eval.ts
@@ -16,11 +16,22 @@
 
 import { readFile } from 'node:fs/promises';
 import { Flags, SfCommand, toHelpSection } from '@salesforce/sf-plugins-core';
-import { EnvironmentVariable, Messages, Org, SfError } from '@salesforce/core';
-import { type EvalPayload, normalizePayload, splitIntoBatches } from '../../../evalNormalizer.js';
-import { type EvalApiResponse, formatResults, type ResultFormat } from '../../../evalFormatter.js';
+import { EnvironmentVariable, Messages, SfError } from '@salesforce/core';
+import {
+  type EvalPayload,
+  normalizePayload,
+  splitIntoBatches,
+  type EvalApiResponse,
+  formatResults,
+  type ResultFormat,
+  isYamlTestSpec,
+  parseTestSpec,
+  translateTestSpec,
+  resolveAgent,
+  executeBatches,
+  buildResultSummary,
+} from '@salesforce/agents';
 import { resultFormatFlag } from '../../../flags.js';
-import { isYamlTestSpec, parseTestSpec, translateTestSpec } from '../../../yamlSpecTranslator.js';
 
 Messages.importMessagesDirectoryFromMetaUrl(import.meta.url);
 const messages = Messages.loadMessages('@salesforce/plugin-agent', 'agent.test.run-eval');
@@ -30,132 +41,35 @@ export type RunEvalResult = {
   summary: { passed: number; failed: number; scored: number; errors: number };
 };
 
-// --- Standalone helper functions ---
-
-type ApiHeaders = {
-  orgId: string;
-  userId: string;
-  instanceUrl: string;
-};
-
-async function getApiHeaders(org: Org): Promise<ApiHeaders> {
-  const conn = org.getConnection();
-  const userInfo = await conn.request<{ user_id: string }>(`${conn.instanceUrl}/services/oauth2/userinfo`);
-
-  return {
-    orgId: org.getOrgId(),
-    userId: userInfo.user_id,
-    instanceUrl: conn.instanceUrl,
-  };
-}
-
-async function callEvalApi(org: Org, payload: EvalPayload, headers: ApiHeaders): Promise<{ results?: unknown[] }> {
-  const conn = org.getConnection();
-
-  return conn.request<{ results?: unknown[] }>({
-    url: 'https://api.salesforce.com/einstein/evaluation/v1/tests',
-    method: 'POST',
-    headers: {
-      'Content-Type': 'application/json',
-      'x-sfdc-core-tenant-id': `core/prod/${headers.orgId}`,
-      'x-org-id': headers.orgId,
-      'x-sfdc-core-instance-url': headers.instanceUrl,
-      'x-sfdc-user-id': headers.userId,
-      'x-client-feature-id': 'AIPlatformEvaluation',
-      'x-sfdc-app-context': 'EinsteinGPT',
-    },
-    body: JSON.stringify(payload),
-  });
-}
-
-async function resolveAgent(org: Org, apiName: string): Promise<{ agentId: string; versionId: string }> {
-  const conn = org.getConnection();
-
-  // Escape single quotes to prevent SOQL injection
-  const escapedApiName = apiName.replace(/'/g, "\\'");
-
-  const botResult = await conn.query<{ Id: string }>(
-    `SELECT Id FROM BotDefinition WHERE DeveloperName = '${escapedApiName}'`
-  );
-  if (!botResult.records.length) {
-    throw messages.createError('error.agentNotFound', [apiName]);
+async function resolveAndInjectAgent(
+  org: Parameters<typeof resolveAgent>[0],
+  agentApiName: string,
+  payload: EvalPayload
+): Promise<void> {
+  let agentId: string;
+  let versionId: string;
+  try {
+    ({ agentId, versionId } = await resolveAgent(org, agentApiName));
+  } catch (e) {
+    const wrapped = SfError.wrap(e);
+    throw new SfError(`Agent '${agentApiName}' not found.`, 'AgentNotFound', [], 2, wrapped);
   }
-  const agentId = botResult.records[0].Id;
-
-  // Filter to published/active versions only
-  const versionResult = await conn.query<{ Id: string }>(
-    `SELECT Id FROM BotVersion WHERE BotDefinitionId = '${agentId}'  ORDER BY VersionNumber DESC LIMIT 1`
-  );
-  if (!versionResult.records.length) {
-    throw messages.createError('error.agentVersionNotFound', [apiName]);
-  }
-  const versionId = versionResult.records[0].Id;
-
-  return { agentId, versionId };
-}
-
-async function executeBatches(
-  org: Org,
-  batches: Array<EvalPayload['tests']>,
-  log: (msg: string) => void
-): Promise<unknown[]> {
-  // Pre-calculate headers once to avoid redundant API calls
-  const headers = await getApiHeaders(org);
-
-  // Execute all batches in parallel for better performance
-  if (batches.length > 1) {
-    log(messages.getMessage('info.batchProgress', [batches.length, batches.length, 'total']));
-  }
-
-  const batchPromises = batches.map(async (batch) => {
-    const batchPayload: EvalPayload = { tests: batch };
-    const resultObj = await callEvalApi(org, batchPayload, headers);
-    return resultObj.results ?? [];
-  });
-
-  const batchResults = await Promise.all(batchPromises);
-  return batchResults.flat();
-}
-
-function buildResultSummary(mergedResponse: EvalApiResponse): {
-  summary: RunEvalResult['summary'];
-  testSummaries: RunEvalResult['tests'];
-} {
-  const summary = { passed: 0, failed: 0, scored: 0, errors: 0 };
-  const testSummaries: Array<{ id: string; status: string; evaluations: unknown[]; outputs: unknown[] }> = [];
-
-  for (const testResult of mergedResponse.results ?? []) {
-    const tr = testResult as Record<string, unknown>;
-    const testId = (tr.id as string) ?? 'unknown';
-    const evalResults = (tr.evaluation_results as Array<Record<string, unknown>>) ?? [];
-    const testErrors = (tr.errors as unknown[]) ?? [];
-
-    const passed = evalResults.filter((e) => e.is_pass === true).length;
-    const failed = evalResults.filter((e) => e.is_pass === false).length;
-    const scored = evalResults.filter((e) => e.score != null && e.is_pass == null).length;
-
-    summary.passed += passed;
-    summary.failed += failed;
-    summary.scored += scored;
-    summary.errors += testErrors.length;
-
-    testSummaries.push({
-      id: testId,
-      status: failed > 0 || testErrors.length > 0 ? 'failed' : 'passed',
-      evaluations: evalResults,
-      outputs: (tr.outputs as unknown[]) ?? [],
-    });
+  for (const test of payload.tests) {
+    for (const step of test.steps) {
+      if (step.type === 'agent.create_session') {
+        // eslint-disable-next-line camelcase
+        step.agent_id = agentId;
+        // eslint-disable-next-line camelcase
+        step.agent_version_id = versionId;
+      }
+    }
   }
-
-  return { summary, testSummaries };
 }
 
 export default class AgentTestRunEval extends SfCommand<RunEvalResult> {
   public static readonly summary = messages.getMessage('summary');
   public static readonly description = messages.getMessage('description');
   public static readonly examples = messages.getMessages('examples');
-  public static state = 'beta';
-  public static readonly hidden = true;
 
   public static readonly envVariablesSection = toHelpSection(
     'ENVIRONMENT VARIABLES',
@@ -182,11 +96,6 @@ export default class AgentTestRunEval extends SfCommand<RunEvalResult> {
       char: 'n',
       summary: messages.getMessage('flags.api-name.summary'),
     }),
-    wait: Flags.integer({
-      char: 'w',
-      default: 10,
-      summary: messages.getMessage('flags.wait.summary'),
-    }),
     'result-format': resultFormatFlag(),
     'batch-size': Flags.integer({
       default: 5,
@@ -204,41 +113,39 @@ export default class AgentTestRunEval extends SfCommand<RunEvalResult> {
 
     // 1. Get spec content (from file or stdin via allowStdin)
     let rawContent = flags.spec;
+    let isYaml = isYamlTestSpec(rawContent);
 
-    // If spec looks like it might be a file path (not parseable content), read the file
-    try {
-      // Try to detect if it's actual content vs a file path
-      // If it's a valid YAML/JSON, it's content; otherwise treat as file path
-      if (!isYamlTestSpec(rawContent)) {
-        JSON.parse(rawContent);
-      }
-      // If we got here, it's valid content
-    } catch {
-      // Not valid content, must be a file path - read it
+    if (!isYaml) {
       try {
-        rawContent = await readFile(flags.spec, 'utf-8');
-      } catch (e) {
-        const wrapped = SfError.wrap(e);
-        throw new SfError(`Spec file not found: ${flags.spec}`, 'SpecFileNotFound', [], 2, wrapped);
+        JSON.parse(rawContent);
+      } catch {
+        try {
+          rawContent = await readFile(flags.spec, 'utf-8');
+        } catch (e) {
+          const wrapped = SfError.wrap(e);
+          throw new SfError(`Spec file not found: ${flags.spec}`, 'SpecFileNotFound', [], 2, wrapped);
+        }
+        isYaml = isYamlTestSpec(rawContent);
       }
     }
 
     // 2. Detect format and parse
-    let payload: EvalPayload;
+    let payload!: EvalPayload;
     let agentApiName = flags['api-name'];
 
-    if (isYamlTestSpec(rawContent)) {
-      // YAML TestSpec detected — translate to EvalPayload
-      const spec = parseTestSpec(rawContent);
-      payload = translateTestSpec(spec);
+    if (isYaml) {
+      try {
+        const spec = parseTestSpec(rawContent);
+        payload = translateTestSpec(spec);
 
-      // Auto-infer api-name from subjectName if not explicitly provided
-      if (!agentApiName) {
-        agentApiName = spec.subjectName;
-        this.log(messages.getMessage('info.yamlDetected', [spec.subjectName, spec.testCases.length.toString()]));
+        if (!agentApiName) {
+          agentApiName = spec.subjectName;
+          this.log(messages.getMessage('info.yamlDetected', [spec.subjectName, spec.testCases.length.toString()]));
+        }
+      } catch (e) {
+        throw messages.createError('error.invalidPayload', [(e as Error).message]);
       }
     } else {
-      // JSON EvalPayload (original behavior)
       try {
         payload = JSON.parse(rawContent) as EvalPayload;
       } catch (e) {
@@ -250,47 +157,28 @@ export default class AgentTestRunEval extends SfCommand<RunEvalResult> {
       throw messages.createError('error.invalidPayload', ['missing or empty "tests" array']);
     }
 
-    // 3. If --api-name (or auto-inferred from YAML), resolve IDs and inject
-    if (agentApiName) {
-      let agentId;
-      let versionId;
-      try {
-        const resolved = await resolveAgent(org, agentApiName);
-        agentId = resolved.agentId;
-        versionId = resolved.versionId;
-      } catch (e) {
-        const wrapped = SfError.wrap(e);
-        throw new SfError(`Agent '${agentApiName}' not found.`, 'AgentNotFound', [], 2, wrapped);
+    for (const test of payload.tests) {
+      if (!Array.isArray(test.steps)) {
+        throw messages.createError('error.invalidPayload', [`test '${test.id}' has missing or invalid 'steps' array`]);
       }
+    }
 
-      for (const test of payload.tests) {
-        for (const step of test.steps) {
-          if (step.type === 'agent.create_session') {
-            // eslint-disable-next-line camelcase
-            step.agent_id = agentId;
-            // eslint-disable-next-line camelcase
-            step.agent_version_id = versionId;
-          }
-        }
-      }
+    // 3. If --api-name (or auto-inferred from YAML), resolve IDs and inject
+    if (agentApiName) {
+      await resolveAndInjectAgent(org, agentApiName, payload);
     }
 
     // 4. Normalize payload unless --no-normalize
     if (!flags['no-normalize']) {
       payload = normalizePayload(payload);
     }
 
-    // 5. Clamp batch size
+    // 5. Clamp batch size and split into batches
     const batchSize = Math.min(Math.max(flags['batch-size'], 1), 5);
-
-    // 6. Split into batches
     const batches = splitIntoBatches(payload.tests, batchSize);
 
-    // 7. Execute batches
-    let allResults;
-    try {
-      allResults = await executeBatches(org, batches, (msg) => this.log(msg));
-    } catch (e) {
+    // 6. Execute batches
+    const allResults = await executeBatches(org, batches, (msg) => this.log(msg)).catch((e) => {
       const wrapped = SfError.wrap(e);
       throw new SfError(
         `Failed to execute tests: ${wrapped.message}`,
@@ -299,20 +187,16 @@ export default class AgentTestRunEval extends SfCommand<RunEvalResult> {
         4,
         wrapped
       );
-    }
+    });
 
     const mergedResponse: EvalApiResponse = { results: allResults as EvalApiResponse['results'] };
 
-    // 9. Format output
-    const resultFormat = (flags['result-format'] ?? 'human') as ResultFormat;
-    const formatted = formatResults(mergedResponse, resultFormat);
-    this.log(formatted);
+    // 7. Format output
+    this.log(formatResults(mergedResponse, (flags['result-format'] ?? 'human') as ResultFormat));
 
-    // 10. Build structured result for --json
+    // 8. Build structured result for --json
     const { summary, testSummaries } = buildResultSummary(mergedResponse);
 
-    // Set exit code to 1 only for execution errors (tests couldn't run)
-    // Test failures (assertions failed) are business logic and should not affect exit code
     if (summary.errors > 0) {
       process.exitCode = 1;
     }