From de61e0406f981ba5c5f3f6354731e8d85f713727 Mon Sep 17 00:00:00 2001
From: nickwinder <nfxdevelopment@gmail.com>
Date: Wed, 13 May 2026 15:41:23 +1200
Subject: [PATCH 1/6] feat(auth): add useOAuth path so sandboxed Claude agents
 use a subscription token
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds a first-class subscription-auth option for executor/judge sandbox roles:

    "agents": {
      "executor": { "command": "claude", "useOAuth": true },
      "judge":    { "command": "claude", "useOAuth": true }
    }

Why this exists: per-token API billing for a full A/B sweep against a
real SDK costs ~$135-$270 (Opus 4.7); we just stopped a run partway in
at ~$30 sunk. Claude Code on a Pro/Max/Team/Enterprise plan can
authenticate via a long-lived subscription token instead — flat-rate
billing tied to the plan.

How it works: the framework's existing `secret` path uses microsandbox
TLS-injection — the cleartext value never enters the VM, only a
placeholder substituted on the wire for the allowed host. That model is
fundamentally incompatible with `CLAUDE_CODE_OAUTH_TOKEN` because Claude
reads the token directly from `process.env`. So `useOAuth: true`:

  - Resolves CLAUDE_CODE_OAUTH_TOKEN from the host environment
    (fail-fast with a setup-token hint if unset)
  - Injects it into the sandbox as a plain env var via `sandbox.env`
  - Skips `buildAgentSecret` for that role (no API key in env at all,
    so Claude's auth precedence falls through cleanly to OAuth)
  - Sets ANTHROPIC_BASE_URL from the adapter default
  - For judge: contributes the adapter default hostname to the network
    lockdown allowlist

Validation: exactly one of `secret` or `useOAuth: true` must be set per
sandbox role, and `useOAuth: true` requires `command: "claude"`. Setting
both, or neither, is rejected. The checks run in `validateConfig` at
config-load time, which keeps the auth surface intentionally narrow.

User flow (one-time host setup):
  claude setup-token                          # interactive, ~1 yr token
  export CLAUDE_CODE_OAUTH_TOKEN='<token>'    # then run the eval

Tests: 354 pass (added 6 — 4 config validation + 2 resolveOAuthToken).
README + config-schema reference document the new path. Type-check clean.
---
 README.md                                  | 26 ++++++++
 skills/_reference/config-schema.md         | 10 ++-
 src/commands/execute.ts                    | 21 +++++--
 src/commands/sandbox.ts                    | 17 ++++--
 src/core/__tests__/config.test.ts          | 48 +++++++++++++--
 src/core/config.ts                         | 71 ++++++++++++++--------
 src/sandbox/__tests__/microsandbox.test.ts | 26 +++++++-
 src/sandbox/microsandbox.ts                | 23 +++++++
 src/scoring/judge.ts                       | 28 ++++++---
 src/types.ts                               | 20 +++++-
 10 files changed, 235 insertions(+), 55 deletions(-)
diff --git a/README.md b/README.md
index eac76d8..26b1838 100644
--- a/README.md
+++ b/README.md
@@ -364,6 +364,32 @@ Generator and insights agents run locally and do not require a secret.
 | `baseUrl` | API base URL. Hostname is used for network allowlisting. Auto-detected for known agents. |
 | `baseUrlEnvVar` | Override the base URL env var name. Auto-detected for known agents. |
 
+#### Claude Code subscription auth (avoid API billing)
+
+If you have a Claude Pro / Max / Team / Enterprise subscription, sandbox agents using `command: "claude"` can authenticate via your subscription instead of paying per-token API charges. Set `useOAuth: true` instead of providing a `secret`:
+
+```json
+{
+  "agents": {
+    "executor": { "command": "claude", "useOAuth": true },
+    "judge":    { "command": "claude", "useOAuth": true }
+  }
+}
+```
+
+One-time host setup:
+
+```bash
+claude setup-token             # interactive — generates a long-lived OAuth token
+export CLAUDE_CODE_OAUTH_TOKEN='<token>'   # before running the eval
+```
+
+Notes:
+- Token is injected into the sandbox as a plain env var (Claude reads it directly from `process.env`; the API-key TLS-substitution model does not apply).
+- Only valid for `command: "claude"`. The framework rejects `useOAuth: true` on other adapters at config-load time.
+- Setting both `secret` and `useOAuth` is rejected — choose one path per role.
+- Subscription concurrent-session caps apply.
+
 #### Custom agents
 
 Custom agents support additional args fields with `{prompt}` and `{workDir}` placeholders:
diff --git a/skills/_reference/config-schema.md b/skills/_reference/config-schema.md
index 6e96773..c1bb0a5 100644
--- a/skills/_reference/config-schema.md
+++ b/skills/_reference/config-schema.md
@@ -76,11 +76,16 @@
 
 ### SandboxAgentConfig (executor, judge)
 
-Extends AgentConfig with one **required** field:
+Extends AgentConfig with auth fields. **Exactly one of `secret` or `useOAuth` is required.**
 
 | Field | Type | Required |
 |-------|------|----------|
-| `secret` | `AgentSecretConfig` | **Yes** |
+| `secret` | `AgentSecretConfig` | One-of — API key via TLS injection |
+| `useOAuth` | `boolean` | One-of — Claude Code subscription via `CLAUDE_CODE_OAUTH_TOKEN`. Only valid when `command: "claude"`. |
+
+**API-key path (`secret`)** — microsandbox TLS-injects the value, so the cleartext never enters the VM. Inside the sandbox the env var contains only a placeholder substituted on the wire for the allowed host.
+
+**Subscription path (`useOAuth: true`)** — reads `CLAUDE_CODE_OAUTH_TOKEN` from the host environment and injects it into the sandbox as a plain env var. Claude reads the token directly. Generate the token once with `claude setup-token` (Pro / Max / Team / Enterprise required), export it, then `useOAuth: true` on both executor and judge. Avoids per-token API billing.
 
 ### AgentSecretConfig
 
@@ -147,6 +152,7 @@ Custom agents (any command not in the table above) **must** provide `envVar` and
 7. `agents.executor` and `agents.judge` must have `secret.value` (non-empty string)
 8. Custom agents must provide `envVar` and `baseUrl` in their secret
 9. `baseUrl` must be a parseable URL
+10. Each sandbox agent role (`executor`, `judge`) must declare auth: either `secret` (API-key path) or `useOAuth: true` (Claude Code subscription path). Setting both is rejected. `useOAuth: true` requires `command: "claude"`.
 
 ## Minimal Examples
 
diff --git a/src/commands/execute.ts b/src/commands/execute.ts
index af24ee0..77f43a7 100644
--- a/src/commands/execute.ts
+++ b/src/commands/execute.ts
@@ -3,7 +3,7 @@ import ora from 'ora';
 import { loadDotenv } from '../core/env.js';
 import { loadConfig } from '../core/config.js';
 import { loadTestSuite, saveResult, saveBinaryResult, formatElapsed } from '../core/suite-io.js';
-import { MicrosandboxClient, buildSecrets, buildAgentSecret, resolveEnv, type CommandResult } from '../sandbox/microsandbox.js';
+import { MicrosandboxClient, buildSecrets, buildAgentSecret, resolveEnv, resolveOAuthToken, type CommandResult } from '../sandbox/microsandbox.js';
 import { createEgressLogger } from '../sandbox/egress-logger.js';
 import { scaffoldWorkspace } from '../sandbox/scaffolding.js';
 import { WorkerPool } from '../sandbox/worker-pool.js';
@@ -158,14 +158,23 @@ export async function executeTestCase(
     const env = resolveEnv(config.sandbox?.env);
     const timeoutSecs = target.timeout ?? config.sandbox.defaultTimeout ?? 600;
 
-    // Merge agent secret into sandbox secrets and set base URL env var
+    // Resolve agent auth. Two paths:
+    //   - secret  → microsandbox TLS-injected placeholder for an API key
+    //   - useOAuth → plain CLAUDE_CODE_OAUTH_TOKEN env var (claude reads it directly)
     const executorConfig: SandboxAgentConfig = config.agents?.executor
       ?? { command: 'claude', secret: { value: '$ANTHROPIC_API_KEY' } };
     const execAdapter = createAdapter(executorConfig);
-    secrets.push(buildAgentSecret(executorConfig.secret, execAdapter.additionalAllowHosts));
-    const baseUrlVar = executorConfig.secret.baseUrlEnvVar ?? execAdapter.baseUrlEnvVar;
-    if (baseUrlVar && executorConfig.secret.baseUrl) {
-      env[baseUrlVar] = executorConfig.secret.baseUrl;
+    if (executorConfig.useOAuth) {
+      env.CLAUDE_CODE_OAUTH_TOKEN = resolveOAuthToken();
+      if (execAdapter.baseUrlEnvVar && execAdapter.defaultBaseUrl) {
+        env[execAdapter.baseUrlEnvVar] = execAdapter.defaultBaseUrl;
+      }
+    } else if (executorConfig.secret) {
+      secrets.push(buildAgentSecret(executorConfig.secret, execAdapter.additionalAllowHosts));
+      const baseUrlVar = executorConfig.secret.baseUrlEnvVar ?? execAdapter.baseUrlEnvVar;
+      if (baseUrlVar && executorConfig.secret.baseUrl) {
+        env[baseUrlVar] = executorConfig.secret.baseUrl;
+      }
     }
 
     await client.create(
diff --git a/src/commands/sandbox.ts b/src/commands/sandbox.ts
index 7bb2e05..2ca5c45 100644
--- a/src/commands/sandbox.ts
+++ b/src/commands/sandbox.ts
@@ -5,7 +5,7 @@ import { loadDotenv } from '../core/env.js';
 import { loadConfig } from '../core/config.js';
 import { loadTestSuite, loadBinaryResult } from '../core/suite-io.js';
 import { loadJsonFile } from '../core/results.js';
-import { MicrosandboxClient, buildSecrets, buildAgentSecret, resolveEnv } from '../sandbox/microsandbox.js';
+import { MicrosandboxClient, buildSecrets, buildAgentSecret, resolveEnv, resolveOAuthToken } from '../sandbox/microsandbox.js';
 import { scaffoldWorkspace, uploadSources } from '../sandbox/scaffolding.js';
 import { createEgressLogger } from '../sandbox/egress-logger.js';
 import { createAdapter } from '../agents/adapter.js';
@@ -59,10 +59,17 @@ export async function sandboxCommand(paths: ProjectPaths, options: SandboxOption
   if (options.mode) {
     agentConfig = getAgentConfig(config, options.mode);
     adapter = createAdapter(agentConfig);
-    secrets.push(buildAgentSecret(agentConfig.secret, adapter.additionalAllowHosts));
-    const baseUrlVar = agentConfig.secret.baseUrlEnvVar ?? adapter.baseUrlEnvVar;
-    if (baseUrlVar && agentConfig.secret.baseUrl) {
-      env[baseUrlVar] = agentConfig.secret.baseUrl;
+    if (agentConfig.useOAuth) {
+      env.CLAUDE_CODE_OAUTH_TOKEN = resolveOAuthToken();
+      if (adapter.baseUrlEnvVar && adapter.defaultBaseUrl) {
+        env[adapter.baseUrlEnvVar] = adapter.defaultBaseUrl;
+      }
+    } else if (agentConfig.secret) {
+      secrets.push(buildAgentSecret(agentConfig.secret, adapter.additionalAllowHosts));
+      const baseUrlVar = agentConfig.secret.baseUrlEnvVar ?? adapter.baseUrlEnvVar;
+      if (baseUrlVar && agentConfig.secret.baseUrl) {
+        env[baseUrlVar] = agentConfig.secret.baseUrl;
+      }
     }
   }
 
diff --git a/src/core/__tests__/config.test.ts b/src/core/__tests__/config.test.ts
index 61fe7da..7b08938 100644
--- a/src/core/__tests__/config.test.ts
+++ b/src/core/__tests__/config.test.ts
@@ -149,9 +149,9 @@ describe('loadConfig', () => {
     mockReadFile.mockResolvedValue(JSON.stringify(config));
     const result = await loadConfig('/fake/config.json');
     // Defaults should be filled in
-    expect(result.agents?.judge?.secret.envVar).toBe('ANTHROPIC_API_KEY');
-    expect(result.agents?.judge?.secret.baseUrl).toBe('https://api.anthropic.com');
-    expect(result.agents?.judge?.secret.baseUrlEnvVar).toBe('ANTHROPIC_BASE_URL');
+    expect(result.agents?.judge?.secret?.envVar).toBe('ANTHROPIC_API_KEY');
+    expect(result.agents?.judge?.secret?.baseUrl).toBe('https://api.anthropic.com');
+    expect(result.agents?.judge?.secret?.baseUrlEnvVar).toBe('ANTHROPIC_BASE_URL');
   });
 
   it('accepts known agent with all secret fields explicit', async () => {
@@ -166,7 +166,7 @@ describe('loadConfig', () => {
     };
     mockReadFile.mockResolvedValue(JSON.stringify(config));
     const result = await loadConfig('/fake/config.json');
-    expect(result.agents?.judge?.secret.envVar).toBe('ANTHROPIC_API_KEY');
+    expect(result.agents?.judge?.secret?.envVar).toBe('ANTHROPIC_API_KEY');
   });
 
   it('throws when sandbox agent (executor) is missing secret', async () => {
@@ -223,4 +223,44 @@ describe('loadConfig', () => {
     mockReadFile.mockResolvedValue(JSON.stringify(config));
     await expect(loadConfig('/fake/config.json')).rejects.toThrow(/valid URL/);
   });
+
+  describe('useOAuth (Claude Code subscription auth)', () => {
+    it('accepts judge with useOAuth: true and no secret', async () => {
+      const config = {
+        ...validConfig,
+        agents: { judge: { command: 'claude', useOAuth: true } },
+      };
+      mockReadFile.mockResolvedValue(JSON.stringify(config));
+      const result = await loadConfig('/fake/config.json');
+      expect(result.agents?.judge?.useOAuth).toBe(true);
+      expect(result.agents?.judge?.secret).toBeUndefined();
+    });
+
+    it('rejects useOAuth with command != claude', async () => {
+      const config = {
+        ...validConfig,
+        agents: { judge: { command: 'codex', useOAuth: true } },
+      };
+      mockReadFile.mockResolvedValue(JSON.stringify(config));
+      await expect(loadConfig('/fake/config.json')).rejects.toThrow(/useOAuth.*only supported for command: "claude"/);
+    });
+
+    it('rejects sandbox role with neither secret nor useOAuth', async () => {
+      const config = {
+        ...validConfig,
+        agents: { judge: { command: 'claude' } },
+      };
+      mockReadFile.mockResolvedValue(JSON.stringify(config));
+      await expect(loadConfig('/fake/config.json')).rejects.toThrow(/secret.*or.*useOAuth/);
+    });
+
+    it('rejects setting both secret and useOAuth on the same role', async () => {
+      const config = {
+        ...validConfig,
+        agents: { judge: { command: 'claude', useOAuth: true, secret: { value: '$ANTHROPIC_API_KEY' } } },
+      };
+      mockReadFile.mockResolvedValue(JSON.stringify(config));
+      await expect(loadConfig('/fake/config.json')).rejects.toThrow(/cannot set both useOAuth and secret/);
+    });
+  });
 });
diff --git a/src/core/config.ts b/src/core/config.ts
index 6f2df48..47408c4 100644
--- a/src/core/config.ts
+++ b/src/core/config.ts
@@ -129,36 +129,55 @@ export function validateConfig(data: unknown, configPath?: string): Config {
       const isSandboxRole = SANDBOX_ROLES.includes(role);
 
       if (isSandboxRole) {
-        // Sandbox agents (executor/judge) require secret
-        if (!agent.secret || typeof agent.secret !== 'object' || Array.isArray(agent.secret)) {
-          throw new Error(`agents.${role} requires a secret with at least { value } for secure sandbox execution`);
-        }
-        const secret = agent.secret as Record<string, unknown>;
-        if (!secret.value || typeof secret.value !== 'string') {
-          throw new Error(`agents.${role}.secret.value must be a non-empty string`);
-        }
-
-        // Fill defaults from adapter for known agents
-        const adapter = createAdapter({ command } as AgentConfig);
-        if (adapter.defaultEnvVar) {
-          if (!secret.envVar) secret.envVar = adapter.defaultEnvVar;
-          if (!secret.baseUrl) secret.baseUrl = adapter.defaultBaseUrl;
-          if (!secret.baseUrlEnvVar) secret.baseUrlEnvVar = adapter.baseUrlEnvVar;
+        const useOAuth = agent.useOAuth === true;
+
+        if (useOAuth) {
+          // OAuth path: Claude Code subscription via CLAUDE_CODE_OAUTH_TOKEN.
+          if (command !== 'claude') {
+            throw new Error(
+              `agents.${role}.useOAuth: true is only supported for command: "claude" (Claude Code subscription auth). ` +
+              `Got command: "${command ?? '(unset)'}".`,
+            );
+          }
+          if (agent.secret !== undefined) {
+            throw new Error(
+              `agents.${role} cannot set both useOAuth and secret — choose one auth path.`,
+            );
+          }
         } else {
-          // Custom agents must specify envVar and baseUrl
-          if (!secret.envVar || typeof secret.envVar !== 'string') {
-            throw new Error(`agents.${role}.secret.envVar is required for custom agent '${command}'`);
+          // API-key path: secret with TLS-injected value.
+          if (!agent.secret || typeof agent.secret !== 'object' || Array.isArray(agent.secret)) {
+            throw new Error(
+              `agents.${role} requires either { secret: {...} } or { useOAuth: true } for sandbox auth`,
+            );
           }
-          if (!secret.baseUrl || typeof secret.baseUrl !== 'string') {
-            throw new Error(`agents.${role}.secret.baseUrl is required for custom agent '${command}'`);
+          const secret = agent.secret as Record<string, unknown>;
+          if (!secret.value || typeof secret.value !== 'string') {
+            throw new Error(`agents.${role}.secret.value must be a non-empty string`);
+          }
+
+          // Fill defaults from adapter for known agents
+          const adapter = createAdapter({ command } as AgentConfig);
+          if (adapter.defaultEnvVar) {
+            if (!secret.envVar) secret.envVar = adapter.defaultEnvVar;
+            if (!secret.baseUrl) secret.baseUrl = adapter.defaultBaseUrl;
+            if (!secret.baseUrlEnvVar) secret.baseUrlEnvVar = adapter.baseUrlEnvVar;
+          } else {
+            // Custom agents must specify envVar and baseUrl
+            if (!secret.envVar || typeof secret.envVar !== 'string') {
+              throw new Error(`agents.${role}.secret.envVar is required for custom agent '${command}'`);
+            }
+            if (!secret.baseUrl || typeof secret.baseUrl !== 'string') {
+              throw new Error(`agents.${role}.secret.baseUrl is required for custom agent '${command}'`);
+            }
           }
-        }
 
-        // Validate baseUrl is a valid URL
-        try {
-          new URL(secret.baseUrl as string);
-        } catch {
-          throw new Error(`agents.${role}.secret.baseUrl must be a valid URL`);
+          // Validate baseUrl is a valid URL
+          try {
+            new URL(secret.baseUrl as string);
+          } catch {
+            throw new Error(`agents.${role}.secret.baseUrl must be a valid URL`);
+          }
         }
       }
     }
diff --git a/src/sandbox/__tests__/microsandbox.test.ts b/src/sandbox/__tests__/microsandbox.test.ts
index a927d73..31f0736 100644
--- a/src/sandbox/__tests__/microsandbox.test.ts
+++ b/src/sandbox/__tests__/microsandbox.test.ts
@@ -1,5 +1,5 @@
-import { describe, it, expect, vi, beforeEach } from 'vitest';
-import { MicrosandboxClient, buildSecrets, resolveEnv } from '../microsandbox.js';
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import { MicrosandboxClient, buildSecrets, resolveEnv, resolveOAuthToken } from '../microsandbox.js';
 
 // ── Mocks ────────────────────────────────────────────────────────────────────
 
@@ -328,4 +328,26 @@ describe('MicrosandboxClient', () => {
       await expect(client.destroy()).resolves.toBeUndefined();
     });
   });
+});
+
+describe('resolveOAuthToken', () => {
+  const ORIGINAL_TOKEN = process.env.CLAUDE_CODE_OAUTH_TOKEN;
+
+  afterEach(() => {
+    if (ORIGINAL_TOKEN === undefined) {
+      delete process.env.CLAUDE_CODE_OAUTH_TOKEN;
+    } else {
+      process.env.CLAUDE_CODE_OAUTH_TOKEN = ORIGINAL_TOKEN;
+    }
+  });
+
+  it('returns the token when CLAUDE_CODE_OAUTH_TOKEN is set on the host', () => {
+    process.env.CLAUDE_CODE_OAUTH_TOKEN = 'sk-ant-oat-test-value';
+    expect(resolveOAuthToken()).toBe('sk-ant-oat-test-value');
+  });
+
+  it('throws with a clear setup-token hint when CLAUDE_CODE_OAUTH_TOKEN is unset', () => {
+    delete process.env.CLAUDE_CODE_OAUTH_TOKEN;
+    expect(() => resolveOAuthToken()).toThrow(/claude setup-token/);
+  });
 });
\ No newline at end of file
diff --git a/src/sandbox/microsandbox.ts b/src/sandbox/microsandbox.ts
index daa23bf..73f3a2e 100644
--- a/src/sandbox/microsandbox.ts
+++ b/src/sandbox/microsandbox.ts
@@ -63,6 +63,29 @@ export function buildAgentSecret(secret: AgentSecretConfig, additionalAllowHosts
   return Secret.env(secret.envVar, { value, allowHosts });
 }
 
+/**
+ * Resolve the Claude Code OAuth token from the host environment for
+ * `useOAuth: true` agent configs. Unlike API keys (TLS-injected as
+ * placeholders by microsandbox), OAuth tokens must enter the VM as the real
+ * value because Claude reads them directly from `process.env`. The caller
+ * places the returned value under `sandbox.env.CLAUDE_CODE_OAUTH_TOKEN`.
+ *
+ * Throws with a clear message if `CLAUDE_CODE_OAUTH_TOKEN` is not set on
+ * the host — fail-fast so the user knows to run `claude setup-token` and
+ * export the result before the eval starts.
+ */
+export function resolveOAuthToken(): string {
+  const value = process.env.CLAUDE_CODE_OAUTH_TOKEN;
+  if (!value) {
+    throw new Error(
+      "CLAUDE_CODE_OAUTH_TOKEN is not set on the host. " +
+      "Generate a long-lived subscription token with `claude setup-token` " +
+      "and `export CLAUDE_CODE_OAUTH_TOKEN=<value>` before running the eval.",
+    );
+  }
+  return value;
+}
+
 function resolveValue(value: string, envVar: string): string {
   if (value.startsWith('$')) {
     const hostVar = value.slice(1);
diff --git a/src/scoring/judge.ts b/src/scoring/judge.ts
index ce6af0b..dcf553d 100644
--- a/src/scoring/judge.ts
+++ b/src/scoring/judge.ts
@@ -1,7 +1,7 @@
 import type { SolutionFile, JudgeScore, TestCase, SandboxAgentConfig, TargetConfig, Config, ProjectPaths, SourceConfig } from '../types.js';
 import { createAdapter } from '../agents/adapter.js';
 import { JUDGE_SCORING_CRITERIA, extractJson } from '../commands/prompt-helpers.js';
-import { MicrosandboxClient, buildSecrets, buildAgentSecret, resolveEnv } from '../sandbox/microsandbox.js';
+import { MicrosandboxClient, buildSecrets, buildAgentSecret, resolveEnv, resolveOAuthToken } from '../sandbox/microsandbox.js';
 import { createEgressLockdownLogger } from '../sandbox/egress-logger.js';
 import { scaffoldWorkspace, uploadSources } from '../sandbox/scaffolding.js';
 import { deduplicateSources } from '../core/source-resolver.js';
@@ -135,9 +135,14 @@ const INFRA_ALLOWLIST = [
 export function buildJudgeAllowlist(judgeConfig: SandboxAgentConfig, config: Config): string[] {
   const hosts = new Set<string>();
 
-  // 1. Agent API endpoint from secret's baseUrl
-  if (judgeConfig.secret.baseUrl) {
+  // 1. Agent API endpoint — from secret.baseUrl (API-key path) or adapter default (OAuth path).
+  if (judgeConfig.secret?.baseUrl) {
     try { hosts.add(new URL(judgeConfig.secret.baseUrl).hostname); } catch { /* skip malformed */ }
+  } else if (judgeConfig.useOAuth) {
+    const adapter = createAdapter(judgeConfig);
+    if (adapter.defaultBaseUrl) {
+      try { hosts.add(new URL(adapter.defaultBaseUrl).hostname); } catch { /* skip malformed */ }
+    }
   }
 
   // 2. Secrets allowHosts
@@ -286,12 +291,19 @@ export async function runSandboxedJudge(
     const env = resolveEnv(config.sandbox?.env);
     const timeoutSecs = target.timeout ?? config.sandbox.defaultTimeout ?? 600;
 
-    // Merge agent secret into sandbox secrets and set base URL env var
+    // Resolve agent auth — same two-path model as the executor.
     const judgeAdapter = createAdapter(judgeConfig);
-    secrets.push(buildAgentSecret(judgeConfig.secret, judgeAdapter.additionalAllowHosts));
-    const baseUrlVar = judgeConfig.secret.baseUrlEnvVar ?? judgeAdapter.baseUrlEnvVar;
-    if (baseUrlVar && judgeConfig.secret.baseUrl) {
-      env[baseUrlVar] = judgeConfig.secret.baseUrl;
+    if (judgeConfig.useOAuth) {
+      env.CLAUDE_CODE_OAUTH_TOKEN = resolveOAuthToken();
+      if (judgeAdapter.baseUrlEnvVar && judgeAdapter.defaultBaseUrl) {
+        env[judgeAdapter.baseUrlEnvVar] = judgeAdapter.defaultBaseUrl;
+      }
+    } else if (judgeConfig.secret) {
+      secrets.push(buildAgentSecret(judgeConfig.secret, judgeAdapter.additionalAllowHosts));
+      const baseUrlVar = judgeConfig.secret.baseUrlEnvVar ?? judgeAdapter.baseUrlEnvVar;
+      if (baseUrlVar && judgeConfig.secret.baseUrl) {
+        env[baseUrlVar] = judgeConfig.secret.baseUrl;
+      }
     }
 
     await client.create(
diff --git a/src/types.ts b/src/types.ts
index c3dae8f..4e04224 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -109,10 +109,26 @@ export interface AgentConfig {
   logPattern?: string;
 }
 
-/** Agent config for sandboxed execution (executor/judge). Secret is required for microsandbox TLS injection. */
+/** Agent config for sandboxed execution (executor/judge).
+ *
+ * Auth: exactly one of `secret` or `useOAuth: true` is required.
+ *
+ * - `secret` is the API-key path — values are TLS-injected by microsandbox so
+ *   the cleartext never enters the VM, and the env contains a placeholder
+ *   substituted on the wire only for the agent's allowed host.
+ * - `useOAuth: true` is the Claude Code subscription path — reads
+ *   `CLAUDE_CODE_OAUTH_TOKEN` (generated by `claude setup-token`, requires
+ *   Pro / Max / Team / Enterprise) from the host environment and injects it
+ *   into the sandbox as a plain env var. Subscription auth is required here
+ *   because Claude reads the token directly from `process.env`; the TLS
+ *   substitution model does not work for OAuth. Only valid when
+ *   `command: "claude"`.
+ */
 export interface SandboxAgentConfig extends AgentConfig {
   /** Agent's API secret and base URL. Flows to microsandbox TLS injection, sandbox env, and judge lockdown allowlist. */
-  secret: AgentSecretConfig;
+  secret?: AgentSecretConfig;
+  /** Use Claude Code subscription auth via `CLAUDE_CODE_OAUTH_TOKEN`. Only valid for `command: "claude"`. */
+  useOAuth?: boolean;
 }
 
 export interface TargetConfig {

From 0422269ea4e635a72b062658a888787a9b3efc5d Mon Sep 17 00:00:00 2001
From: nickwinder <nfxdevelopment@gmail.com>
Date: Fri, 15 May 2026 10:28:15 +1200
Subject: [PATCH 2/6] refactor(auth): detect auth mode from secret.value
 prefix, drop useOAuth flag
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replaces the dedicated `useOAuth: true` field on `SandboxAgentConfig` with
runtime prefix detection on the resolved `secret.value`:

  - `sk-ant-api-…` → API-key path (microsandbox TLS injection, unchanged)
  - `sk-ant-oat-…` → OAuth path (plain `CLAUDE_CODE_OAUTH_TOKEN` env var)

User experience is now one consistent shape — always set `secret.value` to a
host env var reference; the runtime picks the auth mode from the resolved
value at sandbox-create time:

    "executor": { "command": "claude", "secret": { "value": "$ANTHROPIC_API_KEY"      } }
    "executor": { "command": "claude", "secret": { "value": "$CLAUDE_CODE_OAUTH_TOKEN" } }

Mechanics:
  - New `applyAgentAuth(secret, adapter, secrets, env)` helper in
    `microsandbox.ts` consolidates the 3 sandbox-creation call sites
    (`execute.ts`, `judge.ts`, `sandbox.ts`) into a single call. The helper
    handles both auth modes internally.
  - New `isOAuthSecret(secret)` helper exposes the same prefix check for the
    judge's `buildJudgeAllowlist` (which adds the adapter's default hostname
    to the lockdown allowlist when in OAuth mode, since the OAuth path has no
    `secret.baseUrl` to derive from).
  - `SandboxAgentConfig.secret` becomes required again (no parallel field).
  - Removed `resolveOAuthToken()` and `buildAgentSecret()` — both subsumed by
    `applyAgentAuth`.

Trade-off: depends on Anthropic's documented OAuth-vs-API-key prefix scheme
(`sk-ant-oat-` vs `sk-ant-api-`). If that scheme changes, the runtime
misclassifies the credential without raising an error of its own. The
failure then surfaces as an auth error from the Claude API at request
time, which is observable in run logs.

Tests: drop 4 useOAuth config tests + 2 resolveOAuthToken tests; add 3 tests
for `isOAuthSecret` (OAuth value, API-key value, unset env var) and 3 tests
for `applyAgentAuth` (OAuth path, API-key path, missing required fields).
331 tests pass; type-check + lint clean.
---
 README.md                                  | 12 +--
 skills/_reference/config-schema.md         | 13 +--
 src/commands/__tests__/execute.test.ts     |  3 +-
 src/commands/execute.ts                    | 18 +---
 src/commands/sandbox.ts                    | 15 +---
 src/core/__tests__/config.test.ts          | 54 +++---------
 src/core/config.ts                         | 73 +++++++----------
 src/sandbox/__tests__/microsandbox.test.ts | 95 ++++++++++++++++++----
 src/sandbox/microsandbox.ts                | 80 ++++++++++++------
 src/scoring/__tests__/judge.test.ts        |  3 +-
 src/scoring/judge.ts                       | 25 ++----
 src/types.ts                               | 32 ++++----
 12 files changed, 218 insertions(+), 205 deletions(-)

diff --git a/README.md b/README.md
index 26b1838..f907d61 100644
--- a/README.md
+++ b/README.md
@@ -366,13 +366,13 @@ Generator and insights agents run locally and do not require a secret.
 
 #### Claude Code subscription auth (avoid API billing)
 
-If you have a Claude Pro / Max / Team / Enterprise subscription, sandbox agents using `command: "claude"` can authenticate via your subscription instead of paying per-token API charges. Set `useOAuth: true` instead of providing a `secret`:
+If you have a Claude Pro / Max / Team / Enterprise subscription, sandbox agents using `command: "claude"` can authenticate via your subscription instead of paying per-token API charges. Point `secret.value` at the Claude Code OAuth token instead of an API key:
 
 ```json
 {
   "agents": {
-    "executor": { "command": "claude", "useOAuth": true },
-    "judge":    { "command": "claude", "useOAuth": true }
+    "executor": { "command": "claude", "secret": { "value": "$CLAUDE_CODE_OAUTH_TOKEN" } },
+    "judge":    { "command": "claude", "secret": { "value": "$CLAUDE_CODE_OAUTH_TOKEN" } }
   }
 }
 ```
@@ -384,11 +384,7 @@ claude setup-token             # interactive — generates a long-lived OAuth to
 export CLAUDE_CODE_OAUTH_TOKEN='<token>'   # before running the eval
 ```
 
-Notes:
-- Token is injected into the sandbox as a plain env var (Claude reads it directly from `process.env`; the API-key TLS-substitution model does not apply).
-- Only valid for `command: "claude"`. The framework rejects `useOAuth: true` on other adapters at config-load time.
-- Setting both `secret` and `useOAuth` is rejected — choose one path per role.
-- Subscription concurrent-session caps apply.
+How it works: the runtime sniffs the resolved value's prefix at sandbox-create time. Anthropic OAuth tokens start with `sk-ant-oat-`; API keys start with `sk-ant-api-`. When the value is an OAuth token, it's injected as a plain `CLAUDE_CODE_OAUTH_TOKEN` env var (Claude Code reads it directly from `process.env`; the API-key TLS-substitution model doesn't apply for OAuth). Subscription concurrent-session caps apply.
 
 #### Custom agents
 
diff --git a/skills/_reference/config-schema.md b/skills/_reference/config-schema.md
index c1bb0a5..8314c14 100644
--- a/skills/_reference/config-schema.md
+++ b/skills/_reference/config-schema.md
@@ -76,16 +76,18 @@
 
 ### SandboxAgentConfig (executor, judge)
 
-Extends AgentConfig with auth fields. **Exactly one of `secret` or `useOAuth` is required.**
+Extends AgentConfig with one **required** field:
 
 | Field | Type | Required |
 |-------|------|----------|
-| `secret` | `AgentSecretConfig` | One-of — API key via TLS injection |
-| `useOAuth` | `boolean` | One-of — Claude Code subscription via `CLAUDE_CODE_OAUTH_TOKEN`. Only valid when `command: "claude"`. |
+| `secret` | `AgentSecretConfig` | **Yes** |
 
-**API-key path (`secret`)** — microsandbox TLS-injects the value, so the cleartext never enters the VM. Inside the sandbox the env var contains only a placeholder substituted on the wire for the allowed host.
+Auth mode is auto-detected from the resolved `secret.value`'s prefix at sandbox-create time:
 
-**Subscription path (`useOAuth: true`)** — reads `CLAUDE_CODE_OAUTH_TOKEN` from the host environment and injects it into the sandbox as a plain env var. Claude reads the token directly. Generate the token once with `claude setup-token` (Pro / Max / Team / Enterprise required), export it, then `useOAuth: true` on both executor and judge. Avoids per-token API billing.
+- `sk-ant-api-…` (Anthropic API key) → microsandbox TLS-injects the value, so the cleartext never enters the VM. Inside the sandbox the env var contains only a placeholder substituted on the wire for the allowed host.
+- `sk-ant-oat-…` (Claude Code subscription OAuth token, issued by `claude setup-token`) → injected as a plain `CLAUDE_CODE_OAUTH_TOKEN` env var. Claude Code reads the token directly from `process.env`, so the TLS-substitution model does not apply. Avoids per-token API billing on Pro / Max / Team / Enterprise plans.
+
+Point `secret.value` at the host env var that holds the credential — `"$ANTHROPIC_API_KEY"` for the API-key path, `"$CLAUDE_CODE_OAUTH_TOKEN"` for the subscription path.
 
 ### AgentSecretConfig
 
@@ -152,7 +154,6 @@ Custom agents (any command not in the table above) **must** provide `envVar` and
 7. `agents.executor` and `agents.judge` must have `secret.value` (non-empty string)
 8. Custom agents must provide `envVar` and `baseUrl` in their secret
 9. `baseUrl` must be a parseable URL
-10. Each sandbox agent role (`executor`, `judge`) must declare auth: either `secret` (API-key path) or `useOAuth: true` (Claude Code subscription path). Setting both is rejected. `useOAuth: true` requires `command: "claude"`.
 
 ## Minimal Examples
 
diff --git a/src/commands/__tests__/execute.test.ts b/src/commands/__tests__/execute.test.ts
index 494ea7c..e600125 100644
--- a/src/commands/__tests__/execute.test.ts
+++ b/src/commands/__tests__/execute.test.ts
@@ -41,7 +41,8 @@ vi.mock('../../sandbox/microsandbox.js', () => {
   return {
     MicrosandboxClient: MockMicrosandboxClient,
     buildSecrets: vi.fn().mockReturnValue([]),
-    buildAgentSecret: vi.fn().mockReturnValue({}),
+    applyAgentAuth: vi.fn(),
+    isOAuthSecret: vi.fn().mockReturnValue(false),
     resolveEnv: vi.fn().mockReturnValue({}),
   };
 });
diff --git a/src/commands/execute.ts b/src/commands/execute.ts
index 77f43a7..58654ee 100644
--- a/src/commands/execute.ts
+++ b/src/commands/execute.ts
@@ -3,7 +3,7 @@ import ora from 'ora';
 import { loadDotenv } from '../core/env.js';
 import { loadConfig } from '../core/config.js';
 import { loadTestSuite, saveResult, saveBinaryResult, formatElapsed } from '../core/suite-io.js';
-import { MicrosandboxClient, buildSecrets, buildAgentSecret, resolveEnv, resolveOAuthToken, type CommandResult } from '../sandbox/microsandbox.js';
+import { MicrosandboxClient, buildSecrets, applyAgentAuth, resolveEnv, type CommandResult } from '../sandbox/microsandbox.js';
 import { createEgressLogger } from '../sandbox/egress-logger.js';
 import { scaffoldWorkspace } from '../sandbox/scaffolding.js';
 import { WorkerPool } from '../sandbox/worker-pool.js';
@@ -158,24 +158,10 @@ export async function executeTestCase(
     const env = resolveEnv(config.sandbox?.env);
     const timeoutSecs = target.timeout ?? config.sandbox.defaultTimeout ?? 600;
 
-    // Resolve agent auth. Two paths:
-    //   - secret  → microsandbox TLS-injected placeholder for an API key
-    //   - useOAuth → plain CLAUDE_CODE_OAUTH_TOKEN env var (claude reads it directly)
     const executorConfig: SandboxAgentConfig = config.agents?.executor
       ?? { command: 'claude', secret: { value: '$ANTHROPIC_API_KEY' } };
     const execAdapter = createAdapter(executorConfig);
-    if (executorConfig.useOAuth) {
-      env.CLAUDE_CODE_OAUTH_TOKEN = resolveOAuthToken();
-      if (execAdapter.baseUrlEnvVar && execAdapter.defaultBaseUrl) {
-        env[execAdapter.baseUrlEnvVar] = execAdapter.defaultBaseUrl;
-      }
-    } else if (executorConfig.secret) {
-      secrets.push(buildAgentSecret(executorConfig.secret, execAdapter.additionalAllowHosts));
-      const baseUrlVar = executorConfig.secret.baseUrlEnvVar ?? execAdapter.baseUrlEnvVar;
-      if (baseUrlVar && executorConfig.secret.baseUrl) {
-        env[baseUrlVar] = executorConfig.secret.baseUrl;
-      }
-    }
+    applyAgentAuth(executorConfig.secret, execAdapter, secrets, env);
 
     await client.create(
       sandboxName(testCase.id),
diff --git a/src/commands/sandbox.ts b/src/commands/sandbox.ts
index 2ca5c45..8645cf1 100644
--- a/src/commands/sandbox.ts
+++ b/src/commands/sandbox.ts
@@ -5,7 +5,7 @@ import { loadDotenv } from '../core/env.js';
 import { loadConfig } from '../core/config.js';
 import { loadTestSuite, loadBinaryResult } from '../core/suite-io.js';
 import { loadJsonFile } from '../core/results.js';
-import { MicrosandboxClient, buildSecrets, buildAgentSecret, resolveEnv, resolveOAuthToken } from '../sandbox/microsandbox.js';
+import { MicrosandboxClient, buildSecrets, applyAgentAuth, resolveEnv } from '../sandbox/microsandbox.js';
 import { scaffoldWorkspace, uploadSources } from '../sandbox/scaffolding.js';
 import { createEgressLogger } from '../sandbox/egress-logger.js';
 import { createAdapter } from '../agents/adapter.js';
@@ -59,18 +59,7 @@ export async function sandboxCommand(paths: ProjectPaths, options: SandboxOption
   if (options.mode) {
     agentConfig = getAgentConfig(config, options.mode);
     adapter = createAdapter(agentConfig);
-    if (agentConfig.useOAuth) {
-      env.CLAUDE_CODE_OAUTH_TOKEN = resolveOAuthToken();
-      if (adapter.baseUrlEnvVar && adapter.defaultBaseUrl) {
-        env[adapter.baseUrlEnvVar] = adapter.defaultBaseUrl;
-      }
-    } else if (agentConfig.secret) {
-      secrets.push(buildAgentSecret(agentConfig.secret, adapter.additionalAllowHosts));
-      const baseUrlVar = agentConfig.secret.baseUrlEnvVar ?? adapter.baseUrlEnvVar;
-      if (baseUrlVar && agentConfig.secret.baseUrl) {
-        env[baseUrlVar] = agentConfig.secret.baseUrl;
-      }
-    }
+    applyAgentAuth(agentConfig.secret, adapter, secrets, env);
   }
 
   // Prepare output directory for artifacts
diff --git a/src/core/__tests__/config.test.ts b/src/core/__tests__/config.test.ts
index 7b08938..b8bf405 100644
--- a/src/core/__tests__/config.test.ts
+++ b/src/core/__tests__/config.test.ts
@@ -149,9 +149,9 @@ describe('loadConfig', () => {
     mockReadFile.mockResolvedValue(JSON.stringify(config));
     const result = await loadConfig('/fake/config.json');
     // Defaults should be filled in
-    expect(result.agents?.judge?.secret?.envVar).toBe('ANTHROPIC_API_KEY');
-    expect(result.agents?.judge?.secret?.baseUrl).toBe('https://api.anthropic.com');
-    expect(result.agents?.judge?.secret?.baseUrlEnvVar).toBe('ANTHROPIC_BASE_URL');
+    expect(result.agents?.judge?.secret.envVar).toBe('ANTHROPIC_API_KEY');
+    expect(result.agents?.judge?.secret.baseUrl).toBe('https://api.anthropic.com');
+    expect(result.agents?.judge?.secret.baseUrlEnvVar).toBe('ANTHROPIC_BASE_URL');
   });
 
   it('accepts known agent with all secret fields explicit', async () => {
@@ -166,7 +166,7 @@ describe('loadConfig', () => {
     };
     mockReadFile.mockResolvedValue(JSON.stringify(config));
     const result = await loadConfig('/fake/config.json');
-    expect(result.agents?.judge?.secret?.envVar).toBe('ANTHROPIC_API_KEY');
+    expect(result.agents?.judge?.secret.envVar).toBe('ANTHROPIC_API_KEY');
   });
 
   it('throws when sandbox agent (executor) is missing secret', async () => {
@@ -224,43 +224,13 @@ describe('loadConfig', () => {
     await expect(loadConfig('/fake/config.json')).rejects.toThrow(/valid URL/);
   });
 
-  describe('useOAuth (Claude Code subscription auth)', () => {
-    it('accepts judge with useOAuth: true and no secret', async () => {
-      const config = {
-        ...validConfig,
-        agents: { judge: { command: 'claude', useOAuth: true } },
-      };
-      mockReadFile.mockResolvedValue(JSON.stringify(config));
-      const result = await loadConfig('/fake/config.json');
-      expect(result.agents?.judge?.useOAuth).toBe(true);
-      expect(result.agents?.judge?.secret).toBeUndefined();
-    });
-
-    it('rejects useOAuth with command != claude', async () => {
-      const config = {
-        ...validConfig,
-        agents: { judge: { command: 'codex', useOAuth: true } },
-      };
-      mockReadFile.mockResolvedValue(JSON.stringify(config));
-      await expect(loadConfig('/fake/config.json')).rejects.toThrow(/useOAuth.*only supported for command: "claude"/);
-    });
-
-    it('rejects sandbox role with neither secret nor useOAuth', async () => {
-      const config = {
-        ...validConfig,
-        agents: { judge: { command: 'claude' } },
-      };
-      mockReadFile.mockResolvedValue(JSON.stringify(config));
-      await expect(loadConfig('/fake/config.json')).rejects.toThrow(/secret.*or.*useOAuth/);
-    });
-
-    it('rejects setting both secret and useOAuth on the same role', async () => {
-      const config = {
-        ...validConfig,
-        agents: { judge: { command: 'claude', useOAuth: true, secret: { value: '$ANTHROPIC_API_KEY' } } },
-      };
-      mockReadFile.mockResolvedValue(JSON.stringify(config));
-      await expect(loadConfig('/fake/config.json')).rejects.toThrow(/cannot set both useOAuth and secret/);
-    });
+  it('accepts secret pointing at $CLAUDE_CODE_OAUTH_TOKEN (auth mode resolved later by value prefix)', async () => {
+    const config = {
+      ...validConfig,
+      agents: { judge: { command: 'claude', secret: { value: '$CLAUDE_CODE_OAUTH_TOKEN' } } },
+    };
+    mockReadFile.mockResolvedValue(JSON.stringify(config));
+    const result = await loadConfig('/fake/config.json');
+    expect(result.agents?.judge?.secret?.value).toBe('$CLAUDE_CODE_OAUTH_TOKEN');
   });
 });
diff --git a/src/core/config.ts b/src/core/config.ts
index 47408c4..a1d618b 100644
--- a/src/core/config.ts
+++ b/src/core/config.ts
@@ -129,55 +129,38 @@ export function validateConfig(data: unknown, configPath?: string): Config {
       const isSandboxRole = SANDBOX_ROLES.includes(role);
 
       if (isSandboxRole) {
-        const useOAuth = agent.useOAuth === true;
-
-        if (useOAuth) {
-          // OAuth path: Claude Code subscription via CLAUDE_CODE_OAUTH_TOKEN.
-          if (command !== 'claude') {
-            throw new Error(
-              `agents.${role}.useOAuth: true is only supported for command: "claude" (Claude Code subscription auth). ` +
-              `Got command: "${command ?? '(unset)'}".`,
-            );
-          }
-          if (agent.secret !== undefined) {
-            throw new Error(
-              `agents.${role} cannot set both useOAuth and secret — choose one auth path.`,
-            );
-          }
+        // Sandbox agents (executor/judge) require secret. Auth mode (API key vs Claude Code
+        // subscription OAuth token) is auto-detected from the resolved value's prefix at
+        // sandbox-create time.
+        if (!agent.secret || typeof agent.secret !== 'object' || Array.isArray(agent.secret)) {
+          throw new Error(`agents.${role} requires a secret with at least { value } for secure sandbox execution`);
+        }
+        const secret = agent.secret as Record<string, unknown>;
+        if (!secret.value || typeof secret.value !== 'string') {
+          throw new Error(`agents.${role}.secret.value must be a non-empty string`);
+        }
+
+        // Fill defaults from adapter for known agents
+        const adapter = createAdapter({ command } as AgentConfig);
+        if (adapter.defaultEnvVar) {
+          if (!secret.envVar) secret.envVar = adapter.defaultEnvVar;
+          if (!secret.baseUrl) secret.baseUrl = adapter.defaultBaseUrl;
+          if (!secret.baseUrlEnvVar) secret.baseUrlEnvVar = adapter.baseUrlEnvVar;
         } else {
-          // API-key path: secret with TLS-injected value.
-          if (!agent.secret || typeof agent.secret !== 'object' || Array.isArray(agent.secret)) {
-            throw new Error(
-              `agents.${role} requires either { secret: {...} } or { useOAuth: true } for sandbox auth`,
-            );
+          // Custom agents must specify envVar and baseUrl
+          if (!secret.envVar || typeof secret.envVar !== 'string') {
+            throw new Error(`agents.${role}.secret.envVar is required for custom agent '${command}'`);
           }
-          const secret = agent.secret as Record<string, unknown>;
-          if (!secret.value || typeof secret.value !== 'string') {
-            throw new Error(`agents.${role}.secret.value must be a non-empty string`);
-          }
-
-          // Fill defaults from adapter for known agents
-          const adapter = createAdapter({ command } as AgentConfig);
-          if (adapter.defaultEnvVar) {
-            if (!secret.envVar) secret.envVar = adapter.defaultEnvVar;
-            if (!secret.baseUrl) secret.baseUrl = adapter.defaultBaseUrl;
-            if (!secret.baseUrlEnvVar) secret.baseUrlEnvVar = adapter.baseUrlEnvVar;
-          } else {
-            // Custom agents must specify envVar and baseUrl
-            if (!secret.envVar || typeof secret.envVar !== 'string') {
-              throw new Error(`agents.${role}.secret.envVar is required for custom agent '${command}'`);
-            }
-            if (!secret.baseUrl || typeof secret.baseUrl !== 'string') {
-              throw new Error(`agents.${role}.secret.baseUrl is required for custom agent '${command}'`);
-            }
+          if (!secret.baseUrl || typeof secret.baseUrl !== 'string') {
+            throw new Error(`agents.${role}.secret.baseUrl is required for custom agent '${command}'`);
           }
+        }
 
-          // Validate baseUrl is a valid URL
-          try {
-            new URL(secret.baseUrl as string);
-          } catch {
-            throw new Error(`agents.${role}.secret.baseUrl must be a valid URL`);
-          }
+        // Validate baseUrl is a valid URL
+        try {
+          new URL(secret.baseUrl as string);
+        } catch {
+          throw new Error(`agents.${role}.secret.baseUrl must be a valid URL`);
         }
       }
     }
diff --git a/src/sandbox/__tests__/microsandbox.test.ts b/src/sandbox/__tests__/microsandbox.test.ts
index 31f0736..991bab4 100644
--- a/src/sandbox/__tests__/microsandbox.test.ts
+++ b/src/sandbox/__tests__/microsandbox.test.ts
@@ -1,5 +1,6 @@
 import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
-import { MicrosandboxClient, buildSecrets, resolveEnv, resolveOAuthToken } from '../microsandbox.js';
+import type { SecretEntry } from 'microsandbox';
+import { MicrosandboxClient, buildSecrets, resolveEnv, applyAgentAuth, isOAuthSecret } from '../microsandbox.js';
 
 // ── Mocks ────────────────────────────────────────────────────────────────────
 
@@ -330,24 +331,90 @@ describe('MicrosandboxClient', () => {
   });
 });
 
-describe('resolveOAuthToken', () => {
-  const ORIGINAL_TOKEN = process.env.CLAUDE_CODE_OAUTH_TOKEN;
+describe('agent secret auth-mode detection', () => {
+  const ORIGINAL_API_KEY = process.env.ANTHROPIC_API_KEY;
+  const ORIGINAL_OAUTH = process.env.CLAUDE_CODE_OAUTH_TOKEN;
 
   afterEach(() => {
-    if (ORIGINAL_TOKEN === undefined) {
-      delete process.env.CLAUDE_CODE_OAUTH_TOKEN;
-    } else {
-      process.env.CLAUDE_CODE_OAUTH_TOKEN = ORIGINAL_TOKEN;
-    }
+    const restore = (key: string, value: string | undefined) => {
+      if (value === undefined) delete process.env[key];
+      else process.env[key] = value;
+    };
+    restore('ANTHROPIC_API_KEY', ORIGINAL_API_KEY);
+    restore('CLAUDE_CODE_OAUTH_TOKEN', ORIGINAL_OAUTH);
   });
 
-  it('returns the token when CLAUDE_CODE_OAUTH_TOKEN is set on the host', () => {
-    process.env.CLAUDE_CODE_OAUTH_TOKEN = 'sk-ant-oat-test-value';
-    expect(resolveOAuthToken()).toBe('sk-ant-oat-test-value');
+  const claudeAdapter = {
+    baseUrlEnvVar: 'ANTHROPIC_BASE_URL',
+    defaultBaseUrl: 'https://api.anthropic.com',
+    additionalAllowHosts: [],
+  };
+
+  describe('isOAuthSecret', () => {
+    it('returns true when the resolved value starts with sk-ant-oat-', () => {
+      process.env.CLAUDE_CODE_OAUTH_TOKEN = 'sk-ant-oat-fake-test-token';
+      expect(isOAuthSecret({
+        envVar: 'CLAUDE_CODE_OAUTH_TOKEN',
+        value: '$CLAUDE_CODE_OAUTH_TOKEN',
+        baseUrl: 'https://api.anthropic.com',
+      })).toBe(true);
+    });
+
+    it('returns false for an API-key shaped value', () => {
+      process.env.ANTHROPIC_API_KEY = 'sk-ant-api-fake-test-key';
+      expect(isOAuthSecret({
+        envVar: 'ANTHROPIC_API_KEY',
+        value: '$ANTHROPIC_API_KEY',
+        baseUrl: 'https://api.anthropic.com',
+      })).toBe(false);
+    });
+
+    it('returns false when the referenced host env var is unset (no throw)', () => {
+      delete process.env.CLAUDE_CODE_OAUTH_TOKEN;
+      expect(isOAuthSecret({
+        envVar: 'CLAUDE_CODE_OAUTH_TOKEN',
+        value: '$CLAUDE_CODE_OAUTH_TOKEN',
+        baseUrl: 'https://api.anthropic.com',
+      })).toBe(false);
+    });
   });
 
-  it('throws with a clear setup-token hint when CLAUDE_CODE_OAUTH_TOKEN is unset', () => {
-    delete process.env.CLAUDE_CODE_OAUTH_TOKEN;
-    expect(() => resolveOAuthToken()).toThrow(/claude setup-token/);
+  describe('applyAgentAuth', () => {
+    it('injects CLAUDE_CODE_OAUTH_TOKEN as a plain env var when value is an OAuth token', () => {
+      process.env.CLAUDE_CODE_OAUTH_TOKEN = 'sk-ant-oat-fake-test-token';
+      const secrets: SecretEntry[] = [];
+      const env: Record<string, string> = {};
+      applyAgentAuth({
+        envVar: 'CLAUDE_CODE_OAUTH_TOKEN',
+        value: '$CLAUDE_CODE_OAUTH_TOKEN',
+        baseUrl: 'https://api.anthropic.com',
+      }, claudeAdapter, secrets, env);
+      expect(env.CLAUDE_CODE_OAUTH_TOKEN).toBe('sk-ant-oat-fake-test-token');
+      expect(env.ANTHROPIC_BASE_URL).toBe('https://api.anthropic.com');
+      expect(secrets).toHaveLength(0);
+    });
+
+    it('wraps an API-key value in Secret.env() with the agent host on allowHosts', () => {
+      process.env.ANTHROPIC_API_KEY = 'sk-ant-api-fake-test-key';
+      const secrets: SecretEntry[] = [];
+      const env: Record<string, string> = {};
+      applyAgentAuth({
+        envVar: 'ANTHROPIC_API_KEY',
+        value: '$ANTHROPIC_API_KEY',
+        baseUrl: 'https://api.anthropic.com',
+        baseUrlEnvVar: 'ANTHROPIC_BASE_URL',
+      }, claudeAdapter, secrets, env);
+      expect(secrets).toHaveLength(1);
+      expect(env.ANTHROPIC_BASE_URL).toBe('https://api.anthropic.com');
+      expect(env.CLAUDE_CODE_OAUTH_TOKEN).toBeUndefined();
+    });
+
+    it('throws when envVar or baseUrl is missing', () => {
+      const secrets: SecretEntry[] = [];
+      const env: Record<string, string> = {};
+      expect(() => applyAgentAuth({
+        value: 'literal-value',
+      } as never, claudeAdapter, secrets, env)).toThrow(/envVar and baseUrl/);
+    });
   });
 });
\ No newline at end of file
diff --git a/src/sandbox/microsandbox.ts b/src/sandbox/microsandbox.ts
index 73f3a2e..1fc8efe 100644
--- a/src/sandbox/microsandbox.ts
+++ b/src/sandbox/microsandbox.ts
@@ -50,40 +50,68 @@ export function resolveEnv(
 }
 
 /**
- * Build a microsandbox `Secret.env()` entry from an agent's secret config.
- * The `allowHosts` is derived from the base URL hostname.
+ * Claude Code subscription OAuth tokens are prefixed `sk-ant-oat-` (issued by
+ * `claude setup-token`). API keys are prefixed `sk-ant-api-`. The auth mode is
+ * determined by inspecting the resolved secret value at sandbox-create time —
+ * no separate config flag needed.
  */
-export function buildAgentSecret(secret: AgentSecretConfig, additionalAllowHosts?: string[]): SecretEntry {
-  if (!secret.envVar || !secret.baseUrl) {
-    throw new Error('Agent secret must have envVar and baseUrl set (should be filled by config validation)');
+const OAUTH_TOKEN_PREFIX = 'sk-ant-oat-';
+
+/** Whether the agent secret's resolved value is a Claude Code subscription OAuth token. */
+export function isOAuthSecret(secret: AgentSecretConfig): boolean {
+  if (!secret.envVar) return false;
+  try {
+    const value = resolveValue(secret.value, secret.envVar);
+    return value.startsWith(OAUTH_TOKEN_PREFIX);
+  } catch {
+    return false;
   }
-  const value = resolveValue(secret.value, secret.envVar);
-  const hostname = new URL(secret.baseUrl).hostname;
-  const allowHosts = [hostname, ...(additionalAllowHosts ?? [])];
-  return Secret.env(secret.envVar, { value, allowHosts });
+}
+
+interface AgentAuthAdapter {
+  baseUrlEnvVar: string | null;
+  defaultBaseUrl: string | null;
+  additionalAllowHosts: string[];
 }
 
 /**
- * Resolve the Claude Code OAuth token from the host environment for
- * `useOAuth: true` agent configs. Unlike API keys (TLS-injected as
- * placeholders by microsandbox), OAuth tokens must enter the VM as the real
- * value because Claude reads them directly from `process.env`. The caller
- * places the returned value under `sandbox.env.CLAUDE_CODE_OAUTH_TOKEN`.
+ * Wire an agent's secret into the sandbox `secrets` and `env`, picking the auth
+ * mode by inspecting the resolved value:
+ *
+ * - Claude Code subscription OAuth tokens (prefix `sk-ant-oat-`) → plain
+ *   `CLAUDE_CODE_OAUTH_TOKEN` env var. Claude Code reads the token directly
+ *   from `process.env`, so microsandbox's TLS-substitution model doesn't apply.
+ * - Everything else (API keys for known agents, custom-agent secrets) → wrapped
+ *   in `Secret.env()` with TLS substitution and the configured base URL env var.
  *
- * Throws with a clear message if `CLAUDE_CODE_OAUTH_TOKEN` is not set on
- * the host — fail-fast so the user knows to run `claude setup-token` and
- * export the result before the eval starts.
+ * Mutates `secrets` and `env` in place.
  */
-export function resolveOAuthToken(): string {
-  const value = process.env.CLAUDE_CODE_OAUTH_TOKEN;
-  if (!value) {
-    throw new Error(
-      "CLAUDE_CODE_OAUTH_TOKEN is not set on the host. " +
-      "Generate a long-lived subscription token with `claude setup-token` " +
-      "and `export CLAUDE_CODE_OAUTH_TOKEN=<value>` before running the eval.",
-    );
+export function applyAgentAuth(
+  secret: AgentSecretConfig,
+  adapter: AgentAuthAdapter,
+  secrets: SecretEntry[],
+  env: Record<string, string>,
+): void {
+  if (!secret.envVar || !secret.baseUrl) {
+    throw new Error('Agent secret must have envVar and baseUrl set (should be filled by config validation)');
+  }
+  const value = resolveValue(secret.value, secret.envVar);
+
+  if (value.startsWith(OAUTH_TOKEN_PREFIX)) {
+    env.CLAUDE_CODE_OAUTH_TOKEN = value;
+    if (adapter.baseUrlEnvVar && adapter.defaultBaseUrl) {
+      env[adapter.baseUrlEnvVar] = adapter.defaultBaseUrl;
+    }
+    return;
+  }
+
+  const hostname = new URL(secret.baseUrl).hostname;
+  const allowHosts = [hostname, ...adapter.additionalAllowHosts];
+  secrets.push(Secret.env(secret.envVar, { value, allowHosts }));
+  const baseUrlVar = secret.baseUrlEnvVar ?? adapter.baseUrlEnvVar;
+  if (baseUrlVar) {
+    env[baseUrlVar] = secret.baseUrl;
   }
-  return value;
 }
 
 function resolveValue(value: string, envVar: string): string {
diff --git a/src/scoring/__tests__/judge.test.ts b/src/scoring/__tests__/judge.test.ts
index 87c5b1e..ad13b8b 100644
--- a/src/scoring/__tests__/judge.test.ts
+++ b/src/scoring/__tests__/judge.test.ts
@@ -23,7 +23,8 @@ vi.mock('../../sandbox/microsandbox.js', () => ({
     Object.assign(this, mockClient);
   }),
   buildSecrets: vi.fn().mockReturnValue([]),
-  buildAgentSecret: vi.fn().mockReturnValue({}),
+  applyAgentAuth: vi.fn(),
+  isOAuthSecret: vi.fn().mockReturnValue(false),
   resolveEnv: vi.fn().mockReturnValue({}),
 }));
 
diff --git a/src/scoring/judge.ts b/src/scoring/judge.ts
index dcf553d..eaae607 100644
--- a/src/scoring/judge.ts
+++ b/src/scoring/judge.ts
@@ -1,7 +1,7 @@
 import type { SolutionFile, JudgeScore, TestCase, SandboxAgentConfig, TargetConfig, Config, ProjectPaths, SourceConfig } from '../types.js';
 import { createAdapter } from '../agents/adapter.js';
 import { JUDGE_SCORING_CRITERIA, extractJson } from '../commands/prompt-helpers.js';
-import { MicrosandboxClient, buildSecrets, buildAgentSecret, resolveEnv, resolveOAuthToken } from '../sandbox/microsandbox.js';
+import { MicrosandboxClient, buildSecrets, applyAgentAuth, isOAuthSecret, resolveEnv } from '../sandbox/microsandbox.js';
 import { createEgressLockdownLogger } from '../sandbox/egress-logger.js';
 import { scaffoldWorkspace, uploadSources } from '../sandbox/scaffolding.js';
 import { deduplicateSources } from '../core/source-resolver.js';
@@ -135,14 +135,15 @@ const INFRA_ALLOWLIST = [
 export function buildJudgeAllowlist(judgeConfig: SandboxAgentConfig, config: Config): string[] {
   const hosts = new Set<string>();
 
-  // 1. Agent API endpoint — from secret.baseUrl (API-key path) or adapter default (OAuth path).
-  if (judgeConfig.secret?.baseUrl) {
-    try { hosts.add(new URL(judgeConfig.secret.baseUrl).hostname); } catch { /* skip malformed */ }
-  } else if (judgeConfig.useOAuth) {
+  // 1. Agent API endpoint — adapter default for OAuth tokens (Claude reads directly
+  //    from process.env, so secret.baseUrl is irrelevant), else secret.baseUrl.
+  if (isOAuthSecret(judgeConfig.secret)) {
     const adapter = createAdapter(judgeConfig);
     if (adapter.defaultBaseUrl) {
       try { hosts.add(new URL(adapter.defaultBaseUrl).hostname); } catch { /* skip malformed */ }
     }
+  } else if (judgeConfig.secret.baseUrl) {
+    try { hosts.add(new URL(judgeConfig.secret.baseUrl).hostname); } catch { /* skip malformed */ }
   }
 
   // 2. Secrets allowHosts
@@ -291,20 +292,8 @@ export async function runSandboxedJudge(
     const env = resolveEnv(config.sandbox?.env);
     const timeoutSecs = target.timeout ?? config.sandbox.defaultTimeout ?? 600;
 
-    // Resolve agent auth — same two-path model as the executor.
     const judgeAdapter = createAdapter(judgeConfig);
-    if (judgeConfig.useOAuth) {
-      env.CLAUDE_CODE_OAUTH_TOKEN = resolveOAuthToken();
-      if (judgeAdapter.baseUrlEnvVar && judgeAdapter.defaultBaseUrl) {
-        env[judgeAdapter.baseUrlEnvVar] = judgeAdapter.defaultBaseUrl;
-      }
-    } else if (judgeConfig.secret) {
-      secrets.push(buildAgentSecret(judgeConfig.secret, judgeAdapter.additionalAllowHosts));
-      const baseUrlVar = judgeConfig.secret.baseUrlEnvVar ?? judgeAdapter.baseUrlEnvVar;
-      if (baseUrlVar && judgeConfig.secret.baseUrl) {
-        env[baseUrlVar] = judgeConfig.secret.baseUrl;
-      }
-    }
+    applyAgentAuth(judgeConfig.secret, judgeAdapter, secrets, env);
 
     await client.create(
       sandboxName(testCase.id),
diff --git a/src/types.ts b/src/types.ts
index 4e04224..966ecd6 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -111,24 +111,26 @@ export interface AgentConfig {
 
 /** Agent config for sandboxed execution (executor/judge).
  *
- * Auth: exactly one of `secret` or `useOAuth: true` is required.
+ * Auth mode is auto-detected from the resolved `secret.value`:
  *
- * - `secret` is the API-key path — values are TLS-injected by microsandbox so
- *   the cleartext never enters the VM, and the env contains a placeholder
- *   substituted on the wire only for the agent's allowed host.
- * - `useOAuth: true` is the Claude Code subscription path — reads
- *   `CLAUDE_CODE_OAUTH_TOKEN` (generated by `claude setup-token`, requires
- *   Pro / Max / Team / Enterprise) from the host environment and injects it
- *   into the sandbox as a plain env var. Subscription auth is required here
- *   because Claude reads the token directly from `process.env`; the TLS
- *   substitution model does not work for OAuth. Only valid when
- *   `command: "claude"`.
+ * - API keys (anything not matching the OAuth prefix) are TLS-injected by
+ *   microsandbox — cleartext never enters the VM; the env var inside the
+ *   sandbox contains a placeholder substituted on the wire only for the
+ *   agent's allowed host.
+ * - Claude Code subscription OAuth tokens (prefix `sk-ant-oat-`, issued by
+ *   `claude setup-token`) are injected as a plain `CLAUDE_CODE_OAUTH_TOKEN`
+ *   env var. Claude Code reads the token directly from `process.env`, so the
+ *   TLS-substitution model does not apply. Avoids per-token API billing on
+ *   Pro / Max / Team / Enterprise plans.
+ *
+ * Point `secret.value` at the host env var that holds the credential —
+ * `$ANTHROPIC_API_KEY` for the API-key path, `$CLAUDE_CODE_OAUTH_TOKEN` for
+ * the subscription path. The runtime sniffs the resolved value to pick the
+ * path.
  */
 export interface SandboxAgentConfig extends AgentConfig {
-  /** Agent's API secret and base URL. Flows to microsandbox TLS injection, sandbox env, and judge lockdown allowlist. */
-  secret?: AgentSecretConfig;
-  /** Use Claude Code subscription auth via `CLAUDE_CODE_OAUTH_TOKEN`. Only valid for `command: "claude"`. */
-  useOAuth?: boolean;
+  /** Agent's secret and base URL. Auth mode is determined from the resolved value's prefix. */
+  secret: AgentSecretConfig;
 }
 
 export interface TargetConfig {

From 7d510fff304a6723de6ae74f5cde5b0ec4fa71f8 Mon Sep 17 00:00:00 2001
From: nickwinder <nfxdevelopment@gmail.com>
Date: Fri, 15 May 2026 10:42:25 +1200
Subject: [PATCH 3/6] fix(auth): OAuth token prefix is sk-ant-oat (followed by
 version), not sk-ant-oat-
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Smoke-tested against a real `claude setup-token` token and discovered the
prefix is `sk-ant-oat01-…`, not `sk-ant-oat-…`. The trailing dash in
OAUTH_TOKEN_PREFIX caused all real OAuth tokens to be misclassified as API
keys and routed through the TLS-injection path.

Dropping the trailing dash:
  - matches every documented variant: `sk-ant-oat01-…`, `sk-ant-oat02-…`, etc.
  - still cleanly distinguishes OAuth tokens from API keys (`sk-ant-api…`), since `oat` ≠ `api`.

Test fixtures and docs updated to the real `sk-ant-oat01-` form. Verified
end-to-end with the smoke script: isOAuthSecret returns true, applyAgentAuth
populates env.CLAUDE_CODE_OAUTH_TOKEN as plain env var, no TLS secrets added.

331 tests pass; type-check + lint clean.
---
 README.md                                  |  2 +-
 skills/_reference/config-schema.md         |  4 ++--
 src/sandbox/__tests__/microsandbox.test.ts |  8 ++++----
 src/sandbox/microsandbox.ts                | 14 ++++++++------
 src/types.ts                               |  3 ++-
 5 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/README.md b/README.md
index f907d61..91671f6 100644
--- a/README.md
+++ b/README.md
@@ -384,7 +384,7 @@ claude setup-token             # interactive — generates a long-lived OAuth to
 export CLAUDE_CODE_OAUTH_TOKEN='<token>'   # before running the eval
 ```
 
-How it works: the runtime sniffs the resolved value's prefix at sandbox-create time. Anthropic OAuth tokens start with `sk-ant-oat-`; API keys start with `sk-ant-api-`. When the value is an OAuth token, it's injected as a plain `CLAUDE_CODE_OAUTH_TOKEN` env var (Claude Code reads it directly from `process.env`; the API-key TLS-substitution model doesn't apply for OAuth). Subscription concurrent-session caps apply.
+How it works: the runtime sniffs the resolved value's prefix at sandbox-create time. Anthropic OAuth tokens start with `sk-ant-oat` (e.g. `sk-ant-oat01-…`); API keys start with `sk-ant-api` (e.g. `sk-ant-api03-…`). When the value is an OAuth token, it's injected as a plain `CLAUDE_CODE_OAUTH_TOKEN` env var (Claude Code reads it directly from `process.env`; the API-key TLS-substitution model doesn't apply for OAuth). Subscription concurrent-session caps apply.
 
 #### Custom agents
 
diff --git a/skills/_reference/config-schema.md b/skills/_reference/config-schema.md
index 8314c14..23fad17 100644
--- a/skills/_reference/config-schema.md
+++ b/skills/_reference/config-schema.md
@@ -84,8 +84,8 @@ Extends AgentConfig with one **required** field:
 
 Auth mode is auto-detected from the resolved `secret.value`'s prefix at sandbox-create time:
 
-- `sk-ant-api-…` (Anthropic API key) → microsandbox TLS-injects the value, so the cleartext never enters the VM. Inside the sandbox the env var contains only a placeholder substituted on the wire for the allowed host.
-- `sk-ant-oat-…` (Claude Code subscription OAuth token, issued by `claude setup-token`) → injected as a plain `CLAUDE_CODE_OAUTH_TOKEN` env var. Claude Code reads the token directly from `process.env`, so the TLS-substitution model does not apply. Avoids per-token API billing on Pro / Max / Team / Enterprise plans.
+- `sk-ant-api…` (Anthropic API key, e.g. `sk-ant-api03-…`) → microsandbox TLS-injects the value, so the cleartext never enters the VM. Inside the sandbox the env var contains only a placeholder substituted on the wire for the allowed host.
+- `sk-ant-oat…` (Claude Code subscription OAuth token, e.g. `sk-ant-oat01-…`, issued by `claude setup-token`) → injected as a plain `CLAUDE_CODE_OAUTH_TOKEN` env var. Claude Code reads the token directly from `process.env`, so the TLS-substitution model does not apply. Avoids per-token API billing on Pro / Max / Team / Enterprise plans.
 
 Point `secret.value` at the host env var that holds the credential — `"$ANTHROPIC_API_KEY"` for the API-key path, `"$CLAUDE_CODE_OAUTH_TOKEN"` for the subscription path.
 
diff --git a/src/sandbox/__tests__/microsandbox.test.ts b/src/sandbox/__tests__/microsandbox.test.ts
index 991bab4..d8a7865 100644
--- a/src/sandbox/__tests__/microsandbox.test.ts
+++ b/src/sandbox/__tests__/microsandbox.test.ts
@@ -351,8 +351,8 @@ describe('agent secret auth-mode detection', () => {
   };
 
   describe('isOAuthSecret', () => {
-    it('returns true when the resolved value starts with sk-ant-oat-', () => {
-      process.env.CLAUDE_CODE_OAUTH_TOKEN = 'sk-ant-oat-fake-test-token';
+    it('returns true when the resolved value starts with sk-ant-oat (followed by a version, e.g. sk-ant-oat01-)', () => {
+      process.env.CLAUDE_CODE_OAUTH_TOKEN = 'sk-ant-oat01-fake-test-token';
       expect(isOAuthSecret({
         envVar: 'CLAUDE_CODE_OAUTH_TOKEN',
         value: '$CLAUDE_CODE_OAUTH_TOKEN',
@@ -381,7 +381,7 @@ describe('agent secret auth-mode detection', () => {
 
   describe('applyAgentAuth', () => {
     it('injects CLAUDE_CODE_OAUTH_TOKEN as a plain env var when value is an OAuth token', () => {
-      process.env.CLAUDE_CODE_OAUTH_TOKEN = 'sk-ant-oat-fake-test-token';
+      process.env.CLAUDE_CODE_OAUTH_TOKEN = 'sk-ant-oat01-fake-test-token';
       const secrets: SecretEntry[] = [];
       const env: Record<string, string> = {};
       applyAgentAuth({
@@ -389,7 +389,7 @@ describe('agent secret auth-mode detection', () => {
         value: '$CLAUDE_CODE_OAUTH_TOKEN',
         baseUrl: 'https://api.anthropic.com',
       }, claudeAdapter, secrets, env);
-      expect(env.CLAUDE_CODE_OAUTH_TOKEN).toBe('sk-ant-oat-fake-test-token');
+      expect(env.CLAUDE_CODE_OAUTH_TOKEN).toBe('sk-ant-oat01-fake-test-token');
       expect(env.ANTHROPIC_BASE_URL).toBe('https://api.anthropic.com');
       expect(secrets).toHaveLength(0);
     });
diff --git a/src/sandbox/microsandbox.ts b/src/sandbox/microsandbox.ts
index 1fc8efe..0a8dfb0 100644
--- a/src/sandbox/microsandbox.ts
+++ b/src/sandbox/microsandbox.ts
@@ -50,12 +50,13 @@ export function resolveEnv(
 }
 
 /**
- * Claude Code subscription OAuth tokens are prefixed `sk-ant-oat-` (issued by
- * `claude setup-token`). API keys are prefixed `sk-ant-api-`. The auth mode is
- * determined by inspecting the resolved secret value at sandbox-create time —
- * no separate config flag needed.
+ * Claude Code subscription OAuth tokens are prefixed `sk-ant-oat` followed by a
+ * version number (e.g. `sk-ant-oat01-…`), issued by `claude setup-token`. API
+ * keys use `sk-ant-api` (e.g. `sk-ant-api03-…`). The auth mode is determined
+ * by inspecting the resolved secret value at sandbox-create time — no separate
+ * config flag needed.
  */
-const OAUTH_TOKEN_PREFIX = 'sk-ant-oat-';
+const OAUTH_TOKEN_PREFIX = 'sk-ant-oat';
 
 /** Whether the agent secret's resolved value is a Claude Code subscription OAuth token. */
 export function isOAuthSecret(secret: AgentSecretConfig): boolean {
@@ -78,7 +79,8 @@ interface AgentAuthAdapter {
  * Wire an agent's secret into the sandbox `secrets` and `env`, picking the auth
  * mode by inspecting the resolved value:
  *
- * - Claude Code subscription OAuth tokens (prefix `sk-ant-oat-`) → plain
+ * - Claude Code subscription OAuth tokens (prefix `sk-ant-oat`, e.g.
+ *   `sk-ant-oat01-…`) → plain
  *   `CLAUDE_CODE_OAUTH_TOKEN` env var. Claude Code reads the token directly
  *   from `process.env`, so microsandbox's TLS-substitution model doesn't apply.
  * - Everything else (API keys for known agents, custom-agent secrets) → wrapped
diff --git a/src/types.ts b/src/types.ts
index 966ecd6..7b92ef4 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -117,7 +117,8 @@ export interface AgentConfig {
  *   microsandbox — cleartext never enters the VM; the env var inside the
  *   sandbox contains a placeholder substituted on the wire only for the
  *   agent's allowed host.
- * - Claude Code subscription OAuth tokens (prefix `sk-ant-oat-`, issued by
+ * - Claude Code subscription OAuth tokens (prefix `sk-ant-oat`, e.g.
+ *   `sk-ant-oat01-…`, issued by
  *   `claude setup-token`) are injected as a plain `CLAUDE_CODE_OAUTH_TOKEN`
  *   env var. Claude Code reads the token directly from `process.env`, so the
  *   TLS-substitution model does not apply. Avoids per-token API billing on

From 6bc5c3aebd47a378ccea4d69964df0225dac0ba1 Mon Sep 17 00:00:00 2001
From: nickwinder <nfxdevelopment@gmail.com>
Date: Fri, 15 May 2026 11:24:55 +1200
Subject: [PATCH 4/6] refactor(auth): unify OAuth and API-key paths via TLS
 substitution
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The original PR routed OAuth through plain env-var injection on the
assumption that Claude Code's CLAUDE_CODE_OAUTH_TOKEN reader was
incompatible with microsandbox's wire-time placeholder substitution.
Smoke-testing the placeholder path against a real Claude Code session
proved that wrong: Claude tolerates a `$MSB_CLAUDE_CODE_OAUTH_TOKEN`
placeholder as the env var value, constructs `Authorization: Bearer
$MSB_…` as the outbound header, and microsandbox substitutes the
placeholder for the real OAuth token at TLS interception time —
Anthropic returned 200 on `/api/eval/sdk-…` and the eval completed
end-to-end.

Collapse the two-mode dispatch in `applyAgentAuth` into one TLS-substituted
path. The resolved value's prefix only picks the env var name that carries
the placeholder:
- `sk-ant-oat…` → `CLAUDE_CODE_OAUTH_TOKEN`
- anything else → `secret.envVar` (= `ANTHROPIC_API_KEY` for claude, etc.)

Benefits:
- OAuth recovers the same "cleartext never enters the VM" security property
  API keys already had — the real subscription token only ever touches the
  outbound TLS layer to api.anthropic.com.
- One code path, fewer test modes. `isOAuthSecret` was only used to pick
  the env var name (now inlined as a local conditional) and to choose the
  allowlist hostname in `buildJudgeAllowlist` — but both auth paths now
  derive that hostname from `secret.baseUrl` (validation already fills it
  from the adapter default for known agents), so the OAuth branch in
  `buildJudgeAllowlist` is gone too.
- Less surface area in the public module API (`isOAuthSecret` removed
  from exports).

Tests collapse from a six-test `isOAuthSecret` + `applyAgentAuth` suite to
three unified `applyAgentAuth` cases (OAuth value → CLAUDE_CODE_OAUTH_TOKEN
slot; API-key value → adapter slot; precondition error). 328 unit tests
pass; type-check + lint clean.

End-to-end verified against TC-001 with a real CLAUDE_CODE_OAUTH_TOKEN:
exit 0, 27s, real solution produced; egress log shows
`Authorization: Bearer $MSB_CLAUDE_CODE_OAUTH_TOKEN` (pre-substitution),
`/api/claude_code/settings` returns 404 (auth accepted; would be 401 if
the placeholder leaked to the wire).
---
 README.md                                  |   2 +-
 skills/_reference/config-schema.md         |   8 +-
 src/sandbox/__tests__/microsandbox.test.ts | 106 ++++++++-------------
 src/sandbox/microsandbox.ts                |  44 ++++-----
 src/scoring/judge.ts                       |  14 +--
 src/types.ts                               |  26 ++---
 6 files changed, 82 insertions(+), 118 deletions(-)

diff --git a/README.md b/README.md
index 91671f6..982bb7e 100644
--- a/README.md
+++ b/README.md
@@ -384,7 +384,7 @@ claude setup-token             # interactive — generates a long-lived OAuth to
 export CLAUDE_CODE_OAUTH_TOKEN='<token>'   # before running the eval
 ```
 
-How it works: the runtime sniffs the resolved value's prefix at sandbox-create time. Anthropic OAuth tokens start with `sk-ant-oat` (e.g. `sk-ant-oat01-…`); API keys start with `sk-ant-api` (e.g. `sk-ant-api03-…`). When the value is an OAuth token, it's injected as a plain `CLAUDE_CODE_OAUTH_TOKEN` env var (Claude Code reads it directly from `process.env`; the API-key TLS-substitution model doesn't apply for OAuth). Subscription concurrent-session caps apply.
+How it works: the runtime sniffs the resolved value's prefix at sandbox-create time. Anthropic OAuth tokens start with `sk-ant-oat` (e.g. `sk-ant-oat01-…`); API keys start with `sk-ant-api` (e.g. `sk-ant-api03-…`). Both paths flow through microsandbox's `Secret.env()` TLS substitution — cleartext never enters the VM; the env var inside the sandbox contains a placeholder, and microsandbox swaps it for the real value on outbound TLS to `api.anthropic.com` only. The prefix only decides which env var name (`CLAUDE_CODE_OAUTH_TOKEN` vs `ANTHROPIC_API_KEY`) carries the placeholder. Subscription concurrent-session caps apply.
 
 #### Custom agents
 
diff --git a/skills/_reference/config-schema.md b/skills/_reference/config-schema.md
index 23fad17..79af65b 100644
--- a/skills/_reference/config-schema.md
+++ b/skills/_reference/config-schema.md
@@ -82,10 +82,12 @@ Extends AgentConfig with one **required** field:
 |-------|------|----------|
 | `secret` | `AgentSecretConfig` | **Yes** |
 
-Auth mode is auto-detected from the resolved `secret.value`'s prefix at sandbox-create time:
+Both auth modes flow through microsandbox `Secret.env()` TLS substitution — the cleartext credential never enters the VM. Inside the sandbox the env var contains a `$MSB_<name>` placeholder; microsandbox swaps it for the real value on outbound TLS to the allowed host only.
 
-- `sk-ant-api…` (Anthropic API key, e.g. `sk-ant-api03-…`) → microsandbox TLS-injects the value, so the cleartext never enters the VM. Inside the sandbox the env var contains only a placeholder substituted on the wire for the allowed host.
-- `sk-ant-oat…` (Claude Code subscription OAuth token, e.g. `sk-ant-oat01-…`, issued by `claude setup-token`) → injected as a plain `CLAUDE_CODE_OAUTH_TOKEN` env var. Claude Code reads the token directly from `process.env`, so the TLS-substitution model does not apply. Avoids per-token API billing on Pro / Max / Team / Enterprise plans.
+The resolved `secret.value`'s prefix picks which env var name carries the placeholder:
+
+- `sk-ant-api…` (Anthropic API key, e.g. `sk-ant-api03-…`) → `ANTHROPIC_API_KEY`.
+- `sk-ant-oat…` (Claude Code subscription OAuth token, e.g. `sk-ant-oat01-…`, issued by `claude setup-token`) → `CLAUDE_CODE_OAUTH_TOKEN`. Avoids per-token API billing on Pro / Max / Team / Enterprise plans.
 
 Point `secret.value` at the host env var that holds the credential — `"$ANTHROPIC_API_KEY"` for the API-key path, `"$CLAUDE_CODE_OAUTH_TOKEN"` for the subscription path.
 
diff --git a/src/sandbox/__tests__/microsandbox.test.ts b/src/sandbox/__tests__/microsandbox.test.ts
index d8a7865..50346bf 100644
--- a/src/sandbox/__tests__/microsandbox.test.ts
+++ b/src/sandbox/__tests__/microsandbox.test.ts
@@ -1,6 +1,6 @@
 import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
 import type { SecretEntry } from 'microsandbox';
-import { MicrosandboxClient, buildSecrets, resolveEnv, applyAgentAuth, isOAuthSecret } from '../microsandbox.js';
+import { MicrosandboxClient, buildSecrets, resolveEnv, applyAgentAuth } from '../microsandbox.js';
 
 // ── Mocks ────────────────────────────────────────────────────────────────────
 
@@ -331,7 +331,7 @@ describe('MicrosandboxClient', () => {
   });
 });
 
-describe('agent secret auth-mode detection', () => {
+describe('applyAgentAuth', () => {
   const ORIGINAL_API_KEY = process.env.ANTHROPIC_API_KEY;
   const ORIGINAL_OAUTH = process.env.CLAUDE_CODE_OAUTH_TOKEN;
 
@@ -350,71 +350,47 @@ describe('agent secret auth-mode detection', () => {
     additionalAllowHosts: [],
   };
 
-  describe('isOAuthSecret', () => {
-    it('returns true when the resolved value starts with sk-ant-oat (followed by a version, e.g. sk-ant-oat01-)', () => {
-      process.env.CLAUDE_CODE_OAUTH_TOKEN = 'sk-ant-oat01-fake-test-token';
-      expect(isOAuthSecret({
-        envVar: 'CLAUDE_CODE_OAUTH_TOKEN',
-        value: '$CLAUDE_CODE_OAUTH_TOKEN',
-        baseUrl: 'https://api.anthropic.com',
-      })).toBe(true);
-    });
-
-    it('returns false for an API-key shaped value', () => {
-      process.env.ANTHROPIC_API_KEY = 'sk-ant-api-fake-test-key';
-      expect(isOAuthSecret({
-        envVar: 'ANTHROPIC_API_KEY',
-        value: '$ANTHROPIC_API_KEY',
-        baseUrl: 'https://api.anthropic.com',
-      })).toBe(false);
-    });
-
-    it('returns false when the referenced host env var is unset (no throw)', () => {
-      delete process.env.CLAUDE_CODE_OAUTH_TOKEN;
-      expect(isOAuthSecret({
-        envVar: 'CLAUDE_CODE_OAUTH_TOKEN',
-        value: '$CLAUDE_CODE_OAUTH_TOKEN',
-        baseUrl: 'https://api.anthropic.com',
-      })).toBe(false);
-    });
+  // The microsandbox `SecretEntry` is opaque, but inspecting its keys gives us
+  // enough confidence that the right env var name is being TLS-substituted.
+  const secretEnvVarName = (entry: SecretEntry): string | undefined =>
+    (entry as { envVar?: string; env_var?: string; name?: string }).envVar
+    ?? (entry as { env_var?: string }).env_var
+    ?? (entry as { name?: string }).name;
+
+  it('routes an OAuth-prefixed value through Secret.env under CLAUDE_CODE_OAUTH_TOKEN', () => {
+    process.env.CLAUDE_CODE_OAUTH_TOKEN = 'sk-ant-oat01-fake-test-token';
+    const secrets: SecretEntry[] = [];
+    const env: Record<string, string> = {};
+    applyAgentAuth({
+      envVar: 'CLAUDE_CODE_OAUTH_TOKEN',
+      value: '$CLAUDE_CODE_OAUTH_TOKEN',
+      baseUrl: 'https://api.anthropic.com',
+    }, claudeAdapter, secrets, env);
+    expect(secrets).toHaveLength(1);
+    expect(secretEnvVarName(secrets[0])).toBe('CLAUDE_CODE_OAUTH_TOKEN');
+    expect(env.ANTHROPIC_BASE_URL).toBe('https://api.anthropic.com');
   });
 
-  describe('applyAgentAuth', () => {
-    it('injects CLAUDE_CODE_OAUTH_TOKEN as a plain env var when value is an OAuth token', () => {
-      process.env.CLAUDE_CODE_OAUTH_TOKEN = 'sk-ant-oat01-fake-test-token';
-      const secrets: SecretEntry[] = [];
-      const env: Record<string, string> = {};
-      applyAgentAuth({
-        envVar: 'CLAUDE_CODE_OAUTH_TOKEN',
-        value: '$CLAUDE_CODE_OAUTH_TOKEN',
-        baseUrl: 'https://api.anthropic.com',
-      }, claudeAdapter, secrets, env);
-      expect(env.CLAUDE_CODE_OAUTH_TOKEN).toBe('sk-ant-oat01-fake-test-token');
-      expect(env.ANTHROPIC_BASE_URL).toBe('https://api.anthropic.com');
-      expect(secrets).toHaveLength(0);
-    });
-
-    it('wraps an API-key value in Secret.env() with the agent host on allowHosts', () => {
-      process.env.ANTHROPIC_API_KEY = 'sk-ant-api-fake-test-key';
-      const secrets: SecretEntry[] = [];
-      const env: Record<string, string> = {};
-      applyAgentAuth({
-        envVar: 'ANTHROPIC_API_KEY',
-        value: '$ANTHROPIC_API_KEY',
-        baseUrl: 'https://api.anthropic.com',
-        baseUrlEnvVar: 'ANTHROPIC_BASE_URL',
-      }, claudeAdapter, secrets, env);
-      expect(secrets).toHaveLength(1);
-      expect(env.ANTHROPIC_BASE_URL).toBe('https://api.anthropic.com');
-      expect(env.CLAUDE_CODE_OAUTH_TOKEN).toBeUndefined();
-    });
+  it('routes an API-key value through Secret.env under the agent-specific env var', () => {
+    process.env.ANTHROPIC_API_KEY = 'sk-ant-api03-fake-test-key';
+    const secrets: SecretEntry[] = [];
+    const env: Record<string, string> = {};
+    applyAgentAuth({
+      envVar: 'ANTHROPIC_API_KEY',
+      value: '$ANTHROPIC_API_KEY',
+      baseUrl: 'https://api.anthropic.com',
+      baseUrlEnvVar: 'ANTHROPIC_BASE_URL',
+    }, claudeAdapter, secrets, env);
+    expect(secrets).toHaveLength(1);
+    expect(secretEnvVarName(secrets[0])).toBe('ANTHROPIC_API_KEY');
+    expect(env.ANTHROPIC_BASE_URL).toBe('https://api.anthropic.com');
+  });
 
-    it('throws when envVar or baseUrl is missing', () => {
-      const secrets: SecretEntry[] = [];
-      const env: Record<string, string> = {};
-      expect(() => applyAgentAuth({
-        value: 'literal-value',
-      } as never, claudeAdapter, secrets, env)).toThrow(/envVar and baseUrl/);
-    });
+  it('throws when envVar or baseUrl is missing', () => {
+    const secrets: SecretEntry[] = [];
+    const env: Record<string, string> = {};
+    expect(() => applyAgentAuth({
+      value: 'literal-value',
+    } as never, claudeAdapter, secrets, env)).toThrow(/envVar and baseUrl/);
   });
 });
\ No newline at end of file
diff --git a/src/sandbox/microsandbox.ts b/src/sandbox/microsandbox.ts
index 0a8dfb0..f661894 100644
--- a/src/sandbox/microsandbox.ts
+++ b/src/sandbox/microsandbox.ts
@@ -58,17 +58,6 @@ export function resolveEnv(
  */
 const OAUTH_TOKEN_PREFIX = 'sk-ant-oat';
 
-/** Whether the agent secret's resolved value is a Claude Code subscription OAuth token. */
-export function isOAuthSecret(secret: AgentSecretConfig): boolean {
-  if (!secret.envVar) return false;
-  try {
-    const value = resolveValue(secret.value, secret.envVar);
-    return value.startsWith(OAUTH_TOKEN_PREFIX);
-  } catch {
-    return false;
-  }
-}
-
 interface AgentAuthAdapter {
   baseUrlEnvVar: string | null;
   defaultBaseUrl: string | null;
@@ -76,15 +65,19 @@ interface AgentAuthAdapter {
 }
 
 /**
- * Wire an agent's secret into the sandbox `secrets` and `env`, picking the auth
- * mode by inspecting the resolved value:
+ * Wire an agent's secret into the sandbox `secrets` and `env`.
+ *
+ * Both auth modes (API key and Claude Code subscription OAuth) go through
+ * microsandbox `Secret.env()` TLS substitution — the cleartext value never
+ * enters the VM. Inside the sandbox the env var contains the
+ * `$MSB_<env-var-name>` placeholder; microsandbox swaps it for the real value
+ * on outbound TLS to the allowed host only.
  *
- * - Claude Code subscription OAuth tokens (prefix `sk-ant-oat`, e.g.
- *   `sk-ant-oat01-…`) → plain
- *   `CLAUDE_CODE_OAUTH_TOKEN` env var. Claude Code reads the token directly
- *   from `process.env`, so microsandbox's TLS-substitution model doesn't apply.
- * - Everything else (API keys for known agents, custom-agent secrets) → wrapped
- *   in `Secret.env()` with TLS substitution and the configured base URL env var.
+ * The resolved value's prefix picks which env var name carries the placeholder:
+ * - `sk-ant-oat…` (Claude Code subscription OAuth, issued by `claude setup-token`)
+ *   → `CLAUDE_CODE_OAUTH_TOKEN`
+ * - anything else (API keys for known agents, custom-agent secrets)
+ *   → `secret.envVar` (= `ANTHROPIC_API_KEY` for claude, etc.)
  *
  * Mutates `secrets` and `env` in place.
  */
@@ -99,17 +92,14 @@ export function applyAgentAuth(
   }
   const value = resolveValue(secret.value, secret.envVar);
 
-  if (value.startsWith(OAUTH_TOKEN_PREFIX)) {
-    env.CLAUDE_CODE_OAUTH_TOKEN = value;
-    if (adapter.baseUrlEnvVar && adapter.defaultBaseUrl) {
-      env[adapter.baseUrlEnvVar] = adapter.defaultBaseUrl;
-    }
-    return;
-  }
+  const envVar = value.startsWith(OAUTH_TOKEN_PREFIX)
+    ? 'CLAUDE_CODE_OAUTH_TOKEN'
+    : secret.envVar;
 
   const hostname = new URL(secret.baseUrl).hostname;
   const allowHosts = [hostname, ...adapter.additionalAllowHosts];
-  secrets.push(Secret.env(secret.envVar, { value, allowHosts }));
+  secrets.push(Secret.env(envVar, { value, allowHosts }));
+
   const baseUrlVar = secret.baseUrlEnvVar ?? adapter.baseUrlEnvVar;
   if (baseUrlVar) {
     env[baseUrlVar] = secret.baseUrl;
diff --git a/src/scoring/judge.ts b/src/scoring/judge.ts
index eaae607..7650154 100644
--- a/src/scoring/judge.ts
+++ b/src/scoring/judge.ts
@@ -1,7 +1,7 @@
 import type { SolutionFile, JudgeScore, TestCase, SandboxAgentConfig, TargetConfig, Config, ProjectPaths, SourceConfig } from '../types.js';
 import { createAdapter } from '../agents/adapter.js';
 import { JUDGE_SCORING_CRITERIA, extractJson } from '../commands/prompt-helpers.js';
-import { MicrosandboxClient, buildSecrets, applyAgentAuth, isOAuthSecret, resolveEnv } from '../sandbox/microsandbox.js';
+import { MicrosandboxClient, buildSecrets, applyAgentAuth, resolveEnv } from '../sandbox/microsandbox.js';
 import { createEgressLockdownLogger } from '../sandbox/egress-logger.js';
 import { scaffoldWorkspace, uploadSources } from '../sandbox/scaffolding.js';
 import { deduplicateSources } from '../core/source-resolver.js';
@@ -135,14 +135,10 @@ const INFRA_ALLOWLIST = [
 export function buildJudgeAllowlist(judgeConfig: SandboxAgentConfig, config: Config): string[] {
   const hosts = new Set<string>();
 
-  // 1. Agent API endpoint — adapter default for OAuth tokens (Claude reads directly
-  //    from process.env, so secret.baseUrl is irrelevant), else secret.baseUrl.
-  if (isOAuthSecret(judgeConfig.secret)) {
-    const adapter = createAdapter(judgeConfig);
-    if (adapter.defaultBaseUrl) {
-      try { hosts.add(new URL(adapter.defaultBaseUrl).hostname); } catch { /* skip malformed */ }
-    }
-  } else if (judgeConfig.secret.baseUrl) {
+  // 1. Agent API endpoint — secret.baseUrl is always populated by validation
+  //    (filled from adapter defaults for known agents). Same source whether
+  //    the secret resolves to an API key or an OAuth token.
+  if (judgeConfig.secret.baseUrl) {
     try { hosts.add(new URL(judgeConfig.secret.baseUrl).hostname); } catch { /* skip malformed */ }
   }
 
diff --git a/src/types.ts b/src/types.ts
index 7b92ef4..1621ace 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -111,23 +111,23 @@ export interface AgentConfig {
 
 /** Agent config for sandboxed execution (executor/judge).
  *
- * Auth mode is auto-detected from the resolved `secret.value`:
+ * Both auth modes flow through microsandbox `Secret.env()` TLS substitution —
+ * the cleartext credential never enters the VM. Inside the sandbox the env
+ * var contains a `$MSB_<name>` placeholder; microsandbox swaps it for the
+ * real value on outbound TLS to the allowed host only.
  *
- * - API keys (anything not matching the OAuth prefix) are TLS-injected by
- *   microsandbox — cleartext never enters the VM; the env var inside the
- *   sandbox contains a placeholder substituted on the wire only for the
- *   agent's allowed host.
- * - Claude Code subscription OAuth tokens (prefix `sk-ant-oat`, e.g.
- *   `sk-ant-oat01-…`, issued by
- *   `claude setup-token`) are injected as a plain `CLAUDE_CODE_OAUTH_TOKEN`
- *   env var. Claude Code reads the token directly from `process.env`, so the
- *   TLS-substitution model does not apply. Avoids per-token API billing on
- *   Pro / Max / Team / Enterprise plans.
+ * The resolved `secret.value`'s prefix picks which env var name carries the
+ * placeholder:
+ *
+ * - `sk-ant-oat…` (Claude Code subscription OAuth token, issued by
+ *   `claude setup-token`, requires Pro / Max / Team / Enterprise) →
+ *   `CLAUDE_CODE_OAUTH_TOKEN`. Avoids per-token API billing.
+ * - anything else (API keys for known agents, custom-agent secrets) →
+ *   `secret.envVar` (= `ANTHROPIC_API_KEY` for claude, etc.).
  *
  * Point `secret.value` at the host env var that holds the credential —
  * `$ANTHROPIC_API_KEY` for the API-key path, `$CLAUDE_CODE_OAUTH_TOKEN` for
- * the subscription path. The runtime sniffs the resolved value to pick the
- * path.
+ * the subscription path.
  */
 export interface SandboxAgentConfig extends AgentConfig {
   /** Agent's secret and base URL. Auth mode is determined from the resolved value's prefix. */

From 13ccece01558ee4911a315a3c19c2db27175ee9e Mon Sep 17 00:00:00 2001
From: nickwinder <nfxdevelopment@gmail.com>
Date: Fri, 15 May 2026 11:28:12 +1200
Subject: [PATCH 5/6] docs: scope OAuth-vs-API-key prefix detection to the
 claude adapter
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The schema reference previously read like the `sk-ant-oat…` /
`sk-ant-api…` prefix dispatch was a generic feature across all
adapters. In reality it's a claude-only fork — codex, gemini, and
custom agents only have the API-key path today. Reframe the
SandboxAgentConfig description so the default behavior leads (TLS
substitution into the adapter-default env var) and the OAuth slot is
clearly tagged as the claude-specific opt-in.
---
 skills/_reference/config-schema.md | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/skills/_reference/config-schema.md b/skills/_reference/config-schema.md
index 79af65b..276784f 100644
--- a/skills/_reference/config-schema.md
+++ b/skills/_reference/config-schema.md
@@ -82,14 +82,11 @@ Extends AgentConfig with one **required** field:
 |-------|------|----------|
 | `secret` | `AgentSecretConfig` | **Yes** |
 
-Both auth modes flow through microsandbox `Secret.env()` TLS substitution — the cleartext credential never enters the VM. Inside the sandbox the env var contains a `$MSB_<name>` placeholder; microsandbox swaps it for the real value on outbound TLS to the allowed host only.
+The resolved `secret.value` is wired into the sandbox via microsandbox `Secret.env()` TLS substitution — the cleartext credential never enters the VM. Inside the sandbox the env var contains a `$MSB_<name>` placeholder; microsandbox swaps it for the real value on outbound TLS to the allowed host only.
 
-The resolved `secret.value`'s prefix picks which env var name carries the placeholder:
+By default the placeholder lands under the adapter's API-key env var (e.g. `ANTHROPIC_API_KEY` for claude, see [Known Agent Defaults](#known-agent-defaults-auto-filled-when-field-is-absent) below).
 
-- `sk-ant-api…` (Anthropic API key, e.g. `sk-ant-api03-…`) → `ANTHROPIC_API_KEY`.
-- `sk-ant-oat…` (Claude Code subscription OAuth token, e.g. `sk-ant-oat01-…`, issued by `claude setup-token`) → `CLAUDE_CODE_OAUTH_TOKEN`. Avoids per-token API billing on Pro / Max / Team / Enterprise plans.
-
-Point `secret.value` at the host env var that holds the credential — `"$ANTHROPIC_API_KEY"` for the API-key path, `"$CLAUDE_CODE_OAUTH_TOKEN"` for the subscription path.
+**Claude-only: subscription auth.** When `command: "claude"` and the resolved value starts with `sk-ant-oat` (a Claude Code subscription OAuth token issued by `claude setup-token`, e.g. `sk-ant-oat01-…`), the placeholder lands under `CLAUDE_CODE_OAUTH_TOKEN` instead. This lets you bill the run against a Pro / Max / Team / Enterprise plan instead of per-token API charges. Point `secret.value` at `"$CLAUDE_CODE_OAUTH_TOKEN"` to opt in. Other adapters (codex, gemini, custom) only have the API-key path today.
 
 ### AgentSecretConfig
 

From 8575a876b14decf40e5aa76b056b147061e33c35 Mon Sep 17 00:00:00 2001
From: nickwinder <nfxdevelopment@gmail.com>
Date: Fri, 15 May 2026 11:33:24 +1200
Subject: [PATCH 6/6] =?UTF-8?q?refactor(auth):=20simplify=20applyAgentAuth?=
 =?UTF-8?q?=20=E2=80=94=20reuse=20AgentAdapter,=20drop=20reflection=20in?=
 =?UTF-8?q?=20tests?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Code review on the PR surfaced five cleanups, all in the diff and none
changing runtime behaviour:

- `applyAgentAuth` accepted a local `AgentAuthAdapter` interface that
  duplicated three fields of the exported `AgentAdapter`. Switched to
  `Pick<AgentAdapter, 'baseUrlEnvVar' | 'additionalAllowHosts'>` so the
  shape stays tied to the real adapter type and unused fields
  (`defaultBaseUrl`) drop out.
- Added a paired `OAUTH_TOKEN_ENV_VAR = 'CLAUDE_CODE_OAUTH_TOKEN'`
  constant next to `OAUTH_TOKEN_PREFIX` so the two Anthropic-specific
  strings live together; replaced the inline literal.
- The `applyAgentAuth` tests previously inspected the opaque
  `SecretEntry` shape through a three-field reflection helper — fragile
  if the microsandbox SDK ever renames an internal field. Tests now
  assert on `Secret.env`'s call args (already mocked in this file), so
  we verify the wire contract rather than the library's internal
  representation.
- Dropped dead `isOAuthSecret: vi.fn()` mock entries in
  `execute.test.ts` and `judge.test.ts` (the function was removed
  in an earlier refactor commit; the mocks were leftovers).
- Trimmed a verbose comment in `buildJudgeAllowlist` that restated what
  the surrounding `if` already expressed.

328 tests pass; type-check + lint clean. No runtime behaviour change,
so no re-smoke needed.
---
 src/commands/__tests__/execute.test.ts     |  1 -
 src/sandbox/__tests__/microsandbox.test.ts | 43 ++++++++++------------
 src/sandbox/microsandbox.ts                | 24 +++++-------
 src/scoring/__tests__/judge.test.ts        |  1 -
 src/scoring/judge.ts                       |  4 +-
 5 files changed, 29 insertions(+), 44 deletions(-)

diff --git a/src/commands/__tests__/execute.test.ts b/src/commands/__tests__/execute.test.ts
index e600125..cd9a570 100644
--- a/src/commands/__tests__/execute.test.ts
+++ b/src/commands/__tests__/execute.test.ts
@@ -42,7 +42,6 @@ vi.mock('../../sandbox/microsandbox.js', () => {
     MicrosandboxClient: MockMicrosandboxClient,
     buildSecrets: vi.fn().mockReturnValue([]),
     applyAgentAuth: vi.fn(),
-    isOAuthSecret: vi.fn().mockReturnValue(false),
     resolveEnv: vi.fn().mockReturnValue({}),
   };
 });
diff --git a/src/sandbox/__tests__/microsandbox.test.ts b/src/sandbox/__tests__/microsandbox.test.ts
index 50346bf..fe823b4 100644
--- a/src/sandbox/__tests__/microsandbox.test.ts
+++ b/src/sandbox/__tests__/microsandbox.test.ts
@@ -1,5 +1,4 @@
 import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
-import type { SecretEntry } from 'microsandbox';
 import { MicrosandboxClient, buildSecrets, resolveEnv, applyAgentAuth } from '../microsandbox.js';
 
 // ── Mocks ────────────────────────────────────────────────────────────────────
@@ -335,6 +334,10 @@ describe('applyAgentAuth', () => {
   const ORIGINAL_API_KEY = process.env.ANTHROPIC_API_KEY;
   const ORIGINAL_OAUTH = process.env.CLAUDE_CODE_OAUTH_TOKEN;
 
+  beforeEach(() => {
+    vi.clearAllMocks();
+  });
+
   afterEach(() => {
     const restore = (key: string, value: string | undefined) => {
       if (value === undefined) delete process.env[key];
@@ -346,51 +349,43 @@ describe('applyAgentAuth', () => {
 
   const claudeAdapter = {
     baseUrlEnvVar: 'ANTHROPIC_BASE_URL',
-    defaultBaseUrl: 'https://api.anthropic.com',
     additionalAllowHosts: [],
   };
 
-  // The microsandbox `SecretEntry` is opaque, but inspecting its keys gives us
-  // enough confidence that the right env var name is being TLS-substituted.
-  const secretEnvVarName = (entry: SecretEntry): string | undefined =>
-    (entry as { envVar?: string; env_var?: string; name?: string }).envVar
-    ?? (entry as { env_var?: string }).env_var
-    ?? (entry as { name?: string }).name;
-
-  it('routes an OAuth-prefixed value through Secret.env under CLAUDE_CODE_OAUTH_TOKEN', () => {
+  it('routes an OAuth-prefixed value through Secret.env under CLAUDE_CODE_OAUTH_TOKEN', async () => {
+    const { Secret } = await import('microsandbox');
     process.env.CLAUDE_CODE_OAUTH_TOKEN = 'sk-ant-oat01-fake-test-token';
-    const secrets: SecretEntry[] = [];
-    const env: Record<string, string> = {};
     applyAgentAuth({
       envVar: 'CLAUDE_CODE_OAUTH_TOKEN',
       value: '$CLAUDE_CODE_OAUTH_TOKEN',
       baseUrl: 'https://api.anthropic.com',
-    }, claudeAdapter, secrets, env);
-    expect(secrets).toHaveLength(1);
-    expect(secretEnvVarName(secrets[0])).toBe('CLAUDE_CODE_OAUTH_TOKEN');
-    expect(env.ANTHROPIC_BASE_URL).toBe('https://api.anthropic.com');
+    }, claudeAdapter, [], {});
+    expect(Secret.env).toHaveBeenCalledWith('CLAUDE_CODE_OAUTH_TOKEN', expect.objectContaining({
+      value: 'sk-ant-oat01-fake-test-token',
+      allowHosts: ['api.anthropic.com'],
+    }));
   });
 
-  it('routes an API-key value through Secret.env under the agent-specific env var', () => {
+  it('routes an API-key value through Secret.env under the agent-specific env var', async () => {
+    const { Secret } = await import('microsandbox');
     process.env.ANTHROPIC_API_KEY = 'sk-ant-api03-fake-test-key';
-    const secrets: SecretEntry[] = [];
     const env: Record<string, string> = {};
     applyAgentAuth({
       envVar: 'ANTHROPIC_API_KEY',
       value: '$ANTHROPIC_API_KEY',
       baseUrl: 'https://api.anthropic.com',
       baseUrlEnvVar: 'ANTHROPIC_BASE_URL',
-    }, claudeAdapter, secrets, env);
-    expect(secrets).toHaveLength(1);
-    expect(secretEnvVarName(secrets[0])).toBe('ANTHROPIC_API_KEY');
+    }, claudeAdapter, [], env);
+    expect(Secret.env).toHaveBeenCalledWith('ANTHROPIC_API_KEY', expect.objectContaining({
+      value: 'sk-ant-api03-fake-test-key',
+      allowHosts: ['api.anthropic.com'],
+    }));
     expect(env.ANTHROPIC_BASE_URL).toBe('https://api.anthropic.com');
   });
 
   it('throws when envVar or baseUrl is missing', () => {
-    const secrets: SecretEntry[] = [];
-    const env: Record<string, string> = {};
     expect(() => applyAgentAuth({
       value: 'literal-value',
-    } as never, claudeAdapter, secrets, env)).toThrow(/envVar and baseUrl/);
+    } as never, claudeAdapter, [], {})).toThrow(/envVar and baseUrl/);
   });
 });
\ No newline at end of file
diff --git a/src/sandbox/microsandbox.ts b/src/sandbox/microsandbox.ts
index f661894..b8e6434 100644
--- a/src/sandbox/microsandbox.ts
+++ b/src/sandbox/microsandbox.ts
@@ -5,6 +5,7 @@ import type {
   FsEntry,
 } from 'microsandbox';
 import type { SandboxConfig, SecretConfig, AgentSecretConfig } from '../types.js';
+import type { AgentAdapter } from '../agents/adapter.js';
 
 export interface CommandResult {
   stdout: string;
@@ -49,20 +50,13 @@ export function resolveEnv(
   return resolved;
 }
 
-/**
- * Claude Code subscription OAuth tokens are prefixed `sk-ant-oat` followed by a
- * version number (e.g. `sk-ant-oat01-…`), issued by `claude setup-token`. API
- * keys use `sk-ant-api` (e.g. `sk-ant-api03-…`). The auth mode is determined
- * by inspecting the resolved secret value at sandbox-create time — no separate
- * config flag needed.
- */
+// Claude-specific credential format. Subscription OAuth tokens are prefixed
+// `sk-ant-oat` followed by a version (e.g. `sk-ant-oat01-…`), issued by
+// `claude setup-token`. API keys use `sk-ant-api`. The prefix of the resolved
+// value decides which env var the placeholder lands under: OAuth tokens go to
+// `CLAUDE_CODE_OAUTH_TOKEN`, anything else to the configured `envVar`.
 const OAUTH_TOKEN_PREFIX = 'sk-ant-oat';
-
-interface AgentAuthAdapter {
-  baseUrlEnvVar: string | null;
-  defaultBaseUrl: string | null;
-  additionalAllowHosts: string[];
-}
+const OAUTH_TOKEN_ENV_VAR = 'CLAUDE_CODE_OAUTH_TOKEN';
 
 /**
  * Wire an agent's secret into the sandbox `secrets` and `env`.
@@ -83,7 +77,7 @@ interface AgentAuthAdapter {
  */
 export function applyAgentAuth(
   secret: AgentSecretConfig,
-  adapter: AgentAuthAdapter,
+  adapter: Pick<AgentAdapter, 'baseUrlEnvVar' | 'additionalAllowHosts'>,
   secrets: SecretEntry[],
   env: Record<string, string>,
 ): void {
@@ -93,7 +87,7 @@ export function applyAgentAuth(
   const value = resolveValue(secret.value, secret.envVar);
 
   const envVar = value.startsWith(OAUTH_TOKEN_PREFIX)
-    ? 'CLAUDE_CODE_OAUTH_TOKEN'
+    ? OAUTH_TOKEN_ENV_VAR
     : secret.envVar;
 
   const hostname = new URL(secret.baseUrl).hostname;
diff --git a/src/scoring/__tests__/judge.test.ts b/src/scoring/__tests__/judge.test.ts
index ad13b8b..c8952c6 100644
--- a/src/scoring/__tests__/judge.test.ts
+++ b/src/scoring/__tests__/judge.test.ts
@@ -24,7 +24,6 @@ vi.mock('../../sandbox/microsandbox.js', () => ({
   }),
   buildSecrets: vi.fn().mockReturnValue([]),
   applyAgentAuth: vi.fn(),
-  isOAuthSecret: vi.fn().mockReturnValue(false),
   resolveEnv: vi.fn().mockReturnValue({}),
 }));
 
diff --git a/src/scoring/judge.ts b/src/scoring/judge.ts
index 7650154..fe2c42d 100644
--- a/src/scoring/judge.ts
+++ b/src/scoring/judge.ts
@@ -135,9 +135,7 @@ const INFRA_ALLOWLIST = [
 export function buildJudgeAllowlist(judgeConfig: SandboxAgentConfig, config: Config): string[] {
   const hosts = new Set<string>();
 
-  // 1. Agent API endpoint — secret.baseUrl is always populated by validation
-  //    (filled from adapter defaults for known agents). Same source whether
-  //    the secret resolves to an API key or an OAuth token.
+  // 1. Agent API endpoint: hostname of secret.baseUrl (skipped if unset or malformed)
   if (judgeConfig.secret.baseUrl) {
     try { hosts.add(new URL(judgeConfig.secret.baseUrl).hostname); } catch { /* skip malformed */ }
   }