PSPDFKit-labs · nickwinder · May 15, 2026 · May 13, 2026 · May 14, 2026 · May 14, 2026
diff --git a/README.md b/README.md
@@ -364,6 +364,28 @@ Generator and insights agents run locally and do not require a secret.
 | `baseUrl` | API base URL. Hostname is used for network allowlisting. Auto-detected for known agents. |
 | `baseUrlEnvVar` | Override the base URL env var name. Auto-detected for known agents. |
 
+#### Claude Code subscription auth (avoid API billing)
+
+If you have a Claude Pro / Max / Team / Enterprise subscription, sandbox agents using `command: "claude"` can authenticate via your subscription instead of paying per-token API charges. Point `secret.value` at the Claude Code OAuth token instead of an API key:
+
+```json
+{
+  "agents": {
+    "executor": { "command": "claude", "secret": { "value": "$CLAUDE_CODE_OAUTH_TOKEN" } },
+    "judge":    { "command": "claude", "secret": { "value": "$CLAUDE_CODE_OAUTH_TOKEN" } }
+  }
+}
+```
+
+One-time host setup:
+
+```bash
+claude setup-token             # interactive — generates a long-lived OAuth token
+export CLAUDE_CODE_OAUTH_TOKEN='<token>'   # before running the eval
+```
+
+How it works: at sandbox-create time the runtime inspects the prefix of the resolved value. Anthropic OAuth tokens start with `sk-ant-oat` (e.g. `sk-ant-oat01-…`); API keys start with `sk-ant-api` (e.g. `sk-ant-api03-…`). Both paths flow through microsandbox's `Secret.env()` TLS substitution — cleartext never enters the VM; the env var inside the sandbox contains a placeholder, and microsandbox swaps it for the real value on outbound TLS to `api.anthropic.com` only. The prefix only decides which env var name carries the placeholder: `CLAUDE_CODE_OAUTH_TOKEN` for an `sk-ant-oat…` value, the agent's API-key env var (`ANTHROPIC_API_KEY` for claude) for anything else. Subscription concurrent-session caps apply.
+
 #### Custom agents
 
 Custom agents support additional args fields with `{prompt}` and `{workDir}` placeholders:

diff --git a/skills/_reference/config-schema.md b/skills/_reference/config-schema.md
@@ -82,6 +82,12 @@ Extends AgentConfig with one **required** field:
 |-------|------|----------|
 | `secret` | `AgentSecretConfig` | **Yes** |
 
+The resolved `secret.value` is wired into the sandbox via microsandbox `Secret.env()` TLS substitution — the cleartext credential never enters the VM. Inside the sandbox the env var contains a `$MSB_<name>` placeholder; microsandbox swaps it for the real value on outbound TLS to the allowed host only.
+
+By default the placeholder lands under the adapter's API-key env var (e.g. `ANTHROPIC_API_KEY` for claude, see [Known Agent Defaults](#known-agent-defaults-auto-filled-when-field-is-absent) below).
+
+**Claude-only: subscription auth.** When the resolved value starts with `sk-ant-oat` (a Claude Code subscription OAuth token issued by `claude setup-token`, e.g. `sk-ant-oat01-…`), the placeholder lands under `CLAUDE_CODE_OAUTH_TOKEN` instead. Detection looks only at the value prefix — the runtime does not check `command` — so supply such a token only to `command: "claude"` agents. This lets you bill the run against a Pro / Max / Team / Enterprise plan instead of per-token API charges. Point `secret.value` at `"$CLAUDE_CODE_OAUTH_TOKEN"` to opt in. Other adapters (codex, gemini, custom) only have the API-key path today.
+
 ### AgentSecretConfig
 
 | Field | Type | Required |

diff --git a/src/commands/__tests__/execute.test.ts b/src/commands/__tests__/execute.test.ts
@@ -41,7 +41,7 @@ vi.mock('../../sandbox/microsandbox.js', () => {
   return {
     MicrosandboxClient: MockMicrosandboxClient,
     buildSecrets: vi.fn().mockReturnValue([]),
-    buildAgentSecret: vi.fn().mockReturnValue({}),
+    applyAgentAuth: vi.fn(),
     resolveEnv: vi.fn().mockReturnValue({}),
   };
 });

diff --git a/src/commands/execute.ts b/src/commands/execute.ts
@@ -3,7 +3,7 @@ import ora from 'ora';
 import { loadDotenv } from '../core/env.js';
 import { loadConfig } from '../core/config.js';
 import { loadTestSuite, saveResult, saveBinaryResult, formatElapsed } from '../core/suite-io.js';
-import { MicrosandboxClient, buildSecrets, buildAgentSecret, resolveEnv, type CommandResult } from '../sandbox/microsandbox.js';
+import { MicrosandboxClient, buildSecrets, applyAgentAuth, resolveEnv, type CommandResult } from '../sandbox/microsandbox.js';
 import { createEgressLogger } from '../sandbox/egress-logger.js';
 import { scaffoldWorkspace } from '../sandbox/scaffolding.js';
 import { WorkerPool } from '../sandbox/worker-pool.js';
@@ -158,15 +158,10 @@ export async function executeTestCase(
     const env = resolveEnv(config.sandbox?.env);
     const timeoutSecs = target.timeout ?? config.sandbox.defaultTimeout ?? 600;
 
-    // Merge agent secret into sandbox secrets and set base URL env var
     const executorConfig: SandboxAgentConfig = config.agents?.executor
       ?? { command: 'claude', secret: { value: '$ANTHROPIC_API_KEY' } };
     const execAdapter = createAdapter(executorConfig);
-    secrets.push(buildAgentSecret(executorConfig.secret, execAdapter.additionalAllowHosts));
-    const baseUrlVar = executorConfig.secret.baseUrlEnvVar ?? execAdapter.baseUrlEnvVar;
-    if (baseUrlVar && executorConfig.secret.baseUrl) {
-      env[baseUrlVar] = executorConfig.secret.baseUrl;
-    }
+    applyAgentAuth(executorConfig.secret, execAdapter, secrets, env);
 
     await client.create(
       sandboxName(testCase.id),

diff --git a/src/commands/sandbox.ts b/src/commands/sandbox.ts
@@ -5,7 +5,7 @@ import { loadDotenv } from '../core/env.js';
 import { loadConfig } from '../core/config.js';
 import { loadTestSuite, loadBinaryResult } from '../core/suite-io.js';
 import { loadJsonFile } from '../core/results.js';
-import { MicrosandboxClient, buildSecrets, buildAgentSecret, resolveEnv } from '../sandbox/microsandbox.js';
+import { MicrosandboxClient, buildSecrets, applyAgentAuth, resolveEnv } from '../sandbox/microsandbox.js';
 import { scaffoldWorkspace, uploadSources } from '../sandbox/scaffolding.js';
 import { createEgressLogger } from '../sandbox/egress-logger.js';
 import { createAdapter } from '../agents/adapter.js';
@@ -59,11 +59,7 @@ export async function sandboxCommand(paths: ProjectPaths, options: SandboxOption
   if (options.mode) {
     agentConfig = getAgentConfig(config, options.mode);
     adapter = createAdapter(agentConfig);
-    secrets.push(buildAgentSecret(agentConfig.secret, adapter.additionalAllowHosts));
-    const baseUrlVar = agentConfig.secret.baseUrlEnvVar ?? adapter.baseUrlEnvVar;
-    if (baseUrlVar && agentConfig.secret.baseUrl) {
-      env[baseUrlVar] = agentConfig.secret.baseUrl;
-    }
+    applyAgentAuth(agentConfig.secret, adapter, secrets, env);
   }
 
   // Prepare output directory for artifacts

diff --git a/src/core/__tests__/config.test.ts b/src/core/__tests__/config.test.ts
@@ -223,4 +223,14 @@ describe('loadConfig', () => {
     mockReadFile.mockResolvedValue(JSON.stringify(config));
     await expect(loadConfig('/fake/config.json')).rejects.toThrow(/valid URL/);
   });
+
+  it('accepts secret pointing at $CLAUDE_CODE_OAUTH_TOKEN (auth mode resolved later by value prefix)', async () => {
+    const config = {
+      ...validConfig,
+      agents: { judge: { command: 'claude', secret: { value: '$CLAUDE_CODE_OAUTH_TOKEN' } } },
+    };
+    mockReadFile.mockResolvedValue(JSON.stringify(config));
+    const result = await loadConfig('/fake/config.json');
+    expect(result.agents?.judge?.secret?.value).toBe('$CLAUDE_CODE_OAUTH_TOKEN');
+  });
 });
diff --git a/src/core/config.ts b/src/core/config.ts
@@ -129,7 +129,9 @@ export function validateConfig(data: unknown, configPath?: string): Config {
       const isSandboxRole = SANDBOX_ROLES.includes(role);
 
       if (isSandboxRole) {
-        // Sandbox agents (executor/judge) require secret
+        // Sandbox agents (executor/judge) require secret. Auth mode (API key vs Claude Code
+        // subscription OAuth token) is auto-detected from the resolved value's prefix at
+        // sandbox-create time.
         if (!agent.secret || typeof agent.secret !== 'object' || Array.isArray(agent.secret)) {
           throw new Error(`agents.${role} requires a secret with at least { value } for secure sandbox execution`);
         }

diff --git a/src/sandbox/__tests__/microsandbox.test.ts b/src/sandbox/__tests__/microsandbox.test.ts
@@ -1,5 +1,5 @@
-import { describe, it, expect, vi, beforeEach } from 'vitest';
-import { MicrosandboxClient, buildSecrets, resolveEnv } from '../microsandbox.js';
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import { MicrosandboxClient, buildSecrets, resolveEnv, applyAgentAuth } from '../microsandbox.js';
 
 // ── Mocks ────────────────────────────────────────────────────────────────────
 
@@ -328,4 +328,91 @@ describe('MicrosandboxClient', () => {
       await expect(client.destroy()).resolves.toBeUndefined();
     });
   });
+});
+
+describe('applyAgentAuth', () => {
+  const ORIGINAL_API_KEY = process.env.ANTHROPIC_API_KEY;
+  const ORIGINAL_OAUTH = process.env.CLAUDE_CODE_OAUTH_TOKEN;
+
+  beforeEach(() => {
+    vi.clearAllMocks();
+  });
+
+  // Put the host's real credentials back so fake tokens never leak into other suites.
+  afterEach(() => {
+    const restore = (key: string, value: string | undefined) => {
+      if (value === undefined) delete process.env[key];
+      else process.env[key] = value;
+    };
+    restore('ANTHROPIC_API_KEY', ORIGINAL_API_KEY);
+    restore('CLAUDE_CODE_OAUTH_TOKEN', ORIGINAL_OAUTH);
+  });
+
+  const claudeAdapter = {
+    baseUrlEnvVar: 'ANTHROPIC_BASE_URL',
+    additionalAllowHosts: [],
+  };
+
+  type SecretList = Parameters<typeof applyAgentAuth>[2];
+
+  it('routes an OAuth-prefixed value through Secret.env under CLAUDE_CODE_OAUTH_TOKEN', async () => {
+    const { Secret } = await import('microsandbox');
+    process.env.CLAUDE_CODE_OAUTH_TOKEN = 'sk-ant-oat01-fake-test-token';
+    const secrets: SecretList = [];
+    const env: Record<string, string> = {};
+    // envVar is the adapter's API-key slot here, so only the value prefix can
+    // move the placeholder to CLAUDE_CODE_OAUTH_TOKEN.
+    applyAgentAuth({
+      envVar: 'ANTHROPIC_API_KEY',
+      value: '$CLAUDE_CODE_OAUTH_TOKEN',
+      baseUrl: 'https://api.anthropic.com',
+    }, claudeAdapter, secrets, env);
+    expect(Secret.env).toHaveBeenCalledTimes(1);
+    expect(Secret.env).toHaveBeenCalledWith('CLAUDE_CODE_OAUTH_TOKEN', expect.objectContaining({
+      value: 'sk-ant-oat01-fake-test-token',
+      allowHosts: ['api.anthropic.com'],
+    }));
+    expect(secrets).toHaveLength(1);
+    expect(env.ANTHROPIC_BASE_URL).toBe('https://api.anthropic.com');
+  });
+
+  it('routes an API-key value through Secret.env under the agent-specific env var', async () => {
+    const { Secret } = await import('microsandbox');
+    process.env.ANTHROPIC_API_KEY = 'sk-ant-api03-fake-test-key';
+    const secrets: SecretList = [];
+    const env: Record<string, string> = {};
+    applyAgentAuth({
+      envVar: 'ANTHROPIC_API_KEY',
+      value: '$ANTHROPIC_API_KEY',
+      baseUrl: 'https://api.anthropic.com',
+      baseUrlEnvVar: 'ANTHROPIC_BASE_URL',
+    }, claudeAdapter, secrets, env);
+    expect(Secret.env).toHaveBeenCalledWith('ANTHROPIC_API_KEY', expect.objectContaining({
+      value: 'sk-ant-api03-fake-test-key',
+      allowHosts: ['api.anthropic.com'],
+    }));
+    expect(secrets).toHaveLength(1);
+    expect(env.ANTHROPIC_BASE_URL).toBe('https://api.anthropic.com');
+  });
+
+  it('appends the adapter additionalAllowHosts after the base URL hostname', async () => {
+    const { Secret } = await import('microsandbox');
+    process.env.ANTHROPIC_API_KEY = 'sk-ant-api03-fake-test-key';
+    applyAgentAuth({
+      envVar: 'ANTHROPIC_API_KEY',
+      value: '$ANTHROPIC_API_KEY',
+      baseUrl: 'https://api.anthropic.com',
+    }, { ...claudeAdapter, additionalAllowHosts: ['extra.example.com'] }, [], {});
+    expect(Secret.env).toHaveBeenCalledWith('ANTHROPIC_API_KEY', expect.objectContaining({
+      allowHosts: ['api.anthropic.com', 'extra.example.com'],
+    }));
+  });
+
+  it('throws when envVar or baseUrl is missing', () => {
+    const secrets: SecretList = [];
+    expect(() => applyAgentAuth({
+      value: 'literal-value',
+    } as never, claudeAdapter, secrets, {})).toThrow(/envVar and baseUrl/);
+    expect(secrets).toHaveLength(0);
+  });
 });
diff --git a/src/sandbox/microsandbox.ts b/src/sandbox/microsandbox.ts
@@ -5,6 +5,7 @@ import type {
   FsEntry,
 } from 'microsandbox';
 import type { SandboxConfig, SecretConfig, AgentSecretConfig } from '../types.js';
+import type { AgentAdapter } from '../agents/adapter.js';
 
 export interface CommandResult {
   stdout: string;
@@ -49,18 +50,60 @@ export function resolveEnv(
   return resolved;
 }
 
+// Claude-specific credential format. Subscription OAuth tokens are prefixed
+// `sk-ant-oat` followed by a version (e.g. `sk-ant-oat01-…`), issued by
+// `claude setup-token`. API keys use `sk-ant-api`. The framework picks the
+// env-var slot the placeholder lands under by inspecting the resolved
+// value's prefix — no separate config flag needed.
+const OAUTH_TOKEN_PREFIX = 'sk-ant-oat';
+const OAUTH_TOKEN_ENV_VAR = 'CLAUDE_CODE_OAUTH_TOKEN';
+
 /**
- * Build a microsandbox `Secret.env()` entry from an agent's secret config.
- * The `allowHosts` is derived from the base URL hostname.
+ * Wire an agent's secret into the sandbox `secrets` and `env`.
+ *
+ * Both auth modes (API key and Claude Code subscription OAuth) go through
+ * microsandbox `Secret.env()` TLS substitution — the cleartext value never
+ * enters the VM. Inside the sandbox the env var contains the
+ * `$MSB_<env-var-name>` placeholder; microsandbox swaps it for the real value
+ * on outbound TLS to the allowed host only.
+ *
+ * The resolved value's prefix picks which env var name carries the placeholder:
+ * - `sk-ant-oat…` (Claude Code subscription OAuth, issued by `claude setup-token`)
+ *   → `CLAUDE_CODE_OAUTH_TOKEN`
+ * - anything else (API keys for known agents, custom-agent secrets)
+ *   → `secret.envVar` (= `ANTHROPIC_API_KEY` for claude, etc.)
+ *
+ * Mutates `secrets` and `env` in place.
+ *
+ * The prefix check runs on every resolved value; this function does not look
+ * at which agent command the secret belongs to.
+ *
+ * @throws Error when `secret.envVar` or `secret.baseUrl` is unset.
  */
-export function buildAgentSecret(secret: AgentSecretConfig, additionalAllowHosts?: string[]): SecretEntry {
+export function applyAgentAuth(
+  secret: AgentSecretConfig,
+  adapter: Pick<AgentAdapter, 'baseUrlEnvVar' | 'additionalAllowHosts'>,
+  secrets: SecretEntry[],
+  env: Record<string, string>,
+): void {
   if (!secret.envVar || !secret.baseUrl) {
     throw new Error('Agent secret must have envVar and baseUrl set (should be filled by config validation)');
   }
   const value = resolveValue(secret.value, secret.envVar);
+
+  const envVar = value.startsWith(OAUTH_TOKEN_PREFIX)
+    ? OAUTH_TOKEN_ENV_VAR
+    : secret.envVar;
+
   const hostname = new URL(secret.baseUrl).hostname;
-  const allowHosts = [hostname, ...(additionalAllowHosts ?? [])];
-  return Secret.env(secret.envVar, { value, allowHosts });
+  // The helper this replaces accepted a missing host list, so keep tolerating one.
+  const allowHosts = [hostname, ...(adapter.additionalAllowHosts ?? [])];
+  secrets.push(Secret.env(envVar, { value, allowHosts }));
+
+  const baseUrlVar = secret.baseUrlEnvVar ?? adapter.baseUrlEnvVar;
+  if (baseUrlVar) {
+    env[baseUrlVar] = secret.baseUrl;
+  }
 }
 
 function resolveValue(value: string, envVar: string): string {

diff --git a/src/scoring/__tests__/judge.test.ts b/src/scoring/__tests__/judge.test.ts
@@ -23,7 +23,7 @@ vi.mock('../../sandbox/microsandbox.js', () => ({
     Object.assign(this, mockClient);
   }),
   buildSecrets: vi.fn().mockReturnValue([]),
-  buildAgentSecret: vi.fn().mockReturnValue({}),
+  applyAgentAuth: vi.fn(),
   resolveEnv: vi.fn().mockReturnValue({}),
 }));
 

diff --git a/src/scoring/judge.ts b/src/scoring/judge.ts
@@ -1,7 +1,7 @@
 import type { SolutionFile, JudgeScore, TestCase, SandboxAgentConfig, TargetConfig, Config, ProjectPaths, SourceConfig } from '../types.js';
 import { createAdapter } from '../agents/adapter.js';
 import { JUDGE_SCORING_CRITERIA, extractJson } from '../commands/prompt-helpers.js';
-import { MicrosandboxClient, buildSecrets, buildAgentSecret, resolveEnv } from '../sandbox/microsandbox.js';
+import { MicrosandboxClient, buildSecrets, applyAgentAuth, resolveEnv } from '../sandbox/microsandbox.js';
 import { createEgressLockdownLogger } from '../sandbox/egress-logger.js';
 import { scaffoldWorkspace, uploadSources } from '../sandbox/scaffolding.js';
 import { deduplicateSources } from '../core/source-resolver.js';
@@ -135,7 +135,7 @@ const INFRA_ALLOWLIST = [
 export function buildJudgeAllowlist(judgeConfig: SandboxAgentConfig, config: Config): string[] {
   const hosts = new Set<string>();
 
-  // 1. Agent API endpoint from secret's baseUrl
+  // 1. Agent API endpoint from secret.baseUrl
   if (judgeConfig.secret.baseUrl) {
     try { hosts.add(new URL(judgeConfig.secret.baseUrl).hostname); } catch { /* skip malformed */ }
   }
@@ -286,13 +286,8 @@ export async function runSandboxedJudge(
     const env = resolveEnv(config.sandbox?.env);
     const timeoutSecs = target.timeout ?? config.sandbox.defaultTimeout ?? 600;
 
-    // Merge agent secret into sandbox secrets and set base URL env var
     const judgeAdapter = createAdapter(judgeConfig);
-    secrets.push(buildAgentSecret(judgeConfig.secret, judgeAdapter.additionalAllowHosts));
-    const baseUrlVar = judgeConfig.secret.baseUrlEnvVar ?? judgeAdapter.baseUrlEnvVar;
-    if (baseUrlVar && judgeConfig.secret.baseUrl) {
-      env[baseUrlVar] = judgeConfig.secret.baseUrl;
-    }
+    applyAgentAuth(judgeConfig.secret, judgeAdapter, secrets, env);
 
     await client.create(
       sandboxName(testCase.id),

diff --git a/src/types.ts b/src/types.ts
@@ -109,9 +109,28 @@ export interface AgentConfig {
   logPattern?: string;
 }
 
-/** Agent config for sandboxed execution (executor/judge). Secret is required for microsandbox TLS injection. */
+/** Agent config for sandboxed execution (executor/judge).
+ *
+ * Both auth modes flow through microsandbox `Secret.env()` TLS substitution —
+ * the cleartext credential never enters the VM. Inside the sandbox the env
+ * var contains a `$MSB_<name>` placeholder; microsandbox swaps it for the
+ * real value on outbound TLS to the allowed host only.
+ *
+ * The resolved `secret.value`'s prefix picks which env var name carries the
+ * placeholder:
+ *
+ * - `sk-ant-oat…` (Claude Code subscription OAuth token, issued by
+ *   `claude setup-token`, requires Pro / Max / Team / Enterprise) →
+ *   `CLAUDE_CODE_OAUTH_TOKEN`. Avoids per-token API billing.
+ * - anything else (API keys for known agents, custom-agent secrets) →
+ *   `secret.envVar` (= `ANTHROPIC_API_KEY` for claude, etc.).
+ *
+ * Point `secret.value` at the host env var that holds the credential —
+ * `$ANTHROPIC_API_KEY` for the API-key path, `$CLAUDE_CODE_OAUTH_TOKEN` for
+ * the subscription path.
+ */
 export interface SandboxAgentConfig extends AgentConfig {
-  /** Agent's API secret and base URL. Flows to microsandbox TLS injection, sandbox env, and judge lockdown allowlist. */
+  /** Agent's secret and base URL. Auth mode is determined from the resolved value's prefix. */
   secret: AgentSecretConfig;
 }