Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions lib/request/helpers/model-map.ts
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@ function addCodexAliases(): void {
addReasoningAliases("gpt-5.1-codex", "gpt-5-codex");
addAlias("gpt_5_codex", "gpt-5-codex");

addReasoningAliases("codex-max", "gpt-5.1-codex-max");
addReasoningAliases("gpt-5.1-codex-max", "gpt-5.1-codex-max");

addAlias("codex-mini-latest", "gpt-5.1-codex-mini");
Expand Down
18 changes: 18 additions & 0 deletions lib/request/request-transformer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,18 @@ function resolveTextVerbosity(
);
}

/**
 * Resolves the prompt-cache retention setting for an outgoing request.
 *
 * Precedence (highest first):
 *   1. an explicit `prompt_cache_retention` on the request body,
 *   2. `providerOptions.openai.promptCacheRetention`,
 *   3. the user/model configuration default.
 * Returns `undefined` when none of the three sources supply a value.
 */
function resolvePromptCacheRetention(
  modelConfig: ConfigOptions,
  body: RequestBody,
): RequestBody["prompt_cache_retention"] {
  const fromBody = body.prompt_cache_retention;
  // `!= null` deliberately matches both null and undefined, mirroring `??`.
  if (fromBody != null) {
    return fromBody;
  }
  const fromProvider = body.providerOptions?.openai?.promptCacheRetention;
  return fromProvider ?? modelConfig.promptCacheRetention;
}

function resolveInclude(modelConfig: ConfigOptions, body: RequestBody): string[] {
const providerOpenAI = body.providerOptions?.openai;
const base =
Expand Down Expand Up @@ -899,11 +911,17 @@ export async function transformRequestBody(

// Configure text verbosity (support user config)
// Default: "medium" (matches Codex CLI default for all GPT-5 models)
// Preserve any structured-output `text.format` contract from the host.
body.text = {
...body.text,
verbosity: resolveTextVerbosity(modelConfig, body),
};

const promptCacheRetention = resolvePromptCacheRetention(modelConfig, body);
if (promptCacheRetention !== undefined) {
body.prompt_cache_retention = promptCacheRetention;
}

if (shouldApplyFastSessionTuning) {
// In fast-session mode, prioritize speed by clamping to minimum reasoning + verbosity.
// getReasoningConfig normalizes unsupported values per model family.
Expand Down
1 change: 0 additions & 1 deletion lib/request/response-handler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -487,7 +487,6 @@ function finalizeParsedResponse(state: ParsedResponseState): MutableRecord | nul

return response;
}

function extractResponseId(response: unknown): string | null {
if (!response || typeof response !== "object") return null;
const candidate = (response as { id?: unknown }).id;
Expand Down
10 changes: 9 additions & 1 deletion lib/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,17 @@ export interface ConfigOptions {
reasoningEffort?: "none" | "minimal" | "low" | "medium" | "high" | "xhigh";
reasoningSummary?: "auto" | "concise" | "detailed" | "off" | "on";
textVerbosity?: "low" | "medium" | "high";
promptCacheRetention?: PromptCacheRetention;
include?: string[];
}

/**
 * Retention window for server-side prompt-cache entries.
 *
 * Known values are "5m", "1h", "24h", and "7d". The trailing
 * `(string & {})` branch keeps the union open so any future backend
 * value type-checks, while still preserving editor autocompletion
 * for the known literals (plain `| string` would collapse the union).
 */
export type PromptCacheRetention =
| "5m"
| "1h"
| "24h"
| "7d"
| (string & {});

export interface ReasoningConfig {
effort: "none" | "minimal" | "low" | "medium" | "high" | "xhigh";
summary: "auto" | "concise" | "detailed";
Expand Down Expand Up @@ -131,7 +139,7 @@ export interface RequestBody {
/** Stable key to enable prompt-token caching on Codex backend */
prompt_cache_key?: string;
/** Retention mode for server-side prompt cache entries */
prompt_cache_retention?: string;
prompt_cache_retention?: PromptCacheRetention;
/** Resume a prior Responses API turn without resending the full transcript */
previous_response_id?: string;
max_output_tokens?: number;
Expand Down
2 changes: 0 additions & 2 deletions test/index.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1639,7 +1639,6 @@ describe("OpenAIOAuthPlugin fetch handler", () => {
expect(thirdHeaders.get("x-test-account-id")).toBe("acc-1");
expect(thirdHeaders.get("x-test-access-token")).toBe("access-alpha");
});

it("compacts fast-session input before sending the upstream request when compaction succeeds", async () => {
const fetchHelpers = await import("../lib/request/fetch-helpers.js");
const longInput = Array.from({ length: 12 }, (_value, index) => ({
Expand Down Expand Up @@ -1782,7 +1781,6 @@ describe("OpenAIOAuthPlugin fetch handler", () => {
: {};
expect(finalUpstreamBody.input).toEqual(partiallyCompactedInput);
});

it("uses the refreshed token email when checking entitlement blocks", async () => {
const { AccountManager } = await import("../lib/accounts.js");
const manager = buildRoutingManager([
Expand Down
19 changes: 19 additions & 0 deletions test/public-api-contract.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,21 @@ describe("public api contract", () => {
const baseBody: RequestBody = {
model: "gpt-5-codex",
input: [{ type: "message", role: "user", content: "hi" }],
prompt_cache_retention: "24h",
text: {
format: {
type: "json_schema",
name: "compat_response",
schema: {
type: "object",
properties: {
answer: { type: "string" },
},
required: ["answer"],
},
strict: true,
},
},
};
const transformedPositional = await transformRequestBody(
JSON.parse(JSON.stringify(baseBody)) as RequestBody,
Expand All @@ -126,5 +141,9 @@ describe("public api contract", () => {
codexInstructions: "codex",
});
expect(transformedNamed).toEqual(transformedPositional);
expect(transformedPositional.prompt_cache_retention).toBe(baseBody.prompt_cache_retention);
expect(transformedNamed.prompt_cache_retention).toBe(baseBody.prompt_cache_retention);
expect(transformedPositional.text?.format).toEqual(baseBody.text?.format);
expect(transformedNamed.text?.format).toEqual(baseBody.text?.format);
});
});
97 changes: 97 additions & 0 deletions test/request-transformer.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -634,6 +634,53 @@ describe('Request Transformer Module', () => {
expect(result.prompt_cache_retention).toBe('24h');
});

it('uses prompt_cache_retention from providerOptions when body omits it', async () => {
  // Only the provider options carry a retention value here; the body has none.
  const requestBody: RequestBody = {
    model: 'gpt-5.4',
    input: [],
    providerOptions: { openai: { promptCacheRetention: '1h' } },
  };

  const transformed = await transformRequestBody(requestBody, codexInstructions);

  expect(transformed.prompt_cache_retention).toBe('1h');
});

it('prefers providerOptions prompt_cache_retention over user config defaults', async () => {
  // Provider options ('1h') should win over the global config default ('7d').
  const requestBody: RequestBody = {
    model: 'gpt-5.4',
    input: [],
    providerOptions: { openai: { promptCacheRetention: '1h' } },
  };
  const config: UserConfig = {
    global: { promptCacheRetention: '7d' },
    models: {},
  };

  const transformed = await transformRequestBody(requestBody, codexInstructions, config);

  expect(transformed.prompt_cache_retention).toBe('1h');
});

it('prefers body prompt_cache_retention over providerOptions', async () => {
  // An explicit body value ('24h') outranks the provider options value ('1h').
  const requestBody: RequestBody = {
    model: 'gpt-5.4',
    input: [],
    prompt_cache_retention: '24h',
    providerOptions: { openai: { promptCacheRetention: '1h' } },
  };

  const transformed = await transformRequestBody(requestBody, codexInstructions);

  expect(transformed.prompt_cache_retention).toBe('24h');
});

it('preserves text.format when applying text verbosity defaults', async () => {
const body: RequestBody = {
model: 'gpt-5.4',
Expand Down Expand Up @@ -1254,6 +1301,56 @@ describe('Request Transformer Module', () => {
expect(result.text?.verbosity).toBe('low');
});

it('should inherit prompt_cache_retention from user config', async () => {
  // Body carries no retention setting, so the global config default applies.
  const requestBody: RequestBody = { model: 'gpt-5.4', input: [] };
  const config: UserConfig = {
    global: { promptCacheRetention: '7d' },
    models: {},
  };

  const transformed = await transformRequestBody(requestBody, codexInstructions, config);

  expect(transformed.prompt_cache_retention).toBe('7d');
});

it('should inherit prompt_cache_retention from model-specific user config', async () => {
  // Model-level options ('24h') should shadow the global default ('7d').
  const requestBody: RequestBody = { model: 'gpt-5.4', input: [] };
  const config: UserConfig = {
    global: { promptCacheRetention: '7d' },
    models: {
      'gpt-5.4': { options: { promptCacheRetention: '24h' } },
    },
  };

  const transformed = await transformRequestBody(requestBody, codexInstructions, config);

  expect(transformed.prompt_cache_retention).toBe('24h');
});

it('should inherit model-specific prompt_cache_retention in named params overload', async () => {
  // Same precedence check as above, but exercised through the named-params call shape.
  const config: UserConfig = {
    global: { promptCacheRetention: '7d' },
    models: {
      'gpt-5.4': { options: { promptCacheRetention: '24h' } },
    },
  };

  const transformed = await transformRequestBody({
    body: { model: 'gpt-5.4', input: [] },
    codexInstructions,
    userConfig: config,
  });

  expect(transformed.prompt_cache_retention).toBe('24h');
});

it('should prefer body text verbosity over providerOptions', async () => {
const body: RequestBody = {
model: 'gpt-5',
Expand Down
1 change: 0 additions & 1 deletion test/response-compaction.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,6 @@ describe("response compaction", () => {
expect.any(Object),
);
});

it("falls back to local trimming when the compaction request fails", async () => {
const body: RequestBody = {
model: "gpt-5.4",
Expand Down
2 changes: 0 additions & 2 deletions test/response-handler.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,6 @@ data: {"type":"response.completed","response":{"id":"resp_456","output":"done"}}
expect(body.output?.[1]?.summary).toBeUndefined();
expect(body.reasoning_summary_text).toBeUndefined();
});

it('tracks commentary and final_answer phase text separately when phase labels are present', async () => {
const sseContent = [
'data: {"type":"response.created","response":{"id":"resp_phase_123","object":"response"}}',
Expand Down Expand Up @@ -536,7 +535,6 @@ data: {"type":"response.completed","response":{"id":"resp_456","output":"done"}}
expect(body.output_text).toBeUndefined();
expect(body.phase_text).toBeUndefined();
});

it('should return original text if no final response found', async () => {
const sseContent = `data: {"type":"response.started"}
data: {"type":"chunk","delta":"text"}
Expand Down