diff --git a/lib/request/helpers/model-map.ts b/lib/request/helpers/model-map.ts index 4f303ae0..81808fa3 100644 --- a/lib/request/helpers/model-map.ts +++ b/lib/request/helpers/model-map.ts @@ -208,6 +208,7 @@ function addCodexAliases(): void { addReasoningAliases("gpt-5.1-codex", "gpt-5-codex"); addAlias("gpt_5_codex", "gpt-5-codex"); + addReasoningAliases("codex-max", "gpt-5.1-codex-max"); addReasoningAliases("gpt-5.1-codex-max", "gpt-5.1-codex-max"); addAlias("codex-mini-latest", "gpt-5.1-codex-mini"); diff --git a/lib/request/request-transformer.ts b/lib/request/request-transformer.ts index 3f6a3353..5a407f55 100644 --- a/lib/request/request-transformer.ts +++ b/lib/request/request-transformer.ts @@ -198,6 +198,18 @@ function resolveTextVerbosity( ); } +function resolvePromptCacheRetention( + modelConfig: ConfigOptions, + body: RequestBody, +): RequestBody["prompt_cache_retention"] { + const providerOpenAI = body.providerOptions?.openai; + return ( + body.prompt_cache_retention ?? + providerOpenAI?.promptCacheRetention ?? + modelConfig.promptCacheRetention + ); +} + function resolveInclude(modelConfig: ConfigOptions, body: RequestBody): string[] { const providerOpenAI = body.providerOptions?.openai; const base = @@ -899,11 +911,17 @@ export async function transformRequestBody( // Configure text verbosity (support user config) // Default: "medium" (matches Codex CLI default for all GPT-5 models) + // Preserve any structured-output `text.format` contract from the host. body.text = { ...body.text, verbosity: resolveTextVerbosity(modelConfig, body), }; + const promptCacheRetention = resolvePromptCacheRetention(modelConfig, body); + if (promptCacheRetention !== undefined) { + body.prompt_cache_retention = promptCacheRetention; + } + if (shouldApplyFastSessionTuning) { // In fast-session mode, prioritize speed by clamping to minimum reasoning + verbosity. // getReasoningConfig normalizes unsupported values per model family. diff --git a/lib/request/response-handler.ts b/lib/request/response-handler.ts index ee5a5bec..72b37db9 100644 --- a/lib/request/response-handler.ts +++ b/lib/request/response-handler.ts @@ -487,7 +487,6 @@ function finalizeParsedResponse(state: ParsedResponseState): MutableRecord | nul return response; } - function extractResponseId(response: unknown): string | null { if (!response || typeof response !== "object") return null; const candidate = (response as { id?: unknown }).id; diff --git a/lib/types.ts b/lib/types.ts index 1feeb8a9..17323401 100644 --- a/lib/types.ts +++ b/lib/types.ts @@ -24,9 +24,17 @@ export interface ConfigOptions { reasoningEffort?: "none" | "minimal" | "low" | "medium" | "high" | "xhigh"; reasoningSummary?: "auto" | "concise" | "detailed" | "off" | "on"; textVerbosity?: "low" | "medium" | "high"; + promptCacheRetention?: PromptCacheRetention; include?: string[]; } +export type PromptCacheRetention = + | "5m" + | "1h" + | "24h" + | "7d" + | (string & {}); + export interface ReasoningConfig { effort: "none" | "minimal" | "low" | "medium" | "high" | "xhigh"; summary: "auto" | "concise" | "detailed"; @@ -131,7 +139,7 @@ export interface RequestBody { /** Stable key to enable prompt-token caching on Codex backend */ prompt_cache_key?: string; /** Retention mode for server-side prompt cache entries */ - prompt_cache_retention?: string; + prompt_cache_retention?: PromptCacheRetention; /** Resume a prior Responses API turn without resending the full transcript */ previous_response_id?: string; max_output_tokens?: number; diff --git a/test/index.test.ts b/test/index.test.ts index 95b8a0cb..0cb0a42e 100644 --- a/test/index.test.ts +++ b/test/index.test.ts @@ -1639,7 +1639,6 @@ describe("OpenAIOAuthPlugin fetch handler", () => { expect(thirdHeaders.get("x-test-account-id")).toBe("acc-1"); expect(thirdHeaders.get("x-test-access-token")).toBe("access-alpha"); }); - it("compacts fast-session input before sending the upstream request when compaction succeeds", async () => { const fetchHelpers = await import("../lib/request/fetch-helpers.js"); const longInput = Array.from({ length: 12 }, (_value, index) => ({ @@ -1782,7 +1781,6 @@ describe("OpenAIOAuthPlugin fetch handler", () => { : {}; expect(finalUpstreamBody.input).toEqual(partiallyCompactedInput); }); - it("uses the refreshed token email when checking entitlement blocks", async () => { const { AccountManager } = await import("../lib/accounts.js"); const manager = buildRoutingManager([ diff --git a/test/public-api-contract.test.ts b/test/public-api-contract.test.ts index 307093f3..a9d9a484 100644 --- a/test/public-api-contract.test.ts +++ b/test/public-api-contract.test.ts @@ -116,6 +116,21 @@ describe("public api contract", () => { const baseBody: RequestBody = { model: "gpt-5-codex", input: [{ type: "message", role: "user", content: "hi" }], + prompt_cache_retention: "24h", + text: { + format: { + type: "json_schema", + name: "compat_response", + schema: { + type: "object", + properties: { + answer: { type: "string" }, + }, + required: ["answer"], + }, + strict: true, + }, + }, }; const transformedPositional = await transformRequestBody( JSON.parse(JSON.stringify(baseBody)) as RequestBody, @@ -126,5 +141,9 @@ describe("public api contract", () => { codexInstructions: "codex", }); expect(transformedNamed).toEqual(transformedPositional); + expect(transformedPositional.prompt_cache_retention).toBe(baseBody.prompt_cache_retention); + expect(transformedNamed.prompt_cache_retention).toBe(baseBody.prompt_cache_retention); + expect(transformedPositional.text?.format).toEqual(baseBody.text?.format); + expect(transformedNamed.text?.format).toEqual(baseBody.text?.format); }); }); diff --git a/test/request-transformer.test.ts b/test/request-transformer.test.ts index 17efbbcf..af8d5c84 100644 --- a/test/request-transformer.test.ts +++ b/test/request-transformer.test.ts @@ -634,6 +634,53 @@ describe('Request Transformer Module', () => { expect(result.prompt_cache_retention).toBe('24h'); }); + it('uses prompt_cache_retention from providerOptions when body omits it', async () => { + const body: RequestBody = { + model: 'gpt-5.4', + input: [], + providerOptions: { + openai: { + promptCacheRetention: '1h', + }, + }, + }; + const result = await transformRequestBody(body, codexInstructions); + expect(result.prompt_cache_retention).toBe('1h'); + }); + + it('prefers providerOptions prompt_cache_retention over user config defaults', async () => { + const body: RequestBody = { + model: 'gpt-5.4', + input: [], + providerOptions: { + openai: { + promptCacheRetention: '1h', + }, + }, + }; + const userConfig: UserConfig = { + global: { promptCacheRetention: '7d' }, + models: {}, + }; + const result = await transformRequestBody(body, codexInstructions, userConfig); + expect(result.prompt_cache_retention).toBe('1h'); + }); + + it('prefers body prompt_cache_retention over providerOptions', async () => { + const body: RequestBody = { + model: 'gpt-5.4', + input: [], + prompt_cache_retention: '24h', + providerOptions: { + openai: { + promptCacheRetention: '1h', + }, + }, + }; + const result = await transformRequestBody(body, codexInstructions); + expect(result.prompt_cache_retention).toBe('24h'); + }); + it('preserves text.format when applying text verbosity defaults', async () => { const body: RequestBody = { model: 'gpt-5.4', @@ -1254,6 +1301,56 @@ describe('Request Transformer Module', () => { expect(result.text?.verbosity).toBe('low'); }); + it('should inherit prompt_cache_retention from user config', async () => { + const body: RequestBody = { + model: 'gpt-5.4', + input: [], + }; + const userConfig: UserConfig = { + global: { promptCacheRetention: '7d' }, + models: {}, + }; + const result = await transformRequestBody(body, codexInstructions, userConfig); + expect(result.prompt_cache_retention).toBe('7d'); + }); + + it('should inherit prompt_cache_retention from model-specific user config', async () => { + const body: RequestBody = { + model: 'gpt-5.4', + input: [], + }; + const userConfig: UserConfig = { + global: { promptCacheRetention: '7d' }, + models: { + 'gpt-5.4': { + options: { promptCacheRetention: '24h' }, + }, + }, + }; + const result = await transformRequestBody(body, codexInstructions, userConfig); + expect(result.prompt_cache_retention).toBe('24h'); + }); + + it('should inherit model-specific prompt_cache_retention in named params overload', async () => { + const userConfig: UserConfig = { + global: { promptCacheRetention: '7d' }, + models: { + 'gpt-5.4': { + options: { promptCacheRetention: '24h' }, + }, + }, + }; + const result = await transformRequestBody({ + body: { + model: 'gpt-5.4', + input: [], + }, + codexInstructions, + userConfig, + }); + expect(result.prompt_cache_retention).toBe('24h'); + }); + it('should prefer body text verbosity over providerOptions', async () => { const body: RequestBody = { model: 'gpt-5', diff --git a/test/response-compaction.test.ts b/test/response-compaction.test.ts index 38cf7261..24c840a3 100644 --- a/test/response-compaction.test.ts +++ b/test/response-compaction.test.ts @@ -113,7 +113,6 @@ describe("response compaction", () => { expect.any(Object), ); }); - it("falls back to local trimming when the compaction request fails", async () => { const body: RequestBody = { model: "gpt-5.4", diff --git a/test/response-handler.test.ts b/test/response-handler.test.ts index f372f7c3..91763adb 100644 --- a/test/response-handler.test.ts +++ b/test/response-handler.test.ts @@ -370,7 +370,6 @@ data: {"type":"response.completed","response":{"id":"resp_456","output":"done"}} expect(body.output?.[1]?.summary).toBeUndefined(); expect(body.reasoning_summary_text).toBeUndefined(); }); - it('tracks commentary and final_answer phase text separately when phase labels are present', async () => { const sseContent = [ 'data: {"type":"response.created","response":{"id":"resp_phase_123","object":"response"}}', @@ -536,7 +535,6 @@ data: {"type":"response.completed","response":{"id":"resp_456","output":"done"}} expect(body.output_text).toBeUndefined(); expect(body.phase_text).toBeUndefined(); }); - it('should return original text if no final response found', async () => { const sseContent = `data: {"type":"response.started"} data: {"type":"chunk","delta":"text"}