From 9968ba8c5948383a313d9f43b8bcdf0c552d8564 Mon Sep 17 00:00:00 2001 From: ndycode Date: Sun, 22 Mar 2026 16:02:34 +0800 Subject: [PATCH 1/5] enhance responses parser for semantic SSE events --- lib/request/response-handler.ts | 426 +++++++++++++++++++++++++++++++- test/fetch-helpers.test.ts | 19 ++ test/response-handler.test.ts | 82 ++++++ 3 files changed, 516 insertions(+), 11 deletions(-) diff --git a/lib/request/response-handler.ts b/lib/request/response-handler.ts index b8db279b..a36f89ae 100644 --- a/lib/request/response-handler.ts +++ b/lib/request/response-handler.ts @@ -1,5 +1,6 @@ import { createLogger, logRequest, LOGGING_ENABLED } from "../logger.js"; import { PLUGIN_NAME } from "../constants.js"; +import { isRecord } from "../utils.js"; import type { SSEEventData } from "../types.js"; @@ -8,6 +9,322 @@ const log = createLogger("response-handler"); const MAX_SSE_SIZE = 10 * 1024 * 1024; // 10MB limit to prevent memory exhaustion const DEFAULT_STREAM_STALL_TIMEOUT_MS = 45_000; +type MutableRecord = Record; + +interface ParsedResponseState { + finalResponse: MutableRecord | null; + lastPhase: string | null; + outputItems: Map; + outputText: Map; + phaseText: Map; + reasoningSummaryText: Map; +} + +function createParsedResponseState(): ParsedResponseState { + return { + finalResponse: null, + lastPhase: null, + outputItems: new Map(), + outputText: new Map(), + phaseText: new Map(), + reasoningSummaryText: new Map(), + }; +} + +function toMutableRecord(value: unknown): MutableRecord | null { + return isRecord(value) ? { ...value } : null; +} + +function getNumberField(record: MutableRecord, key: string): number | null { + const value = record[key]; + return typeof value === "number" && Number.isFinite(value) ? value : null; +} + +function getStringField(record: MutableRecord, key: string): string | null { + const value = record[key]; + return typeof value === "string" && value.trim().length > 0 ? value : null; +} + +function cloneContentArray(content: unknown): MutableRecord[] { + if (!Array.isArray(content)) return []; + return content.filter(isRecord).map((part) => ({ ...part })); +} + +function mergeRecord(base: MutableRecord | null, update: MutableRecord): MutableRecord { + if (!base) return { ...update }; + const merged: MutableRecord = { ...base, ...update }; + if ("content" in update || "content" in base) { + merged.content = cloneContentArray(update.content ?? base.content); + } + return merged; +} + +function makeOutputTextKey(outputIndex: number | null, contentIndex: number | null): string | null { + if (outputIndex === null || contentIndex === null) return null; + return `${outputIndex}:${contentIndex}`; +} + +function makeSummaryKey(outputIndex: number | null, summaryIndex: number | null): string | null { + if (outputIndex === null || summaryIndex === null) return null; + return `${outputIndex}:${summaryIndex}`; +} + +function getPartText(part: unknown): string | null { + if (!isRecord(part)) return null; + const text = getStringField(part, "text"); + if (text) return text; + return null; +} + +function capturePhase( + state: ParsedResponseState, + phase: unknown, + text: string | null = null, +): void { + if (typeof phase !== "string" || phase.trim().length === 0) return; + const normalizedPhase = phase.trim(); + state.lastPhase = normalizedPhase; + if (text && text.length > 0) { + const existing = state.phaseText.get(normalizedPhase) ?? ""; + state.phaseText.set(normalizedPhase, `${existing}${text}`); + } +} + +function upsertOutputItem(state: ParsedResponseState, outputIndex: number | null, item: unknown): void { + if (outputIndex === null || !isRecord(item)) return; + const current = state.outputItems.get(outputIndex) ?? null; + const merged = mergeRecord(current, item); + state.outputItems.set(outputIndex, merged); + capturePhase(state, merged.phase); +} + +function setOutputTextValue( + state: ParsedResponseState, + outputIndex: number | null, + contentIndex: number | null, + text: string | null, + phase: unknown = undefined, +): void { + if (!text) return; + const key = makeOutputTextKey(outputIndex, contentIndex); + if (!key) return; + const existing = state.outputText.get(key) ?? ""; + state.outputText.set(key, text); + const phaseDelta = existing.length > 0 && text.startsWith(existing) + ? text.slice(existing.length) + : existing === text + ? "" + : text; + capturePhase(state, phase, phaseDelta); +} + +function appendOutputTextValue( + state: ParsedResponseState, + outputIndex: number | null, + contentIndex: number | null, + delta: string | null, + phase: unknown = undefined, +): void { + if (!delta) return; + const key = makeOutputTextKey(outputIndex, contentIndex); + if (!key) return; + const existing = state.outputText.get(key) ?? ""; + state.outputText.set(key, `${existing}${delta}`); + capturePhase(state, phase, delta); +} + +function setReasoningSummaryValue( + state: ParsedResponseState, + outputIndex: number | null, + summaryIndex: number | null, + text: string | null, +): void { + if (!text) return; + const key = makeSummaryKey(outputIndex, summaryIndex); + if (!key) return; + state.reasoningSummaryText.set(key, text); +} + +function appendReasoningSummaryValue( + state: ParsedResponseState, + outputIndex: number | null, + summaryIndex: number | null, + delta: string | null, +): void { + if (!delta) return; + const key = makeSummaryKey(outputIndex, summaryIndex); + if (!key) return; + const existing = state.reasoningSummaryText.get(key) ?? ""; + state.reasoningSummaryText.set(key, `${existing}${delta}`); +} + +function ensureOutputItemAtIndex(output: unknown[], index: number): MutableRecord | null { + while (output.length <= index) { + output.push({}); + } + const current = output[index]; + if (!isRecord(current)) { + output[index] = {}; + } + return isRecord(output[index]) ? (output[index] as MutableRecord) : null; +} + +function ensureContentPartAtIndex(item: MutableRecord, index: number): MutableRecord | null { + const content = Array.isArray(item.content) ? [...item.content] : []; + while (content.length <= index) { + content.push({}); + } + const current = content[index]; + if (!isRecord(current)) { + content[index] = {}; + } + item.content = content; + return isRecord(content[index]) ? (content[index] as MutableRecord) : null; +} + +function applyAccumulatedOutputText(response: MutableRecord, state: ParsedResponseState): void { + if (state.outputText.size === 0) return; + const output = Array.isArray(response.output) ? [...response.output] : []; + + for (const [key, text] of state.outputText.entries()) { + const [outputIndexText, contentIndexText] = key.split(":"); + const outputIndex = Number.parseInt(outputIndexText ?? "", 10); + const contentIndex = Number.parseInt(contentIndexText ?? "", 10); + if (!Number.isFinite(outputIndex) || !Number.isFinite(contentIndex)) continue; + const item = ensureOutputItemAtIndex(output, outputIndex); + if (!item) continue; + const part = ensureContentPartAtIndex(item, contentIndex); + if (!part) continue; + if (!getStringField(part, "type")) { + part.type = "output_text"; + } + part.text = text; + } + + if (output.length > 0) { + response.output = output; + } +} + +function mergeOutputItemsIntoResponse(response: MutableRecord, state: ParsedResponseState): void { + if (state.outputItems.size === 0) return; + const output = Array.isArray(response.output) ? [...response.output] : []; + + for (const [outputIndex, item] of state.outputItems.entries()) { + while (output.length <= outputIndex) { + output.push({}); + } + output[outputIndex] = mergeRecord(toMutableRecord(output[outputIndex]), item); + } + + response.output = output; +} + +function collectMessageOutputText(output: unknown[]): string { + return output + .filter(isRecord) + .map((item) => { + if (item.type !== "message") return ""; + const content = Array.isArray(item.content) ? item.content : []; + return content + .filter(isRecord) + .map((part) => { + if (part.type !== "output_text") return ""; + return typeof part.text === "string" ? part.text : ""; + }) + .join(""); + }) + .filter((text) => text.length > 0) + .join(""); +} + +function collectReasoningSummaryText(output: unknown[]): string { + return output + .filter(isRecord) + .map((item) => { + if (item.type !== "reasoning") return ""; + const summary = Array.isArray(item.summary) ? item.summary : []; + return summary + .filter(isRecord) + .map((part) => (typeof part.text === "string" ? part.text : "")) + .filter((text) => text.length > 0) + .join("\n\n"); + }) + .filter((text) => text.length > 0) + .join("\n\n"); +} + +function applyReasoningSummaries(response: MutableRecord, state: ParsedResponseState): void { + if (state.reasoningSummaryText.size === 0) return; + const output = Array.isArray(response.output) ? [...response.output] : []; + + for (const [key, text] of state.reasoningSummaryText.entries()) { + const [outputIndexText, summaryIndexText] = key.split(":"); + const outputIndex = Number.parseInt(outputIndexText ?? "", 10); + const summaryIndex = Number.parseInt(summaryIndexText ?? "", 10); + if (!Number.isFinite(outputIndex) || !Number.isFinite(summaryIndex)) continue; + const item = ensureOutputItemAtIndex(output, outputIndex); + if (!item) continue; + const summary = Array.isArray(item.summary) ? [...item.summary] : []; + while (summary.length <= summaryIndex) { + summary.push({}); + } + const current = summary[summaryIndex]; + const nextPart = isRecord(current) ? { ...current } : {}; + if (!getStringField(nextPart, "type")) { + nextPart.type = "summary_text"; + } + nextPart.text = text; + summary[summaryIndex] = nextPart; + item.summary = summary; + if (!getStringField(item, "type")) { + item.type = "reasoning"; + } + } + + if (output.length > 0) { + response.output = output; + } +} + +function finalizeParsedResponse(state: ParsedResponseState): MutableRecord | null { + const response = state.finalResponse ? { ...state.finalResponse } : null; + if (!response) return null; + + mergeOutputItemsIntoResponse(response, state); + applyAccumulatedOutputText(response, state); + applyReasoningSummaries(response, state); + + const output = Array.isArray(response.output) ? response.output : []; + if (typeof response.output_text !== "string") { + const outputText = collectMessageOutputText(output); + if (outputText.length > 0) { + response.output_text = outputText; + } + } + + const reasoningSummaryText = collectReasoningSummaryText(output); + if (reasoningSummaryText.length > 0) { + response.reasoning_summary_text = reasoningSummaryText; + } + + if (state.lastPhase && typeof response.phase !== "string") { + response.phase = state.lastPhase; + } + + if (state.phaseText.size > 0) { + const phaseText: MutableRecord = {}; + for (const [phase, text] of state.phaseText.entries()) { + phaseText[phase] = text; + if (phase === "commentary") response.commentary_text = text; + if (phase === "final_answer") response.final_answer_text = text; + } + response.phase_text = phaseText; + } + + return response; +} + function extractResponseId(response: unknown): string | null { if (!response || typeof response !== "object") return null; const candidate = (response as { id?: unknown }).id; @@ -33,20 +350,105 @@ function notifyResponseId( } function maybeCaptureResponseEvent( + state: ParsedResponseState, data: SSEEventData, onResponseId?: (responseId: string) => void, -): unknown | null { +): void { if (data.type === "error") { log.error("SSE error event received", { error: data }); - return null; + return; } - if (data.type === "response.done" || data.type === "response.completed") { + if (isRecord(data.response)) { + state.finalResponse = { ...data.response }; notifyResponseId(onResponseId, data.response); - return data.response ?? null; } - return null; + if (data.type === "response.done" || data.type === "response.completed") { + return; + } + + const eventRecord = toMutableRecord(data); + if (!eventRecord) return; + const outputIndex = getNumberField(eventRecord, "output_index"); + + if (data.type === "response.output_item.added" || data.type === "response.output_item.done") { + upsertOutputItem(state, outputIndex, eventRecord.item); + return; + } + + if (data.type === "response.output_text.delta") { + appendOutputTextValue( + state, + outputIndex, + getNumberField(eventRecord, "content_index"), + getStringField(eventRecord, "delta"), + eventRecord.phase, + ); + return; + } + + if (data.type === "response.output_text.done") { + setOutputTextValue( + state, + outputIndex, + getNumberField(eventRecord, "content_index"), + getStringField(eventRecord, "text"), + eventRecord.phase, + ); + return; + } + + if (data.type === "response.content_part.added" || data.type === "response.content_part.done") { + const part = toMutableRecord(eventRecord.part); + if (!part || getStringField(part, "type") !== "output_text") { + capturePhase(state, part?.phase); + return; + } + setOutputTextValue( + state, + outputIndex, + getNumberField(eventRecord, "content_index"), + getPartText(part), + part.phase, + ); + return; + } + + if (data.type === "response.reasoning_summary_text.delta") { + appendReasoningSummaryValue( + state, + outputIndex, + getNumberField(eventRecord, "summary_index"), + getStringField(eventRecord, "delta"), + ); + return; + } + + if (data.type === "response.reasoning_summary_text.done") { + setReasoningSummaryValue( + state, + outputIndex, + getNumberField(eventRecord, "summary_index"), + getStringField(eventRecord, "text"), + ); + return; + } + + if ( + data.type === "response.reasoning_summary_part.added" || + data.type === "response.reasoning_summary_part.done" + ) { + setReasoningSummaryValue( + state, + outputIndex, + getNumberField(eventRecord, "summary_index"), + getPartText(eventRecord.part), + ); + return; + } + + capturePhase(state, eventRecord.phase); } /** @@ -60,6 +462,7 @@ function parseSseStream( onResponseId?: (responseId: string) => void, ): unknown | null { const lines = sseText.split(/\r?\n/); + const state = createParsedResponseState(); for (const line of lines) { const trimmedLine = line.trim(); @@ -68,15 +471,14 @@ function parseSseStream( if (!payload || payload === '[DONE]') continue; try { const data = JSON.parse(payload) as SSEEventData; - const finalResponse = maybeCaptureResponseEvent(data, onResponseId); - if (finalResponse) return finalResponse; + maybeCaptureResponseEvent(state, data, onResponseId); } catch { // Skip malformed JSON } } } - return null; + return finalizeParsedResponse(state); } /** @@ -125,7 +527,9 @@ export async function convertSseToJson( if (!finalResponse) { log.warn("Could not find final response in SSE stream"); - logRequest("stream-error", { error: "No response.done event found" }); + logRequest("stream-error", { + error: "No terminal response event found in SSE stream", + }); // Return original stream if we can't parse return new Response(fullText, { @@ -181,7 +585,7 @@ function createResponseIdCapturingStream( if (!payload || payload === "[DONE]") continue; try { const data = JSON.parse(payload) as SSEEventData; - maybeCaptureResponseEvent(data, onResponseId); + maybeCaptureResponseEvent(createParsedResponseState(), data, onResponseId); } catch { // Ignore malformed SSE lines and keep forwarding the raw stream. } @@ -230,7 +634,7 @@ async function readWithTimeout( timeoutId = setTimeout(() => { reject( new Error( - `SSE stream stalled for ${timeoutMs}ms while waiting for response.done`, + `SSE stream stalled for ${timeoutMs}ms while waiting for a terminal response event`, ), ); }, timeoutMs); diff --git a/test/fetch-helpers.test.ts b/test/fetch-helpers.test.ts index 518a725c..f90108e7 100644 --- a/test/fetch-helpers.test.ts +++ b/test/fetch-helpers.test.ts @@ -743,6 +743,25 @@ describe('createEntitlementErrorResponse', () => { const text = await result.text(); expect(text).toBe('stream body'); }); + + it('captures response ids from streaming semantic SSE without rewriting the stream', async () => { + const onResponseId = vi.fn(); + const response = new Response( + [ + 'data: {"type":"response.created","response":{"id":"resp_stream_123"}}', + '', + 'data: {"type":"response.done","response":{"id":"resp_stream_123"}}', + '', + ].join('\n'), + { status: 200, headers: new Headers({ 'content-type': 'text/event-stream' }) }, + ); + + const result = await handleSuccessResponse(response, true, { onResponseId }); + const text = await result.text(); + + expect(text).toContain('"resp_stream_123"'); + expect(onResponseId).toHaveBeenCalledWith('resp_stream_123'); + }); }); describe('handleErrorResponse error normalization', () => { diff --git a/test/response-handler.test.ts b/test/response-handler.test.ts index 880554c6..2fbc7538 100644 --- a/test/response-handler.test.ts +++ b/test/response-handler.test.ts @@ -66,6 +66,88 @@ data: {"type":"response.completed","response":{"id":"resp_456","output":"done"}} expect(body).toEqual({ id: 'resp_456', output: 'done' }); }); + it('synthesizes output_text and reasoning summaries from semantic SSE events', async () => { + const sseContent = [ + 'data: {"type":"response.created","response":{"id":"resp_semantic_123","object":"response"}}', + 'data: {"type":"response.output_item.added","output_index":0,"item":{"id":"msg_123","type":"message","role":"assistant","phase":"final_answer"}}', + 'data: {"type":"response.output_text.delta","output_index":0,"content_index":0,"delta":"Hello ","phase":"final_answer"}', + 'data: {"type":"response.output_text.delta","output_index":0,"content_index":0,"delta":"world","phase":"final_answer"}', + 'data: {"type":"response.output_text.done","output_index":0,"content_index":0,"text":"Hello world","phase":"final_answer"}', + 'data: {"type":"response.output_item.added","output_index":1,"item":{"id":"rs_123","type":"reasoning"}}', + 'data: {"type":"response.reasoning_summary_text.delta","output_index":1,"summary_index":0,"delta":"Need more context."}', + 'data: {"type":"response.reasoning_summary_text.done","output_index":1,"summary_index":0,"text":"Need more context."}', + 'data: {"type":"response.completed","response":{"id":"resp_semantic_123","object":"response"}}', + '', + ].join('\n'); + const response = new Response(sseContent); + const headers = new Headers(); + + const result = await convertSseToJson(response, headers); + const body = await result.json() as { + id: string; + output?: Array<{ + type?: string; + role?: string; + phase?: string; + content?: Array<{ type?: string; text?: string }>; + summary?: Array<{ type?: string; text?: string }>; + }>; + output_text?: string; + reasoning_summary_text?: string; + phase?: string; + final_answer_text?: string; + phase_text?: Record; + }; + + expect(body.id).toBe('resp_semantic_123'); + expect(body.output_text).toBe('Hello world'); + expect(body.reasoning_summary_text).toBe('Need more context.'); + expect(body.phase).toBe('final_answer'); + expect(body.final_answer_text).toBe('Hello world'); + expect(body.phase_text).toEqual({ final_answer: 'Hello world' }); + expect(body.output?.[0]?.content?.[0]).toEqual({ + type: 'output_text', + text: 'Hello world', + }); + expect(body.output?.[1]?.summary?.[0]).toEqual({ + type: 'summary_text', + text: 'Need more context.', + }); + }); + + it('tracks commentary and final_answer phase text separately when phase labels are present', async () => { + const sseContent = [ + 'data: {"type":"response.created","response":{"id":"resp_phase_123","object":"response"}}', + 'data: {"type":"response.output_item.added","output_index":0,"item":{"id":"msg_123","type":"message","role":"assistant","phase":"commentary"}}', + 'data: {"type":"response.output_text.delta","output_index":0,"content_index":0,"delta":"Thinking...","phase":"commentary"}', + 'data: {"type":"response.output_text.done","output_index":0,"content_index":0,"text":"Thinking...","phase":"commentary"}', + 'data: {"type":"response.output_item.done","output_index":0,"item":{"id":"msg_123","type":"message","role":"assistant","phase":"final_answer"}}', + 'data: {"type":"response.output_text.done","output_index":0,"content_index":1,"text":"Done.","phase":"final_answer"}', + 'data: {"type":"response.done","response":{"id":"resp_phase_123","object":"response"}}', + '', + ].join('\n'); + const response = new Response(sseContent); + const headers = new Headers(); + + const result = await convertSseToJson(response, headers); + const body = await result.json() as { + phase?: string; + commentary_text?: string; + final_answer_text?: string; + phase_text?: Record; + output_text?: string; + }; + + expect(body.phase).toBe('final_answer'); + expect(body.commentary_text).toBe('Thinking...'); + expect(body.final_answer_text).toBe('Done.'); + expect(body.phase_text).toEqual({ + commentary: 'Thinking...', + final_answer: 'Done.', + }); + expect(body.output_text).toBe('Thinking...Done.'); + }); + it('should return original text if no final response found', async () => { const sseContent = `data: {"type":"response.started"} data: {"type":"chunk","delta":"text"} From 71d44c73974f2e91d6647d5ef86339d998deb6f8 Mon Sep 17 00:00:00 2001 From: ndycode Date: Sun, 22 Mar 2026 16:28:19 +0800 Subject: [PATCH 2/5] add response compaction fallback for fast sessions --- index.ts | 34 +++++++ lib/request/fetch-helpers.ts | 27 ++++- lib/request/helpers/model-map.ts | 20 +++- lib/request/request-transformer.ts | 64 ++++++++++-- lib/request/response-compaction.ts | 158 +++++++++++++++++++++++++++++ test/codex-manager-cli.test.ts | 10 +- test/index.test.ts | 63 +++++++++++- test/model-map.test.ts | 8 ++ test/request-transformer.test.ts | 28 ++++- test/response-compaction.test.ts | 115 +++++++++++++++++++++ 10 files changed, 502 insertions(+), 25 deletions(-) create mode 100644 lib/request/response-compaction.ts create mode 100644 test/response-compaction.test.ts diff --git a/index.ts b/index.ts index 98d56bdd..ec31a07b 100644 --- a/index.ts +++ b/index.ts @@ -154,6 +154,7 @@ import { isWorkspaceDisabledError, } from "./lib/request/fetch-helpers.js"; import { applyFastSessionDefaults } from "./lib/request/request-transformer.js"; +import { applyResponseCompaction } from "./lib/request/response-compaction.js"; import { getRateLimitBackoff, RATE_LIMIT_SHORT_RETRY_THRESHOLD_MS, @@ -1369,10 +1370,13 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => { fastSession: fastSessionEnabled, fastSessionStrategy, fastSessionMaxInputItems, + deferFastSessionInputTrimming: fastSessionEnabled, }, ); let requestInit = transformation?.updatedInit ?? baseInit; let transformedBody: RequestBody | undefined = transformation?.body; + const deferredFastSessionInputTrim = + transformation?.deferredFastSessionInputTrim; const promptCacheKey = transformedBody?.prompt_cache_key; let model = transformedBody?.model; let modelFamily = model ? getModelFamily(model) : "gpt-5.1"; @@ -1670,6 +1674,36 @@ accountAttemptLoop: while (attempted.size < Math.max(1, accountCount)) { promptCacheKey: effectivePromptCacheKey, }, ); + if (transformedBody && deferredFastSessionInputTrim) { + const compactionResult = await applyResponseCompaction({ + body: transformedBody, + requestUrl: url, + headers, + trim: deferredFastSessionInputTrim, + fetchImpl: async (requestUrl, requestInit) => { + const normalizedCompactionUrl = + typeof requestUrl === "string" + ? requestUrl + : String(requestUrl); + return fetch( + normalizedCompactionUrl, + applyProxyCompatibleInit( + normalizedCompactionUrl, + requestInit, + ), + ); + }, + signal: abortSignal, + timeoutMs: Math.min(fetchTimeoutMs, 4_000), + }); + if (compactionResult.mode !== "unchanged") { + transformedBody = compactionResult.body; + requestInit = { + ...(requestInit ?? {}), + body: JSON.stringify(transformedBody), + }; + } + } const quotaScheduleKey = `${entitlementAccountKey}:${model ?? modelFamily}`; const capabilityModelKey = model ?? modelFamily; const quotaDeferral = preemptiveQuotaScheduler.getDeferral(quotaScheduleKey); diff --git a/lib/request/fetch-helpers.ts b/lib/request/fetch-helpers.ts index 37043418..348ea4f8 100644 --- a/lib/request/fetch-helpers.ts +++ b/lib/request/fetch-helpers.ts @@ -8,7 +8,12 @@ import { ProxyAgent } from "undici"; import { queuedRefresh } from "../refresh-queue.js"; import { logRequest, logError, logWarn } from "../logger.js"; import { getCodexInstructions, getModelFamily } from "../prompts/codex.js"; -import { transformRequestBody, normalizeModel } from "./request-transformer.js"; +import { + transformRequestBody, + normalizeModel, + resolveFastSessionInputTrimPlan, + type FastSessionInputTrimPlan, +} from "./request-transformer.js"; import { attachResponseIdCapture, convertSseToJson, @@ -99,6 +104,12 @@ export interface ResolveUnsupportedCodexFallbackOptions { customChain?: Record; } +export interface TransformRequestForCodexResult { + body: RequestBody; + updatedInit: RequestInit; + deferredFastSessionInputTrim?: FastSessionInputTrimPlan["trim"]; +} + function canonicalizeModelName(model: string | undefined): string | undefined { if (!model) return undefined; const trimmed = model.trim().toLowerCase(); @@ -651,8 +662,9 @@ export async function transformRequestForCodex( fastSession?: boolean; fastSessionStrategy?: "hybrid" | "always"; fastSessionMaxInputItems?: number; + deferFastSessionInputTrimming?: boolean; }, -): Promise<{ body: RequestBody; updatedInit: RequestInit } | undefined> { +): Promise { const hasParsedBody = parsedBody !== undefined && parsedBody !== null && @@ -670,6 +682,12 @@ export async function transformRequestForCodex( body = JSON.parse(init.body) as RequestBody; } const originalModel = body.model; + const fastSessionInputTrimPlan = resolveFastSessionInputTrimPlan( + body, + options?.fastSession ?? false, + options?.fastSessionStrategy ?? "hybrid", + options?.fastSessionMaxInputItems ?? 30, + ); // Normalize model first to determine which instructions to fetch // This ensures we get the correct model-specific prompt @@ -700,6 +718,7 @@ export async function transformRequestForCodex( options?.fastSession ?? false, options?.fastSessionStrategy ?? "hybrid", options?.fastSessionMaxInputItems ?? 30, + options?.deferFastSessionInputTrimming ?? false, ); // Log transformed request @@ -720,6 +739,10 @@ export async function transformRequestForCodex( return { body: transformedBody, updatedInit: { ...(init ?? {}), body: JSON.stringify(transformedBody) }, + deferredFastSessionInputTrim: + options?.deferFastSessionInputTrimming === true + ? fastSessionInputTrimPlan.trim + : undefined, }; } catch (e) { logError(`${ERROR_MESSAGES.REQUEST_PARSE_ERROR}`, e); diff --git a/lib/request/helpers/model-map.ts b/lib/request/helpers/model-map.ts index 20a6832d..b623c845 100644 --- a/lib/request/helpers/model-map.ts +++ b/lib/request/helpers/model-map.ts @@ -25,6 +25,7 @@ export type PromptModelFamily = export interface ModelCapabilities { toolSearch: boolean; computerUse: boolean; + compaction: boolean; } export interface ModelProfile { @@ -48,14 +49,27 @@ const TOOL_CAPABILITIES = { full: { toolSearch: true, computerUse: true, + compaction: true, }, computerOnly: { toolSearch: false, computerUse: true, + compaction: false, + }, + computerAndCompact: { + toolSearch: false, + computerUse: true, + compaction: true, + }, + compactOnly: { + toolSearch: false, + computerUse: false, + compaction: true, }, basic: { toolSearch: false, computerUse: false, + compaction: false, }, } as const satisfies Record; @@ -103,7 +117,7 @@ export const MODEL_PROFILES: Record = { promptFamily: "gpt-5.2", defaultReasoningEffort: "high", supportedReasoningEfforts: ["medium", "high", "xhigh"], - capabilities: TOOL_CAPABILITIES.computerOnly, + capabilities: TOOL_CAPABILITIES.computerAndCompact, }, "gpt-5.2-pro": { normalizedModel: "gpt-5.2-pro", @@ -145,14 +159,14 @@ export const MODEL_PROFILES: Record = { promptFamily: "gpt-5.2", defaultReasoningEffort: "medium", supportedReasoningEfforts: ["medium"], - capabilities: TOOL_CAPABILITIES.basic, + capabilities: TOOL_CAPABILITIES.full, }, "gpt-5-nano": { normalizedModel: "gpt-5-nano", promptFamily: "gpt-5.2", defaultReasoningEffort: "medium", supportedReasoningEfforts: ["medium"], - capabilities: TOOL_CAPABILITIES.basic, + capabilities: TOOL_CAPABILITIES.compactOnly, }, } as const; diff --git a/lib/request/request-transformer.ts b/lib/request/request-transformer.ts index 6c002476..3f6a3353 100644 --- a/lib/request/request-transformer.ts +++ b/lib/request/request-transformer.ts @@ -33,6 +33,7 @@ export interface TransformRequestBodyParams { fastSession?: boolean; fastSessionStrategy?: FastSessionStrategy; fastSessionMaxInputItems?: number; + deferFastSessionInputTrimming?: boolean; } const PLAN_MODE_ONLY_TOOLS = new Set(["request_user_input"]); @@ -482,6 +483,15 @@ export function trimInputForFastSession( return trimmed.slice(trimmed.length - safeMax); } +export interface FastSessionInputTrimPlan { + shouldApply: boolean; + isTrivialTurn: boolean; + trim?: { + maxItems: number; + preferLatestUserOnly: boolean; + }; +} + function isTrivialLatestPrompt(text: string): boolean { const normalized = text.trim(); if (!normalized) return false; @@ -540,6 +550,33 @@ function isComplexFastSessionRequest( return false; } +export function resolveFastSessionInputTrimPlan( + body: RequestBody, + fastSession: boolean, + fastSessionStrategy: FastSessionStrategy, + fastSessionMaxInputItems: number, +): FastSessionInputTrimPlan { + const shouldApplyFastSessionTuning = + fastSession && + (fastSessionStrategy === "always" || + !isComplexFastSessionRequest(body, fastSessionMaxInputItems)); + const latestUserText = getLatestUserText(body.input); + const isTrivialTurn = isTrivialLatestPrompt(latestUserText ?? ""); + const shouldPreferLatestUserOnly = + shouldApplyFastSessionTuning && isTrivialTurn; + + return { + shouldApply: shouldApplyFastSessionTuning, + isTrivialTurn, + trim: shouldApplyFastSessionTuning + ? { + maxItems: fastSessionMaxInputItems, + preferLatestUserOnly: shouldPreferLatestUserOnly, + } + : undefined, + }; +} + function getLatestUserText(input: InputItem[] | undefined): string | undefined { if (!Array.isArray(input)) return undefined; for (let i = input.length - 1; i >= 0; i--) { @@ -672,6 +709,7 @@ export async function transformRequestBody( fastSession?: boolean, fastSessionStrategy?: FastSessionStrategy, fastSessionMaxInputItems?: number, + deferFastSessionInputTrimming?: boolean, ): Promise; export async function transformRequestBody( bodyOrParams: RequestBody | TransformRequestBodyParams, @@ -681,6 +719,7 @@ export async function transformRequestBody( fastSession = false, fastSessionStrategy: FastSessionStrategy = "hybrid", fastSessionMaxInputItems = 30, + deferFastSessionInputTrimming = false, ): Promise { const useNamedParams = typeof codexInstructions === "undefined" && @@ -695,6 +734,7 @@ export async function transformRequestBody( let resolvedFastSession: boolean; let resolvedFastSessionStrategy: FastSessionStrategy; let resolvedFastSessionMaxInputItems: number; + let resolvedDeferFastSessionInputTrimming: boolean; if (useNamedParams) { const namedParams = bodyOrParams as TransformRequestBodyParams; @@ -705,6 +745,8 @@ export async function transformRequestBody( resolvedFastSession = namedParams.fastSession ?? false; resolvedFastSessionStrategy = namedParams.fastSessionStrategy ?? "hybrid"; resolvedFastSessionMaxInputItems = namedParams.fastSessionMaxInputItems ?? 30; + resolvedDeferFastSessionInputTrimming = + namedParams.deferFastSessionInputTrimming ?? false; } else { body = bodyOrParams as RequestBody; resolvedCodexInstructions = codexInstructions; @@ -713,6 +755,7 @@ export async function transformRequestBody( resolvedFastSession = fastSession; resolvedFastSessionStrategy = fastSessionStrategy; resolvedFastSessionMaxInputItems = fastSessionMaxInputItems; + resolvedDeferFastSessionInputTrimming = deferFastSessionInputTrimming; } if (!body || typeof body !== "object") { @@ -747,17 +790,17 @@ export async function transformRequestBody( const reasoningModel = shouldUseNormalizedReasoningModel ? normalizedModel : lookupModel; - const shouldApplyFastSessionTuning = - resolvedFastSession && - (resolvedFastSessionStrategy === "always" || - !isComplexFastSessionRequest(body, resolvedFastSessionMaxInputItems)); - const latestUserText = getLatestUserText(body.input); - const isTrivialTurn = isTrivialLatestPrompt(latestUserText ?? ""); + const fastSessionInputTrimPlan = resolveFastSessionInputTrimPlan( + body, + resolvedFastSession, + resolvedFastSessionStrategy, + resolvedFastSessionMaxInputItems, + ); + const shouldApplyFastSessionTuning = fastSessionInputTrimPlan.shouldApply; + const isTrivialTurn = fastSessionInputTrimPlan.isTrivialTurn; const shouldDisableToolsForTrivialTurn = shouldApplyFastSessionTuning && isTrivialTurn; - const shouldPreferLatestUserOnly = - shouldApplyFastSessionTuning && isTrivialTurn; // Codex required fields // ChatGPT backend REQUIRES store=false (confirmed via testing) @@ -789,10 +832,11 @@ export async function transformRequestBody( if (body.input && Array.isArray(body.input)) { let inputItems: InputItem[] = body.input; - if (shouldApplyFastSessionTuning) { + if (shouldApplyFastSessionTuning && !resolvedDeferFastSessionInputTrimming) { inputItems = trimInputForFastSession(inputItems, resolvedFastSessionMaxInputItems, { - preferLatestUserOnly: shouldPreferLatestUserOnly, + preferLatestUserOnly: + fastSessionInputTrimPlan.trim?.preferLatestUserOnly ?? false, }) ?? inputItems; } diff --git a/lib/request/response-compaction.ts b/lib/request/response-compaction.ts new file mode 100644 index 00000000..d61151fe --- /dev/null +++ b/lib/request/response-compaction.ts @@ -0,0 +1,158 @@ +import { logDebug, logWarn } from "../logger.js"; +import type { InputItem, RequestBody } from "../types.js"; +import { isRecord } from "../utils.js"; +import { getModelCapabilities } from "./helpers/model-map.js"; +import { trimInputForFastSession } from "./request-transformer.js"; + +export interface DeferredFastSessionInputTrim { + maxItems: number; + preferLatestUserOnly: boolean; +} + +export interface ResponseCompactionResult { + body: RequestBody; + mode: "compacted" | "trimmed" | "unchanged"; +} + +export interface ApplyResponseCompactionParams { + body: RequestBody; + requestUrl: string; + headers: Headers; + trim: DeferredFastSessionInputTrim; + fetchImpl: typeof fetch; + signal?: AbortSignal | null; + timeoutMs?: number; +} + +function isInputItemArray(value: unknown): value is InputItem[] { + return Array.isArray(value) && value.every((item) => isRecord(item)); +} + +function extractCompactedInput(payload: unknown): InputItem[] | undefined { + if (!isRecord(payload)) return undefined; + if (isInputItemArray(payload.output)) return payload.output; + if (isInputItemArray(payload.input)) return payload.input; + + const response = payload.response; + if (!isRecord(response)) return undefined; + if (isInputItemArray(response.output)) return response.output; + if (isInputItemArray(response.input)) return response.input; + return undefined; +} + +function buildCompactionUrl(requestUrl: string): string { + return requestUrl.endsWith("/compact") ? requestUrl : `${requestUrl}/compact`; +} + +function createFallbackBody( + body: RequestBody, + trim: DeferredFastSessionInputTrim, +): RequestBody | undefined { + if (!Array.isArray(body.input)) return undefined; + const trimmedInput = + trimInputForFastSession(body.input, trim.maxItems, { + preferLatestUserOnly: trim.preferLatestUserOnly, + }) ?? body.input; + + return trimmedInput === body.input ? undefined : { ...body, input: trimmedInput }; +} + +function createTimedAbortSignal( + signal: AbortSignal | null | undefined, + timeoutMs: number, +): { signal: AbortSignal; cleanup: () => void } { + const controller = new AbortController(); + const timeout = setTimeout(() => { + controller.abort(new Error("Response compaction timeout")); + }, timeoutMs); + + const onAbort = () => { + controller.abort(signal?.reason ?? new Error("Aborted")); + }; + + if (signal?.aborted) { + onAbort(); + } else if (signal) { + signal.addEventListener("abort", onAbort, { once: true }); + } + + return { + signal: controller.signal, + cleanup: () => { + clearTimeout(timeout); + signal?.removeEventListener("abort", onAbort); + }, + }; +} + +export async function applyResponseCompaction( + params: ApplyResponseCompactionParams, +): Promise { + const fallbackBody = createFallbackBody(params.body, params.trim); + if (!fallbackBody) { + return { body: params.body, mode: "unchanged" }; + } + + if (!getModelCapabilities(params.body.model).compaction) { + return { body: fallbackBody, mode: "trimmed" }; + } + + const compactionHeaders = new Headers(params.headers); + compactionHeaders.set("accept", "application/json"); + compactionHeaders.set("content-type", "application/json"); + const { signal, cleanup } = createTimedAbortSignal( + params.signal, + Math.max(250, params.timeoutMs ?? 4_000), + ); + + try { + const response = await params.fetchImpl(buildCompactionUrl(params.requestUrl), { + method: "POST", + headers: compactionHeaders, + body: JSON.stringify({ + model: params.body.model, + input: params.body.input, + }), + signal, + }); + + if (!response.ok) { + logWarn("Responses compaction request failed; using trim fallback.", { + status: response.status, + statusText: response.statusText, + model: params.body.model, + }); + return { body: fallbackBody, mode: "trimmed" }; + } + + const payload = (await response.json()) as unknown; + const compactedInput = extractCompactedInput(payload); + if (!compactedInput || compactedInput.length === 0) { + logWarn("Responses compaction returned no reusable input; using trim fallback.", { + model: params.body.model, + }); + return { body: fallbackBody, mode: "trimmed" }; + } + + logDebug("Applied server-side response compaction.", { + model: params.body.model, + originalInputLength: Array.isArray(params.body.input) ? params.body.input.length : 0, + compactedInputLength: compactedInput.length, + }); + return { body: { ...params.body, input: compactedInput }, mode: "compacted" }; + } catch (error) { + if (signal.aborted && params.signal?.aborted) { + throw params.signal.reason instanceof Error + ? params.signal.reason + : new Error("Aborted"); + } + + logWarn("Responses compaction failed; using trim fallback.", { + model: params.body.model, + error: error instanceof Error ? error.message : String(error), + }); + return { body: fallbackBody, mode: "trimmed" }; + } finally { + cleanup(); + } +} diff --git a/test/codex-manager-cli.test.ts b/test/codex-manager-cli.test.ts index 613d6c93..ef1a6ead 100644 --- a/test/codex-manager-cli.test.ts +++ b/test/codex-manager-cli.test.ts @@ -5707,7 +5707,7 @@ describe("codex manager cli commands", () => { normalized: string; remapped: boolean; promptFamily: string; - capabilities: { toolSearch: boolean; computerUse: boolean }; + capabilities: { toolSearch: boolean; computerUse: boolean; compaction: boolean }; }; }; expect(payload.command).toBe("report"); @@ -5722,6 +5722,7 @@ describe("codex manager cli commands", () => { capabilities: { toolSearch: false, computerUse: false, + compaction: false, }, }); }); @@ -5760,7 +5761,7 @@ describe("codex manager cli commands", () => { normalized: string; remapped: boolean; promptFamily: string; - capabilities: { toolSearch: boolean; computerUse: boolean }; + capabilities: { toolSearch: boolean; computerUse: boolean; compaction: boolean }; }; }; expect(payload.modelSelection).toEqual({ @@ -5769,8 +5770,9 @@ describe("codex manager cli commands", () => { remapped: true, promptFamily: "gpt-5.2", capabilities: { - toolSearch: false, - computerUse: false, + toolSearch: true, + computerUse: true, + compaction: true, }, }); }); diff --git a/test/index.test.ts b/test/index.test.ts index fb89f4e6..7882bc6f 100644 --- a/test/index.test.ts +++ b/test/index.test.ts @@ -136,9 +136,13 @@ vi.mock("../lib/live-account-sync.js", () => ({ LiveAccountSync: liveAccountSyncCtorMock, })); -vi.mock("../lib/request/request-transformer.js", () => ({ - applyFastSessionDefaults: (config: T) => config, -})); +vi.mock("../lib/request/request-transformer.js", async () => { + const actual = await vi.importActual("../lib/request/request-transformer.js"); + return { + ...(actual as Record), + applyFastSessionDefaults: (config: T) => config, + }; +}); vi.mock("../lib/logger.js", () => ({ initLogger: vi.fn(), @@ -1431,6 +1435,59 @@ describe("OpenAIOAuthPlugin fetch handler", () => { expect(secondBody?.previous_response_id).toBe("resp_explicit_456"); }); + it("compacts fast-session input before sending the upstream request when compaction succeeds", async () => { + const fetchHelpers = await import("../lib/request/fetch-helpers.js"); + const longInput = Array.from({ length: 12 }, (_value, index) => ({ + type: "message", + role: index === 0 ? "developer" : "user", + content: index === 0 ? "system prompt" : `message-${index}`, + })); + const compactedInput = [ + { + type: "message", + role: "assistant", + content: "compacted summary", + }, + ]; + + vi.mocked(fetchHelpers.transformRequestForCodex).mockResolvedValueOnce({ + updatedInit: { + method: "POST", + body: JSON.stringify({ model: "gpt-5-mini", input: longInput }), + }, + body: { model: "gpt-5-mini", input: longInput }, + deferredFastSessionInputTrim: { maxItems: 8, preferLatestUserOnly: false }, + }); + + globalThis.fetch = vi + .fn() + .mockResolvedValueOnce( + new Response(JSON.stringify({ output: compactedInput }), { status: 200 }), + ) + .mockResolvedValueOnce( + new Response(JSON.stringify({ content: "ok" }), { status: 200 }), + ); + + const { sdk } = await setupPlugin(); + const response = await sdk.fetch!("https://api.openai.com/v1/chat", { + method: "POST", + body: JSON.stringify({ model: "gpt-5-mini", input: longInput }), + }); + + expect(response.status).toBe(200); + expect(globalThis.fetch).toHaveBeenCalledTimes(2); + expect(vi.mocked(globalThis.fetch).mock.calls[0]?.[0]).toBe( + "https://api.openai.com/v1/chat/compact", + ); + + const upstreamInit = vi.mocked(globalThis.fetch).mock.calls[1]?.[1] as RequestInit; + const upstreamBody = + typeof upstreamInit.body === "string" + ? (JSON.parse(upstreamInit.body) as { input?: unknown[] }) + : {}; + expect(upstreamBody.input).toEqual(compactedInput); + }); + it("uses the refreshed token email when checking entitlement blocks", async () => { const { AccountManager } = await import("../lib/accounts.js"); const manager = buildRoutingManager([ diff --git a/test/model-map.test.ts b/test/model-map.test.ts index 6ad16967..7d2f8adb 100644 --- a/test/model-map.test.ts +++ b/test/model-map.test.ts @@ -84,14 +84,22 @@ describe("model map", () => { expect(getModelCapabilities("gpt-5.4")).toEqual({ toolSearch: true, computerUse: true, + compaction: true, }); expect(getModelCapabilities("gpt-5.4-pro")).toEqual({ toolSearch: false, computerUse: true, + compaction: true, }); expect(getModelCapabilities("gpt-5-mini")).toEqual({ + toolSearch: true, + computerUse: true, + compaction: true, + }); + expect(getModelCapabilities("gpt-5-nano")).toEqual({ toolSearch: false, computerUse: false, + compaction: true, }); }); }); diff --git a/test/request-transformer.test.ts b/test/request-transformer.test.ts index 51eb1214..17efbbcf 100644 --- a/test/request-transformer.test.ts +++ b/test/request-transformer.test.ts @@ -653,9 +653,31 @@ describe('Request Transformer Module', () => { }, }, }; - const result = await transformRequestBody(body, codexInstructions); - expect(result.text?.verbosity).toBe('medium'); - expect(result.text?.format).toEqual(body.text?.format); + const result = await transformRequestBody(body, codexInstructions); + expect(result.text?.verbosity).toBe('medium'); + expect(result.text?.format).toEqual(body.text?.format); + }); + + it('defers fast-session input trimming when requested for downstream compaction', async () => { + const body: RequestBody = { + model: 'gpt-5.4', + input: Array.from({ length: 12 }, (_value, index) => ({ + type: 'message', + role: index === 0 ? 'developer' : 'user', + content: index === 0 ? 'system prompt' : `message-${index}`, + })), + }; + const result = await transformRequestBody( + body, + codexInstructions, + { global: {}, models: {} }, + true, + true, + 'always', + 8, + true, + ); + expect(result.input).toHaveLength(12); }); it('should set required Codex fields', async () => { diff --git a/test/response-compaction.test.ts b/test/response-compaction.test.ts new file mode 100644 index 00000000..649532ee --- /dev/null +++ b/test/response-compaction.test.ts @@ -0,0 +1,115 @@ +import { applyResponseCompaction } from "../lib/request/response-compaction.js"; +import type { RequestBody } from "../lib/types.js"; + +function buildInput(length: number) { + return Array.from({ length }, (_value, index) => ({ + type: "message", + role: index === 0 ? "developer" : "user", + content: index === 0 ? "system prompt" : `message-${index}`, + })); +} + +describe("response compaction", () => { + it("returns unchanged when the fast-session trim would be a no-op", async () => { + const body: RequestBody = { + model: "gpt-5.4", + input: buildInput(2), + }; + const fetchImpl = vi.fn(); + + const result = await applyResponseCompaction({ + body, + requestUrl: "https://chatgpt.com/backend-api/codex/responses", + headers: new Headers(), + trim: { maxItems: 8, preferLatestUserOnly: false }, + fetchImpl, + }); + + expect(result.mode).toBe("unchanged"); + expect(fetchImpl).not.toHaveBeenCalled(); + expect(result.body.input).toEqual(body.input); + }); + + it("falls back to local trimming when the model does not support compaction", async () => { + const body: RequestBody = { + model: "gpt-5-codex", + input: buildInput(10), + }; + const fetchImpl = vi.fn(); + + const result = await applyResponseCompaction({ + body, + requestUrl: "https://chatgpt.com/backend-api/codex/responses", + headers: new Headers(), + trim: { maxItems: 8, preferLatestUserOnly: false }, + fetchImpl, + }); + + expect(result.mode).toBe("trimmed"); + expect(fetchImpl).not.toHaveBeenCalled(); + expect(result.body.input).toHaveLength(8); + }); + + it("replaces request input with server-compacted output when available", async () => { + const compactedOutput = [ + { + type: "message", + role: "assistant", + content: "compacted summary", + }, + ]; + const body: RequestBody = { + model: "gpt-5-mini", + input: buildInput(12), + }; + const fetchImpl = vi.fn().mockResolvedValue( + new Response(JSON.stringify({ output: compactedOutput }), { status: 200 }), + ); + + const result = await applyResponseCompaction({ + body, + requestUrl: "https://chatgpt.com/backend-api/codex/responses", + headers: new Headers({ accept: "text/event-stream" }), + trim: { maxItems: 8, preferLatestUserOnly: false }, + fetchImpl, + }); + + expect(result.mode).toBe("compacted"); + expect(result.body.input).toEqual(compactedOutput); + expect(fetchImpl).toHaveBeenCalledTimes(1); + expect(fetchImpl).toHaveBeenCalledWith( + "https://chatgpt.com/backend-api/codex/responses/compact", + expect.objectContaining({ + method: "POST", + headers: expect.any(Headers), + }), + ); + + const requestInit = vi.mocked(fetchImpl).mock.calls[0]?.[1]; + const headers = new Headers(requestInit?.headers); + expect(headers.get("accept")).toBe("application/json"); + expect(headers.get("content-type")).toBe("application/json"); + }); + + it("falls back to local trimming when the compaction request fails", async () => { + const body: RequestBody = { + model: "gpt-5.4", + input: buildInput(12), + }; + const fetchImpl = vi.fn().mockResolvedValue( + new Response(JSON.stringify({ error: { message: "nope" } }), { status: 404 }), + ); + + const result = await applyResponseCompaction({ + body, + requestUrl: "https://chatgpt.com/backend-api/codex/responses", + headers: new Headers(), + trim: { maxItems: 8, preferLatestUserOnly: false }, + fetchImpl, + }); + + expect(result.mode).toBe("trimmed"); + expect(result.body.input).toHaveLength(8); + expect(fetchImpl).toHaveBeenCalledTimes(1); + }); +}); From 21df8136a0a76323541e4d96fe1fbc9a972bce58 Mon Sep 17 00:00:00 2001 From: ndycode Date: Sun, 22 Mar 2026 16:40:41 +0800 Subject: [PATCH 3/5] type responses text format and prompt cache retention --- lib/request/request-transformer.ts | 18 +++++++++++++ lib/types.ts | 10 ++++++- test/public-api-contract.test.ts | 15 +++++++++++ test/request-transformer.test.ts | 42 ++++++++++++++++++++++++++++++ 4 files changed, 84 insertions(+), 1 deletion(-) diff --git a/lib/request/request-transformer.ts b/lib/request/request-transformer.ts index 3f6a3353..5a407f55 100644 --- a/lib/request/request-transformer.ts +++ b/lib/request/request-transformer.ts @@ -198,6 +198,18 @@ function resolveTextVerbosity( ); } +function resolvePromptCacheRetention( + modelConfig: ConfigOptions, + body: RequestBody, +): RequestBody["prompt_cache_retention"] { + const providerOpenAI = body.providerOptions?.openai; + return ( + body.prompt_cache_retention ?? + providerOpenAI?.promptCacheRetention ?? + modelConfig.promptCacheRetention + ); +} + function resolveInclude(modelConfig: ConfigOptions, body: RequestBody): string[] { const providerOpenAI = body.providerOptions?.openai; const base = @@ -899,11 +911,17 @@ export async function transformRequestBody( // Configure text verbosity (support user config) // Default: "medium" (matches Codex CLI default for all GPT-5 models) + // Preserve any structured-output `text.format` contract from the host. body.text = { ...body.text, verbosity: resolveTextVerbosity(modelConfig, body), }; + const promptCacheRetention = resolvePromptCacheRetention(modelConfig, body); + if (promptCacheRetention !== undefined) { + body.prompt_cache_retention = promptCacheRetention; + } + if (shouldApplyFastSessionTuning) { // In fast-session mode, prioritize speed by clamping to minimum reasoning + verbosity. // getReasoningConfig normalizes unsupported values per model family. diff --git a/lib/types.ts b/lib/types.ts index 1feeb8a9..17323401 100644 --- a/lib/types.ts +++ b/lib/types.ts @@ -24,9 +24,17 @@ export interface ConfigOptions { reasoningEffort?: "none" | "minimal" | "low" | "medium" | "high" | "xhigh"; reasoningSummary?: "auto" | "concise" | "detailed" | "off" | "on"; textVerbosity?: "low" | "medium" | "high"; + promptCacheRetention?: PromptCacheRetention; include?: string[]; } +export type PromptCacheRetention = + | "5m" + | "1h" + | "24h" + | "7d" + | (string & {}); + export interface ReasoningConfig { effort: "none" | "minimal" | "low" | "medium" | "high" | "xhigh"; summary: "auto" | "concise" | "detailed"; @@ -131,7 +139,7 @@ export interface RequestBody { /** Stable key to enable prompt-token caching on Codex backend */ prompt_cache_key?: string; /** Retention mode for server-side prompt cache entries */ - prompt_cache_retention?: string; + prompt_cache_retention?: PromptCacheRetention; /** Resume a prior Responses API turn without resending the full transcript */ previous_response_id?: string; max_output_tokens?: number; diff --git a/test/public-api-contract.test.ts b/test/public-api-contract.test.ts index 307093f3..89aa891a 100644 --- a/test/public-api-contract.test.ts +++ b/test/public-api-contract.test.ts @@ -116,6 +116,21 @@ describe("public api contract", () => { const baseBody: RequestBody = { model: "gpt-5-codex", input: [{ type: "message", role: "user", content: "hi" }], + prompt_cache_retention: "24h", + text: { + format: { + type: "json_schema", + name: "compat_response", + schema: { + type: "object", + properties: { + answer: { type: "string" }, + }, + required: ["answer"], + }, + strict: true, + }, + }, }; const transformedPositional = await transformRequestBody( JSON.parse(JSON.stringify(baseBody)) as RequestBody, diff --git a/test/request-transformer.test.ts b/test/request-transformer.test.ts index 17efbbcf..a43b3c1e 100644 --- a/test/request-transformer.test.ts +++ b/test/request-transformer.test.ts @@ -634,6 +634,35 @@ describe('Request Transformer Module', () => { expect(result.prompt_cache_retention).toBe('24h'); }); + it('uses prompt_cache_retention from providerOptions when body omits it', async () => { + const body: RequestBody = { + model: 'gpt-5.4', + input: [], + providerOptions: { + openai: { + promptCacheRetention: '1h', + }, + }, + }; + const result = await transformRequestBody(body, codexInstructions); + expect(result.prompt_cache_retention).toBe('1h'); + }); + + it('prefers body prompt_cache_retention over providerOptions', async () => { + const body: RequestBody = { + model: 'gpt-5.4', + input: [], + prompt_cache_retention: '24h', + providerOptions: { + openai: { + promptCacheRetention: '1h', + }, + }, + }; + const result = await transformRequestBody(body, codexInstructions); + expect(result.prompt_cache_retention).toBe('24h'); + }); + it('preserves text.format when applying text verbosity defaults', async () => { const body: RequestBody = { model: 'gpt-5.4', @@ -1254,6 +1283,19 @@ describe('Request Transformer Module', () => { expect(result.text?.verbosity).toBe('low'); }); + it('should inherit prompt_cache_retention from user config', async () => { + const body: RequestBody = { + model: 'gpt-5.4', + input: [], + }; + const userConfig: UserConfig = { + global: { promptCacheRetention: '7d' }, + models: {}, + }; + const result = await transformRequestBody(body, codexInstructions, userConfig); + expect(result.prompt_cache_retention).toBe('7d'); + }); + it('should prefer body text verbosity over providerOptions', async () => { const body: RequestBody = { model: 'gpt-5', From 456cbaf3ea7d944580cfe5533dffe2df31adb098 Mon Sep 17 00:00:00 2001 From: ndycode <405533+ndycode@users.noreply.github.com> Date: Sun, 22 Mar 2026 16:57:48 +0800 Subject: [PATCH 4/5] Add provider prompt cache precedence coverage --- test/request-transformer.test.ts | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/test/request-transformer.test.ts b/test/request-transformer.test.ts index a43b3c1e..98dc4d2f 100644 --- a/test/request-transformer.test.ts +++ b/test/request-transformer.test.ts @@ -648,6 +648,24 @@ describe('Request Transformer Module', () => { expect(result.prompt_cache_retention).toBe('1h'); }); + it('prefers providerOptions prompt_cache_retention over user config defaults', async () => { + const body: RequestBody = { + model: 'gpt-5.4', + input: [], + providerOptions: { + openai: { + promptCacheRetention: '1h', + }, + }, + }; + const userConfig: UserConfig = { + global: { promptCacheRetention: '7d' }, + models: {}, + }; + const result = await transformRequestBody(body, codexInstructions, userConfig); + expect(result.prompt_cache_retention).toBe('1h'); + }); + it('prefers body prompt_cache_retention over providerOptions', async () => { const body: RequestBody = { model: 'gpt-5.4', From a0e3841a540285a27cfa01b9e417460e48759a94 Mon Sep 17 00:00:00 2001 From: ndycode <405533+ndycode@users.noreply.github.com> Date: Sun, 22 Mar 2026 18:48:25 +0800 Subject: [PATCH 5/5] add prompt cache retention regressions --- test/public-api-contract.test.ts | 4 ++++ test/request-transformer.test.ts | 37 ++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/test/public-api-contract.test.ts b/test/public-api-contract.test.ts index 89aa891a..a9d9a484 100644 --- a/test/public-api-contract.test.ts +++ b/test/public-api-contract.test.ts @@ -141,5 +141,9 @@ describe("public api contract", () => { codexInstructions: "codex", }); expect(transformedNamed).toEqual(transformedPositional); + expect(transformedPositional.prompt_cache_retention).toBe(baseBody.prompt_cache_retention); + expect(transformedNamed.prompt_cache_retention).toBe(baseBody.prompt_cache_retention); + expect(transformedPositional.text?.format).toEqual(baseBody.text?.format); + expect(transformedNamed.text?.format).toEqual(baseBody.text?.format); }); }); diff --git a/test/request-transformer.test.ts b/test/request-transformer.test.ts index 98dc4d2f..af8d5c84 100644 --- a/test/request-transformer.test.ts +++ b/test/request-transformer.test.ts @@ -1314,6 +1314,43 @@ describe('Request Transformer Module', () => { expect(result.prompt_cache_retention).toBe('7d'); }); + it('should inherit prompt_cache_retention from model-specific user config', async () => { + const body: RequestBody = { + model: 'gpt-5.4', + input: [], + }; + const userConfig: UserConfig = { + global: { promptCacheRetention: '7d' }, + models: { + 'gpt-5.4': { + options: { promptCacheRetention: '24h' }, + }, + }, + }; + const result = await transformRequestBody(body, codexInstructions, userConfig); + expect(result.prompt_cache_retention).toBe('24h'); + }); + + it('should inherit model-specific prompt_cache_retention in named params overload', async () => { + const userConfig: UserConfig = { + global: { promptCacheRetention: '7d' }, + models: { + 'gpt-5.4': { + options: { promptCacheRetention: '24h' }, + }, + }, + }; + const result = await transformRequestBody({ + body: { + model: 'gpt-5.4', + input: [], + }, + codexInstructions, + userConfig, + }); + expect(result.prompt_cache_retention).toBe('24h'); + }); + it('should prefer body text verbosity over providerOptions', async () => { const body: RequestBody = { model: 'gpt-5',