From 9968ba8c5948383a313d9f43b8bcdf0c552d8564 Mon Sep 17 00:00:00 2001
From: ndycode <noreply@users.noreply.github.com>
Date: Sun, 22 Mar 2026 16:02:34 +0800
Subject: [PATCH 1/5] enhance responses parser for semantic SSE events

---
 lib/request/response-handler.ts | 426 +++++++++++++++++++++++++++++++-
 test/fetch-helpers.test.ts      |  19 ++
 test/response-handler.test.ts   |  82 ++++++
 3 files changed, 516 insertions(+), 11 deletions(-)

diff --git a/lib/request/response-handler.ts b/lib/request/response-handler.ts
index b8db279b..a36f89ae 100644
--- a/lib/request/response-handler.ts
+++ b/lib/request/response-handler.ts
@@ -1,5 +1,6 @@
 import { createLogger, logRequest, LOGGING_ENABLED } from "../logger.js";
 import { PLUGIN_NAME } from "../constants.js";
+import { isRecord } from "../utils.js";
 
 import type { SSEEventData } from "../types.js";
 
@@ -8,6 +9,322 @@ const log = createLogger("response-handler");
 const MAX_SSE_SIZE = 10 * 1024 * 1024; // 10MB limit to prevent memory exhaustion
 const DEFAULT_STREAM_STALL_TIMEOUT_MS = 45_000;
 
+type MutableRecord = Record<string, unknown>;
+
+interface ParsedResponseState {
+	finalResponse: MutableRecord | null;
+	lastPhase: string | null;
+	outputItems: Map<number, MutableRecord>;
+	outputText: Map<string, string>;
+	phaseText: Map<string, string>;
+	reasoningSummaryText: Map<string, string>;
+}
+
+function createParsedResponseState(): ParsedResponseState {
+	return {
+		finalResponse: null,
+		lastPhase: null,
+		outputItems: new Map<number, MutableRecord>(),
+		outputText: new Map<string, string>(),
+		phaseText: new Map<string, string>(),
+		reasoningSummaryText: new Map<string, string>(),
+	};
+}
+
+function toMutableRecord(value: unknown): MutableRecord | null {
+	return isRecord(value) ? { ...value } : null;
+}
+
+function getNumberField(record: MutableRecord, key: string): number | null {
+	const value = record[key];
+	return typeof value === "number" && Number.isFinite(value) ? value : null;
+}
+
+function getStringField(record: MutableRecord, key: string): string | null {
+	const value = record[key]; // may be any JSON value; only strings are accepted below
+	return typeof value === "string" && value.trim().length > 0 ? value : null; // whitespace-only strings yield null; accepted values are returned untrimmed
+}
+
+function cloneContentArray(content: unknown): MutableRecord[] {
+	if (!Array.isArray(content)) return [];
+	return content.filter(isRecord).map((part) => ({ ...part }));
+}
+
+function mergeRecord(base: MutableRecord | null, update: MutableRecord): MutableRecord {
+	if (!base) return { ...update };
+	const merged: MutableRecord = { ...base, ...update };
+	if ("content" in update || "content" in base) {
+		merged.content = cloneContentArray(update.content ?? base.content);
+	}
+	return merged;
+}
+
+function makeOutputTextKey(outputIndex: number | null, contentIndex: number | null): string | null {
+	if (outputIndex === null || contentIndex === null) return null;
+	return `${outputIndex}:${contentIndex}`;
+}
+
+function makeSummaryKey(outputIndex: number | null, summaryIndex: number | null): string | null {
+	if (outputIndex === null || summaryIndex === null) return null;
+	return `${outputIndex}:${summaryIndex}`;
+}
+
+/** Returns the `text` field of a part when getStringField accepts it, otherwise null. */
+function getPartText(part: unknown): string | null {
+	// Values rejected by isRecord cannot be inspected for a text field.
+	if (!isRecord(part)) return null;
+	return getStringField(part, "text");
+}
+
+function capturePhase(
+	state: ParsedResponseState,
+	phase: unknown,
+	text: string | null = null,
+): void {
+	if (typeof phase !== "string" || phase.trim().length === 0) return;
+	const normalizedPhase = phase.trim();
+	state.lastPhase = normalizedPhase;
+	if (text && text.length > 0) {
+		const existing = state.phaseText.get(normalizedPhase) ?? "";
+		state.phaseText.set(normalizedPhase, `${existing}${text}`);
+	}
+}
+
+function upsertOutputItem(state: ParsedResponseState, outputIndex: number | null, item: unknown): void {
+	if (outputIndex === null || !isRecord(item)) return;
+	const current = state.outputItems.get(outputIndex) ?? null;
+	const merged = mergeRecord(current, item);
+	state.outputItems.set(outputIndex, merged);
+	capturePhase(state, merged.phase);
+}
+
+/** Stores the full text for one output/content slot and credits only its unseen suffix to `phase`. */
+function setOutputTextValue(
+	state: ParsedResponseState,
+	outputIndex: number | null,
+	contentIndex: number | null,
+	text: string | null,
+	phase: unknown = undefined,
+): void {
+	const key = text ? makeOutputTextKey(outputIndex, contentIndex) : null;
+	if (!text || !key) return;
+	const previous = state.outputText.get(key) ?? "";
+	state.outputText.set(key, text);
+	// Text extending the stored value contributes only its new tail; anything else counts in full.
+	let phaseDelta = text;
+	if (previous.length > 0 && text.startsWith(previous)) {
+		phaseDelta = text.slice(previous.length);
+	}
+	capturePhase(state, phase, phaseDelta);
+}
+
+function appendOutputTextValue(
+	state: ParsedResponseState,
+	outputIndex: number | null,
+	contentIndex: number | null,
+	delta: string | null,
+	phase: unknown = undefined,
+): void {
+	if (!delta) return;
+	const key = makeOutputTextKey(outputIndex, contentIndex);
+	if (!key) return;
+	const existing = state.outputText.get(key) ?? "";
+	state.outputText.set(key, `${existing}${delta}`);
+	capturePhase(state, phase, delta);
+}
+
+function setReasoningSummaryValue(
+	state: ParsedResponseState,
+	outputIndex: number | null,
+	summaryIndex: number | null,
+	text: string | null,
+): void {
+	if (!text) return;
+	const key = makeSummaryKey(outputIndex, summaryIndex);
+	if (!key) return;
+	state.reasoningSummaryText.set(key, text);
+}
+
+function appendReasoningSummaryValue(
+	state: ParsedResponseState,
+	outputIndex: number | null,
+	summaryIndex: number | null,
+	delta: string | null,
+): void {
+	if (!delta) return;
+	const key = makeSummaryKey(outputIndex, summaryIndex);
+	if (!key) return;
+	const existing = state.reasoningSummaryText.get(key) ?? "";
+	state.reasoningSummaryText.set(key, `${existing}${delta}`);
+}
+
+function ensureOutputItemAtIndex(output: unknown[], index: number): MutableRecord | null {
+	// Pads `output` with {} up to `index`; returns the record there (non-records are replaced).
+	// Negative/fractional indices would only add stray array properties, so they yield null.
+	if (!Number.isInteger(index) || index < 0) return null;
+	while (output.length <= index) output.push({});
+	if (isRecord(output[index])) return output[index] as MutableRecord;
+	const replacement: MutableRecord = {};
+	output[index] = replacement;
+	return replacement;
+}
+
+function ensureContentPartAtIndex(item: MutableRecord, index: number): MutableRecord | null {
+	// Ensures `item.content` is an array holding a record at `index` and returns that record.
+	// Negative/fractional indices would only add stray array properties, so they yield null.
+	if (!Number.isInteger(index) || index < 0) return null;
+	// Work on a copy so the array previously stored on `item` is never resized in place.
+	const content: unknown[] = Array.isArray(item.content) ? [...item.content] : [];
+	while (content.length <= index) content.push({});
+	if (!isRecord(content[index])) content[index] = {};
+	item.content = content;
+	const part = content[index];
+	return isRecord(part) ? (part as MutableRecord) : null;
+}
+
+function applyAccumulatedOutputText(response: MutableRecord, state: ParsedResponseState): void {
+	if (state.outputText.size === 0) return;
+	const output = Array.isArray(response.output) ? [...response.output] : [];
+
+	for (const [key, text] of state.outputText.entries()) {
+		const [outputIndexText, contentIndexText] = key.split(":");
+		const outputIndex = Number.parseInt(outputIndexText ?? "", 10);
+		const contentIndex = Number.parseInt(contentIndexText ?? "", 10);
+		if (!Number.isFinite(outputIndex) || !Number.isFinite(contentIndex)) continue;
+		const item = ensureOutputItemAtIndex(output, outputIndex);
+		if (!item) continue;
+		const part = ensureContentPartAtIndex(item, contentIndex);
+		if (!part) continue;
+		if (!getStringField(part, "type")) {
+			part.type = "output_text";
+		}
+		part.text = text;
+	}
+
+	if (output.length > 0) {
+		response.output = output;
+	}
+}
+
+function mergeOutputItemsIntoResponse(response: MutableRecord, state: ParsedResponseState): void {
+	if (state.outputItems.size === 0) return;
+	const output = Array.isArray(response.output) ? [...response.output] : [];
+
+	for (const [outputIndex, item] of state.outputItems.entries()) {
+		while (output.length <= outputIndex) {
+			output.push({});
+		}
+		output[outputIndex] = mergeRecord(toMutableRecord(output[outputIndex]), item);
+	}
+
+	response.output = output;
+}
+
+function collectMessageOutputText(output: unknown[]): string {
+	return output
+		.filter(isRecord)
+		.map((item) => {
+			if (item.type !== "message") return "";
+			const content = Array.isArray(item.content) ? item.content : [];
+			return content
+				.filter(isRecord)
+				.map((part) => {
+					if (part.type !== "output_text") return "";
+					return typeof part.text === "string" ? part.text : "";
+				})
+				.join("");
+		})
+		.filter((text) => text.length > 0)
+		.join("");
+}
+
+function collectReasoningSummaryText(output: unknown[]): string {
+	return output
+		.filter(isRecord)
+		.map((item) => {
+			if (item.type !== "reasoning") return "";
+			const summary = Array.isArray(item.summary) ? item.summary : [];
+			return summary
+				.filter(isRecord)
+				.map((part) => (typeof part.text === "string" ? part.text : ""))
+				.filter((text) => text.length > 0)
+				.join("\n\n");
+		})
+		.filter((text) => text.length > 0)
+		.join("\n\n");
+}
+
+function applyReasoningSummaries(response: MutableRecord, state: ParsedResponseState): void {
+	if (state.reasoningSummaryText.size === 0) return;
+	const output = Array.isArray(response.output) ? [...response.output] : [];
+
+	for (const [key, text] of state.reasoningSummaryText.entries()) {
+		const [outputIndexText, summaryIndexText] = key.split(":");
+		const outputIndex = Number.parseInt(outputIndexText ?? "", 10);
+		const summaryIndex = Number.parseInt(summaryIndexText ?? "", 10);
+		if (!Number.isFinite(outputIndex) || !Number.isFinite(summaryIndex)) continue;
+		const item = ensureOutputItemAtIndex(output, outputIndex);
+		if (!item) continue;
+		const summary = Array.isArray(item.summary) ? [...item.summary] : [];
+		while (summary.length <= summaryIndex) {
+			summary.push({});
+		}
+		const current = summary[summaryIndex];
+		const nextPart = isRecord(current) ? { ...current } : {};
+		if (!getStringField(nextPart, "type")) {
+			nextPart.type = "summary_text";
+		}
+		nextPart.text = text;
+		summary[summaryIndex] = nextPart;
+		item.summary = summary;
+		if (!getStringField(item, "type")) {
+			item.type = "reasoning";
+		}
+	}
+
+	if (output.length > 0) {
+		response.output = output;
+	}
+}
+
+function finalizeParsedResponse(state: ParsedResponseState): MutableRecord | null {
+	const response = state.finalResponse ? { ...state.finalResponse } : null;
+	if (!response) return null;
+
+	mergeOutputItemsIntoResponse(response, state);
+	applyAccumulatedOutputText(response, state);
+	applyReasoningSummaries(response, state);
+
+	const output = Array.isArray(response.output) ? response.output : [];
+	if (typeof response.output_text !== "string") {
+		const outputText = collectMessageOutputText(output);
+		if (outputText.length > 0) {
+			response.output_text = outputText;
+		}
+	}
+
+	const reasoningSummaryText = collectReasoningSummaryText(output);
+	if (reasoningSummaryText.length > 0) {
+		response.reasoning_summary_text = reasoningSummaryText;
+	}
+
+	if (state.lastPhase && typeof response.phase !== "string") {
+		response.phase = state.lastPhase;
+	}
+
+	if (state.phaseText.size > 0) {
+		const phaseText: MutableRecord = {};
+		for (const [phase, text] of state.phaseText.entries()) {
+			phaseText[phase] = text;
+			if (phase === "commentary") response.commentary_text = text;
+			if (phase === "final_answer") response.final_answer_text = text;
+		}
+		response.phase_text = phaseText;
+	}
+
+	return response;
+}
+
 function extractResponseId(response: unknown): string | null {
 	if (!response || typeof response !== "object") return null;
 	const candidate = (response as { id?: unknown }).id;
@@ -33,20 +350,105 @@ function notifyResponseId(
 }
 
 function maybeCaptureResponseEvent(
+	state: ParsedResponseState,
 	data: SSEEventData,
 	onResponseId?: (responseId: string) => void,
-): unknown | null {
+): void {
+	// Folds one parsed SSE event into `state`. Error events are logged and skipped.
+	// Any embedded `response` record is snapshotted as the latest response and its
+	// id reported; item, text and summary events go to the matching accumulator.
+	// Events of any other type only update the last-seen phase.
 	if (data.type === "error") {
 		log.error("SSE error event received", { error: data });
-		return null;
+		return;
 	}
 
-	if (data.type === "response.done" || data.type === "response.completed") {
+	if (isRecord(data.response)) {
+		state.finalResponse = { ...data.response };
 		notifyResponseId(onResponseId, data.response);
-		return data.response ?? null;
 	}
 
-	return null;
+	if (data.type === "response.done" || data.type === "response.completed") {
+		return;
+	}
+
+	const eventRecord = toMutableRecord(data);
+	if (!eventRecord) return;
+	const outputIndex = getNumberField(eventRecord, "output_index");
+
+	if (data.type === "response.output_item.added" || data.type === "response.output_item.done") {
+		upsertOutputItem(state, outputIndex, eventRecord.item);
+		return;
+	}
+
+	if (data.type === "response.output_text.delta") {
+		// Deltas are taken verbatim instead of through getStringField: that
+		// helper rejects whitespace-only strings, so chunks such as " " or "\n"
+		// would be dropped and neighbouring words fused together.
+		// Null or empty deltas are still ignored by appendOutputTextValue.
+		const contentIndex = getNumberField(eventRecord, "content_index");
+		const delta = typeof eventRecord.delta === "string" ? eventRecord.delta : null;
+		appendOutputTextValue(state, outputIndex, contentIndex, delta, eventRecord.phase);
+		return;
+	}
+
+	if (data.type === "response.output_text.done") {
+		const contentIndex = getNumberField(eventRecord, "content_index");
+		const text = getStringField(eventRecord, "text");
+		setOutputTextValue(state, outputIndex, contentIndex, text, eventRecord.phase);
+		return;
+	}
+
+	if (data.type === "response.content_part.added" || data.type === "response.content_part.done") {
+		const part = toMutableRecord(eventRecord.part);
+		if (!part || getStringField(part, "type") !== "output_text") {
+			capturePhase(state, part?.phase);
+			return;
+		}
+		setOutputTextValue(
+			state,
+			outputIndex,
+			getNumberField(eventRecord, "content_index"),
+			getPartText(part),
+			part.phase,
+		);
+		return;
+	}
+
+	if (data.type === "response.reasoning_summary_text.delta") {
+		// Summary deltas are also read verbatim: getStringField would reject
+		// whitespace-only chunks and corrupt the accumulated summary text.
+		// Null or empty deltas are still ignored by appendReasoningSummaryValue.
+		const summaryIndex = getNumberField(eventRecord, "summary_index");
+		const delta = typeof eventRecord.delta === "string" ? eventRecord.delta : null;
+		appendReasoningSummaryValue(state, outputIndex, summaryIndex, delta);
+		return;
+	}
+
+	if (data.type === "response.reasoning_summary_text.done") {
+		setReasoningSummaryValue(
+			state,
+			outputIndex,
+			getNumberField(eventRecord, "summary_index"),
+			getStringField(eventRecord, "text"),
+		);
+		return;
+	}
+
+	if (
+		data.type === "response.reasoning_summary_part.added" ||
+		data.type === "response.reasoning_summary_part.done"
+	) {
+		setReasoningSummaryValue(
+			state,
+			outputIndex,
+			getNumberField(eventRecord, "summary_index"),
+			getPartText(eventRecord.part),
+		);
+		return;
+	}
+
+	capturePhase(state, eventRecord.phase);
 }
 
 /**
@@ -60,6 +462,7 @@ function parseSseStream(
 	onResponseId?: (responseId: string) => void,
 ): unknown | null {
 	const lines = sseText.split(/\r?\n/);
+	const state = createParsedResponseState();
 
 	for (const line of lines) {
 		const trimmedLine = line.trim();
@@ -68,15 +471,14 @@ function parseSseStream(
 			if (!payload || payload === '[DONE]') continue;
 			try {
 				const data = JSON.parse(payload) as SSEEventData;
-				const finalResponse = maybeCaptureResponseEvent(data, onResponseId);
-				if (finalResponse) return finalResponse;
+				maybeCaptureResponseEvent(state, data, onResponseId);
 			} catch {
 				// Skip malformed JSON
 			}
 		}
 	}
 
-	return null;
+	return finalizeParsedResponse(state);
 }
 
 /**
@@ -125,7 +527,9 @@ export async function convertSseToJson(
 		if (!finalResponse) {
 			log.warn("Could not find final response in SSE stream");
 
-			logRequest("stream-error", { error: "No response.done event found" });
+			logRequest("stream-error", {
+				error: "No terminal response event found in SSE stream",
+			});
 
 			// Return original stream if we can't parse
 			return new Response(fullText, {
@@ -181,7 +585,7 @@ function createResponseIdCapturingStream(
 			if (!payload || payload === "[DONE]") continue;
 			try {
 				const data = JSON.parse(payload) as SSEEventData;
-				maybeCaptureResponseEvent(data, onResponseId);
+				maybeCaptureResponseEvent(createParsedResponseState(), data, onResponseId);
 			} catch {
 				// Ignore malformed SSE lines and keep forwarding the raw stream.
 			}
@@ -230,7 +634,7 @@ async function readWithTimeout(
 				timeoutId = setTimeout(() => {
 					reject(
 						new Error(
-							`SSE stream stalled for ${timeoutMs}ms while waiting for response.done`,
+							`SSE stream stalled for ${timeoutMs}ms while waiting for a terminal response event`,
 						),
 					);
 				}, timeoutMs);
diff --git a/test/fetch-helpers.test.ts b/test/fetch-helpers.test.ts
index 518a725c..f90108e7 100644
--- a/test/fetch-helpers.test.ts
+++ b/test/fetch-helpers.test.ts
@@ -743,6 +743,25 @@ describe('createEntitlementErrorResponse', () => {
 			const text = await result.text();
 			expect(text).toBe('stream body');
 		});
+
+		it('captures response ids from streaming semantic SSE without rewriting the stream', async () => {
+			const onResponseId = vi.fn();
+			const response = new Response(
+				[
+					'data: {"type":"response.created","response":{"id":"resp_stream_123"}}',
+					'',
+					'data: {"type":"response.done","response":{"id":"resp_stream_123"}}',
+					'',
+				].join('\n'),
+				{ status: 200, headers: new Headers({ 'content-type': 'text/event-stream' }) },
+			);
+
+			const result = await handleSuccessResponse(response, true, { onResponseId });
+			const text = await result.text();
+
+			expect(text).toContain('"resp_stream_123"');
+			expect(onResponseId).toHaveBeenCalledWith('resp_stream_123');
+		});
 	});
 
 	describe('handleErrorResponse error normalization', () => {
diff --git a/test/response-handler.test.ts b/test/response-handler.test.ts
index 880554c6..2fbc7538 100644
--- a/test/response-handler.test.ts
+++ b/test/response-handler.test.ts
@@ -66,6 +66,88 @@ data: {"type":"response.completed","response":{"id":"resp_456","output":"done"}}
 			expect(body).toEqual({ id: 'resp_456', output: 'done' });
 		});
 
+		it('synthesizes output_text and reasoning summaries from semantic SSE events', async () => {
+			const sseContent = [
+				'data: {"type":"response.created","response":{"id":"resp_semantic_123","object":"response"}}',
+				'data: {"type":"response.output_item.added","output_index":0,"item":{"id":"msg_123","type":"message","role":"assistant","phase":"final_answer"}}',
+				'data: {"type":"response.output_text.delta","output_index":0,"content_index":0,"delta":"Hello ","phase":"final_answer"}',
+				'data: {"type":"response.output_text.delta","output_index":0,"content_index":0,"delta":"world","phase":"final_answer"}',
+				'data: {"type":"response.output_text.done","output_index":0,"content_index":0,"text":"Hello world","phase":"final_answer"}',
+				'data: {"type":"response.output_item.added","output_index":1,"item":{"id":"rs_123","type":"reasoning"}}',
+				'data: {"type":"response.reasoning_summary_text.delta","output_index":1,"summary_index":0,"delta":"Need more context."}',
+				'data: {"type":"response.reasoning_summary_text.done","output_index":1,"summary_index":0,"text":"Need more context."}',
+				'data: {"type":"response.completed","response":{"id":"resp_semantic_123","object":"response"}}',
+				'',
+			].join('\n');
+			const response = new Response(sseContent);
+			const headers = new Headers();
+
+			const result = await convertSseToJson(response, headers);
+			const body = await result.json() as {
+				id: string;
+				output?: Array<{
+					type?: string;
+					role?: string;
+					phase?: string;
+					content?: Array<{ type?: string; text?: string }>;
+					summary?: Array<{ type?: string; text?: string }>;
+				}>;
+				output_text?: string;
+				reasoning_summary_text?: string;
+				phase?: string;
+				final_answer_text?: string;
+				phase_text?: Record<string, string>;
+			};
+
+			expect(body.id).toBe('resp_semantic_123');
+			expect(body.output_text).toBe('Hello world');
+			expect(body.reasoning_summary_text).toBe('Need more context.');
+			expect(body.phase).toBe('final_answer');
+			expect(body.final_answer_text).toBe('Hello world');
+			expect(body.phase_text).toEqual({ final_answer: 'Hello world' });
+			expect(body.output?.[0]?.content?.[0]).toEqual({
+				type: 'output_text',
+				text: 'Hello world',
+			});
+			expect(body.output?.[1]?.summary?.[0]).toEqual({
+				type: 'summary_text',
+				text: 'Need more context.',
+			});
+		});
+
+		it('tracks commentary and final_answer phase text separately when phase labels are present', async () => {
+			const sseContent = [
+				'data: {"type":"response.created","response":{"id":"resp_phase_123","object":"response"}}',
+				'data: {"type":"response.output_item.added","output_index":0,"item":{"id":"msg_123","type":"message","role":"assistant","phase":"commentary"}}',
+				'data: {"type":"response.output_text.delta","output_index":0,"content_index":0,"delta":"Thinking...","phase":"commentary"}',
+				'data: {"type":"response.output_text.done","output_index":0,"content_index":0,"text":"Thinking...","phase":"commentary"}',
+				'data: {"type":"response.output_item.done","output_index":0,"item":{"id":"msg_123","type":"message","role":"assistant","phase":"final_answer"}}',
+				'data: {"type":"response.output_text.done","output_index":0,"content_index":1,"text":"Done.","phase":"final_answer"}',
+				'data: {"type":"response.done","response":{"id":"resp_phase_123","object":"response"}}',
+				'',
+			].join('\n');
+			const response = new Response(sseContent);
+			const headers = new Headers();
+
+			const result = await convertSseToJson(response, headers);
+			const body = await result.json() as {
+				phase?: string;
+				commentary_text?: string;
+				final_answer_text?: string;
+				phase_text?: Record<string, string>;
+				output_text?: string;
+			};
+
+			expect(body.phase).toBe('final_answer');
+			expect(body.commentary_text).toBe('Thinking...');
+			expect(body.final_answer_text).toBe('Done.');
+			expect(body.phase_text).toEqual({
+				commentary: 'Thinking...',
+				final_answer: 'Done.',
+			});
+			expect(body.output_text).toBe('Thinking...Done.');
+		});
+
 		it('should return original text if no final response found', async () => {
 			const sseContent = `data: {"type":"response.started"}
 data: {"type":"chunk","delta":"text"}

From 71d44c73974f2e91d6647d5ef86339d998deb6f8 Mon Sep 17 00:00:00 2001
From: ndycode <noreply@users.noreply.github.com>
Date: Sun, 22 Mar 2026 16:28:19 +0800
Subject: [PATCH 2/5] add response compaction fallback for fast sessions

---
 index.ts                           |  34 +++++++
 lib/request/fetch-helpers.ts       |  27 ++++-
 lib/request/helpers/model-map.ts   |  20 +++-
 lib/request/request-transformer.ts |  64 ++++++++++--
 lib/request/response-compaction.ts | 158 +++++++++++++++++++++++++++++
 test/codex-manager-cli.test.ts     |  10 +-
 test/index.test.ts                 |  63 +++++++++++-
 test/model-map.test.ts             |   8 ++
 test/request-transformer.test.ts   |  28 ++++-
 test/response-compaction.test.ts   | 115 +++++++++++++++++++++
 10 files changed, 502 insertions(+), 25 deletions(-)
 create mode 100644 lib/request/response-compaction.ts
 create mode 100644 test/response-compaction.test.ts

diff --git a/index.ts b/index.ts
index 98d56bdd..ec31a07b 100644
--- a/index.ts
+++ b/index.ts
@@ -154,6 +154,7 @@ import {
 	isWorkspaceDisabledError,
 } from "./lib/request/fetch-helpers.js";
 import { applyFastSessionDefaults } from "./lib/request/request-transformer.js";
+import { applyResponseCompaction } from "./lib/request/response-compaction.js";
 import {
 	getRateLimitBackoff,
 	RATE_LIMIT_SHORT_RETRY_THRESHOLD_MS,
@@ -1369,10 +1370,13 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => {
 										fastSession: fastSessionEnabled,
 										fastSessionStrategy,
 										fastSessionMaxInputItems,
+										deferFastSessionInputTrimming: fastSessionEnabled,
 									},
 								);
 										let requestInit = transformation?.updatedInit ?? baseInit;
 										let transformedBody: RequestBody | undefined = transformation?.body;
+										const deferredFastSessionInputTrim =
+											transformation?.deferredFastSessionInputTrim;
 										const promptCacheKey = transformedBody?.prompt_cache_key;
 										let model = transformedBody?.model;
 										let modelFamily = model ? getModelFamily(model) : "gpt-5.1";
@@ -1670,6 +1674,36 @@ accountAttemptLoop: while (attempted.size < Math.max(1, accountCount)) {
 										promptCacheKey: effectivePromptCacheKey,
 									},
 								);
+								if (transformedBody && deferredFastSessionInputTrim) {
+									const compactionResult = await applyResponseCompaction({
+										body: transformedBody,
+										requestUrl: url,
+										headers,
+										trim: deferredFastSessionInputTrim,
+										fetchImpl: async (requestUrl, requestInit) => {
+											const normalizedCompactionUrl =
+												typeof requestUrl === "string"
+													? requestUrl
+													: String(requestUrl);
+											return fetch(
+												normalizedCompactionUrl,
+												applyProxyCompatibleInit(
+													normalizedCompactionUrl,
+													requestInit,
+												),
+											);
+										},
+										signal: abortSignal,
+										timeoutMs: Math.min(fetchTimeoutMs, 4_000),
+									});
+									if (compactionResult.mode !== "unchanged") {
+										transformedBody = compactionResult.body;
+										requestInit = {
+											...(requestInit ?? {}),
+											body: JSON.stringify(transformedBody),
+										};
+									}
+								}
 								const quotaScheduleKey = `${entitlementAccountKey}:${model ?? modelFamily}`;
 								const capabilityModelKey = model ?? modelFamily;
 								const quotaDeferral = preemptiveQuotaScheduler.getDeferral(quotaScheduleKey);
diff --git a/lib/request/fetch-helpers.ts b/lib/request/fetch-helpers.ts
index 37043418..348ea4f8 100644
--- a/lib/request/fetch-helpers.ts
+++ b/lib/request/fetch-helpers.ts
@@ -8,7 +8,12 @@ import { ProxyAgent } from "undici";
 import { queuedRefresh } from "../refresh-queue.js";
 import { logRequest, logError, logWarn } from "../logger.js";
 import { getCodexInstructions, getModelFamily } from "../prompts/codex.js";
-import { transformRequestBody, normalizeModel } from "./request-transformer.js";
+import {
+	transformRequestBody,
+	normalizeModel,
+	resolveFastSessionInputTrimPlan,
+	type FastSessionInputTrimPlan,
+} from "./request-transformer.js";
 import {
 	attachResponseIdCapture,
 	convertSseToJson,
@@ -99,6 +104,12 @@ export interface ResolveUnsupportedCodexFallbackOptions {
 	customChain?: Record<string, string[]>;
 }
 
+export interface TransformRequestForCodexResult {
+	body: RequestBody;
+	updatedInit: RequestInit;
+	deferredFastSessionInputTrim?: FastSessionInputTrimPlan["trim"];
+}
+
 function canonicalizeModelName(model: string | undefined): string | undefined {
 	if (!model) return undefined;
 	const trimmed = model.trim().toLowerCase();
@@ -651,8 +662,9 @@ export async function transformRequestForCodex(
 		fastSession?: boolean;
 		fastSessionStrategy?: "hybrid" | "always";
 		fastSessionMaxInputItems?: number;
+		deferFastSessionInputTrimming?: boolean;
 	},
-): Promise<{ body: RequestBody; updatedInit: RequestInit } | undefined> {
+): Promise<TransformRequestForCodexResult | undefined> {
 	const hasParsedBody =
 		parsedBody !== undefined &&
 		parsedBody !== null &&
@@ -670,6 +682,12 @@ export async function transformRequestForCodex(
 			body = JSON.parse(init.body) as RequestBody;
 		}
 		const originalModel = body.model;
+		const fastSessionInputTrimPlan = resolveFastSessionInputTrimPlan(
+			body,
+			options?.fastSession ?? false,
+			options?.fastSessionStrategy ?? "hybrid",
+			options?.fastSessionMaxInputItems ?? 30,
+		);
 
 		// Normalize model first to determine which instructions to fetch
 		// This ensures we get the correct model-specific prompt
@@ -700,6 +718,7 @@ export async function transformRequestForCodex(
 			options?.fastSession ?? false,
 			options?.fastSessionStrategy ?? "hybrid",
 			options?.fastSessionMaxInputItems ?? 30,
+			options?.deferFastSessionInputTrimming ?? false,
 		);
 
 		// Log transformed request
@@ -720,6 +739,10 @@ export async function transformRequestForCodex(
 			return {
 				body: transformedBody,
 				updatedInit: { ...(init ?? {}), body: JSON.stringify(transformedBody) },
+				deferredFastSessionInputTrim:
+					options?.deferFastSessionInputTrimming === true
+						? fastSessionInputTrimPlan.trim
+						: undefined,
 			};
 	} catch (e) {
 		logError(`${ERROR_MESSAGES.REQUEST_PARSE_ERROR}`, e);
diff --git a/lib/request/helpers/model-map.ts b/lib/request/helpers/model-map.ts
index 20a6832d..b623c845 100644
--- a/lib/request/helpers/model-map.ts
+++ b/lib/request/helpers/model-map.ts
@@ -25,6 +25,7 @@ export type PromptModelFamily =
 export interface ModelCapabilities {
 	toolSearch: boolean;
 	computerUse: boolean;
+	compaction: boolean;
 }
 
 export interface ModelProfile {
@@ -48,14 +49,27 @@ const TOOL_CAPABILITIES = {
 	full: {
 		toolSearch: true,
 		computerUse: true,
+		compaction: true,
 	},
 	computerOnly: {
 		toolSearch: false,
 		computerUse: true,
+		compaction: false,
+	},
+	computerAndCompact: {
+		toolSearch: false,
+		computerUse: true,
+		compaction: true,
+	},
+	compactOnly: {
+		toolSearch: false,
+		computerUse: false,
+		compaction: true,
 	},
 	basic: {
 		toolSearch: false,
 		computerUse: false,
+		compaction: false,
 	},
 } as const satisfies Record<string, ModelCapabilities>;
 
@@ -103,7 +117,7 @@ export const MODEL_PROFILES: Record<string, ModelProfile> = {
 		promptFamily: "gpt-5.2",
 		defaultReasoningEffort: "high",
 		supportedReasoningEfforts: ["medium", "high", "xhigh"],
-		capabilities: TOOL_CAPABILITIES.computerOnly,
+		capabilities: TOOL_CAPABILITIES.computerAndCompact,
 	},
 	"gpt-5.2-pro": {
 		normalizedModel: "gpt-5.2-pro",
@@ -145,14 +159,14 @@ export const MODEL_PROFILES: Record<string, ModelProfile> = {
 		promptFamily: "gpt-5.2",
 		defaultReasoningEffort: "medium",
 		supportedReasoningEfforts: ["medium"],
-		capabilities: TOOL_CAPABILITIES.basic,
+		capabilities: TOOL_CAPABILITIES.full,
 	},
 	"gpt-5-nano": {
 		normalizedModel: "gpt-5-nano",
 		promptFamily: "gpt-5.2",
 		defaultReasoningEffort: "medium",
 		supportedReasoningEfforts: ["medium"],
-		capabilities: TOOL_CAPABILITIES.basic,
+		capabilities: TOOL_CAPABILITIES.compactOnly,
 	},
 } as const;
 
diff --git a/lib/request/request-transformer.ts b/lib/request/request-transformer.ts
index 6c002476..3f6a3353 100644
--- a/lib/request/request-transformer.ts
+++ b/lib/request/request-transformer.ts
@@ -33,6 +33,7 @@ export interface TransformRequestBodyParams {
 	fastSession?: boolean;
 	fastSessionStrategy?: FastSessionStrategy;
 	fastSessionMaxInputItems?: number;
+	deferFastSessionInputTrimming?: boolean;
 }
 
 const PLAN_MODE_ONLY_TOOLS = new Set(["request_user_input"]);
@@ -482,6 +483,15 @@ export function trimInputForFastSession(
 	return trimmed.slice(trimmed.length - safeMax);
 }
 
+export interface FastSessionInputTrimPlan {
+	shouldApply: boolean;
+	isTrivialTurn: boolean;
+	trim?: {
+		maxItems: number;
+		preferLatestUserOnly: boolean;
+	};
+}
+
 function isTrivialLatestPrompt(text: string): boolean {
 	const normalized = text.trim();
 	if (!normalized) return false;
@@ -540,6 +550,33 @@ function isComplexFastSessionRequest(
 	return false;
 }
 
+/**
+ * Decides whether fast-session tuning applies to `body` and, if so, which trim
+ * settings to use. Tuning requires `fastSession` plus either the "always"
+ * strategy or a request that isComplexFastSessionRequest does not flag.
+ */
+export function resolveFastSessionInputTrimPlan(
+	body: RequestBody,
+	fastSession: boolean,
+	fastSessionStrategy: FastSessionStrategy,
+	fastSessionMaxInputItems: number,
+): FastSessionInputTrimPlan {
+	const forced = fastSessionStrategy === "always";
+	const shouldApply =
+		fastSession && (forced || !isComplexFastSessionRequest(body, fastSessionMaxInputItems));
+	const isTrivialTurn = isTrivialLatestPrompt(getLatestUserText(body.input) ?? "");
+	if (!shouldApply) {
+		return { shouldApply, isTrivialTurn, trim: undefined };
+	}
+
+	// preferLatestUserOnly is set only when tuning applies and the turn is trivial.
+	const trim = {
+		maxItems: fastSessionMaxInputItems,
+		preferLatestUserOnly: isTrivialTurn,
+	};
+	return { shouldApply, isTrivialTurn, trim };
+}
+
 function getLatestUserText(input: InputItem[] | undefined): string | undefined {
 	if (!Array.isArray(input)) return undefined;
 	for (let i = input.length - 1; i >= 0; i--) {
@@ -672,6 +709,7 @@ export async function transformRequestBody(
 	fastSession?: boolean,
 	fastSessionStrategy?: FastSessionStrategy,
 	fastSessionMaxInputItems?: number,
+	deferFastSessionInputTrimming?: boolean,
 ): Promise<RequestBody>;
 export async function transformRequestBody(
 	bodyOrParams: RequestBody | TransformRequestBodyParams,
@@ -681,6 +719,7 @@ export async function transformRequestBody(
 	fastSession = false,
 	fastSessionStrategy: FastSessionStrategy = "hybrid",
 	fastSessionMaxInputItems = 30,
+	deferFastSessionInputTrimming = false,
 ): Promise<RequestBody> {
 	const useNamedParams =
 		typeof codexInstructions === "undefined" &&
@@ -695,6 +734,7 @@ export async function transformRequestBody(
 	let resolvedFastSession: boolean;
 	let resolvedFastSessionStrategy: FastSessionStrategy;
 	let resolvedFastSessionMaxInputItems: number;
+	let resolvedDeferFastSessionInputTrimming: boolean;
 
 	if (useNamedParams) {
 		const namedParams = bodyOrParams as TransformRequestBodyParams;
@@ -705,6 +745,8 @@ export async function transformRequestBody(
 		resolvedFastSession = namedParams.fastSession ?? false;
 		resolvedFastSessionStrategy = namedParams.fastSessionStrategy ?? "hybrid";
 		resolvedFastSessionMaxInputItems = namedParams.fastSessionMaxInputItems ?? 30;
+		resolvedDeferFastSessionInputTrimming =
+			namedParams.deferFastSessionInputTrimming ?? false;
 	} else {
 		body = bodyOrParams as RequestBody;
 		resolvedCodexInstructions = codexInstructions;
@@ -713,6 +755,7 @@ export async function transformRequestBody(
 		resolvedFastSession = fastSession;
 		resolvedFastSessionStrategy = fastSessionStrategy;
 		resolvedFastSessionMaxInputItems = fastSessionMaxInputItems;
+		resolvedDeferFastSessionInputTrimming = deferFastSessionInputTrimming;
 	}
 
 	if (!body || typeof body !== "object") {
@@ -747,17 +790,17 @@ export async function transformRequestBody(
 	const reasoningModel = shouldUseNormalizedReasoningModel
 		? normalizedModel
 		: lookupModel;
-	const shouldApplyFastSessionTuning =
-		resolvedFastSession &&
-		(resolvedFastSessionStrategy === "always" ||
-			!isComplexFastSessionRequest(body, resolvedFastSessionMaxInputItems));
-	const latestUserText = getLatestUserText(body.input);
-	const isTrivialTurn = isTrivialLatestPrompt(latestUserText ?? "");
+	const fastSessionInputTrimPlan = resolveFastSessionInputTrimPlan(
+		body,
+		resolvedFastSession,
+		resolvedFastSessionStrategy,
+		resolvedFastSessionMaxInputItems,
+	);
+	const shouldApplyFastSessionTuning = fastSessionInputTrimPlan.shouldApply;
+	const isTrivialTurn = fastSessionInputTrimPlan.isTrivialTurn;
 	const shouldDisableToolsForTrivialTurn =
 		shouldApplyFastSessionTuning &&
 		isTrivialTurn;
-	const shouldPreferLatestUserOnly =
-		shouldApplyFastSessionTuning && isTrivialTurn;
 
 	// Codex required fields
 	// ChatGPT backend REQUIRES store=false (confirmed via testing)
@@ -789,10 +832,11 @@ export async function transformRequestBody(
 	if (body.input && Array.isArray(body.input)) {
 		let inputItems: InputItem[] = body.input;
 
-			if (shouldApplyFastSessionTuning) {
+			if (shouldApplyFastSessionTuning && !resolvedDeferFastSessionInputTrimming) {
 				inputItems =
 						trimInputForFastSession(inputItems, resolvedFastSessionMaxInputItems, {
-							preferLatestUserOnly: shouldPreferLatestUserOnly,
+							preferLatestUserOnly:
+								fastSessionInputTrimPlan.trim?.preferLatestUserOnly ?? false,
 						}) ?? inputItems;
 			}
 
diff --git a/lib/request/response-compaction.ts b/lib/request/response-compaction.ts
new file mode 100644
index 00000000..d61151fe
--- /dev/null
+++ b/lib/request/response-compaction.ts
@@ -0,0 +1,220 @@
+import { logDebug, logWarn } from "../logger.js";
+import type { InputItem, RequestBody } from "../types.js";
+import { isRecord } from "../utils.js";
+import { getModelCapabilities } from "./helpers/model-map.js";
+import { trimInputForFastSession } from "./request-transformer.js";
+
+/** Timeout for the compaction request when the caller does not supply a usable one. */
+const DEFAULT_COMPACTION_TIMEOUT_MS = 4_000;
+/** Smallest timeout the compaction request is ever given. */
+const MIN_COMPACTION_TIMEOUT_MS = 250;
+
+/** Settings for the local trim that stands in for server-side compaction. */
+export interface DeferredFastSessionInputTrim {
+	maxItems: number;
+	preferLatestUserOnly: boolean;
+}
+
+/**
+ * Result of {@link applyResponseCompaction}. `mode` says how `body.input` was
+ * obtained: from the compaction endpoint, from the local trim, or untouched.
+ */
+export interface ResponseCompactionResult {
+	body: RequestBody;
+	mode: "compacted" | "trimmed" | "unchanged";
+}
+
+/** Arguments for {@link applyResponseCompaction}. */
+export interface ApplyResponseCompactionParams {
+	body: RequestBody;
+	/** URL of the request being prepared; `/compact` is appended to its path. */
+	requestUrl: string;
+	/** Copied for the compaction call, with `accept`/`content-type` forced to JSON. */
+	headers: Headers;
+	trim: DeferredFastSessionInputTrim;
+	fetchImpl: typeof fetch;
+	/** Caller's abort signal; when it aborts, the compaction call aborts and the error is rethrown. */
+	signal?: AbortSignal | null;
+	/** Compaction timeout in ms. Defaults to 4000, floored at 250. */
+	timeoutMs?: number;
+}
+
+/** Type guard: `value` is an array and every element is a record. */
+function isInputItemArray(value: unknown): value is InputItem[] {
+	return Array.isArray(value) && value.every((item) => isRecord(item));
+}
+
+/**
+ * Finds a reusable input array in a compaction payload.
+ * Looks at `output`, then `input`, on the payload itself and then on a nested
+ * `response` object. Returns `undefined` when none of them is an item array.
+ */
+function extractCompactedInput(payload: unknown): InputItem[] | undefined {
+	if (!isRecord(payload)) return undefined;
+	if (isInputItemArray(payload.output)) return payload.output;
+	if (isInputItemArray(payload.input)) return payload.input;
+
+	const response = payload.response;
+	if (!isRecord(response)) return undefined;
+	if (isInputItemArray(response.output)) return response.output;
+	if (isInputItemArray(response.input)) return response.input;
+	return undefined;
+}
+
+/**
+ * Builds the compaction endpoint by appending `/compact` to the path of `requestUrl`.
+ * The segment goes before any `?query` or `#fragment`, trailing slashes on the
+ * path are dropped first, and a path that already ends in `/compact` is kept.
+ */
+function buildCompactionUrl(requestUrl: string): string {
+	const suffixStart = requestUrl.search(/[?#]/);
+	const path = suffixStart === -1 ? requestUrl : requestUrl.slice(0, suffixStart);
+	const suffix = suffixStart === -1 ? "" : requestUrl.slice(suffixStart);
+	const basePath = path.replace(/\/+$/, "");
+	return basePath.endsWith("/compact")
+		? `${basePath}${suffix}`
+		: `${basePath}/compact${suffix}`;
+}
+
+/**
+ * Prepares the locally trimmed body used whenever server-side compaction is not.
+ * Returns `undefined` when `body.input` is not an array or when trimming hands
+ * back the same array, i.e. there is nothing to shorten.
+ */
+function createFallbackBody(
+	body: RequestBody,
+	trim: DeferredFastSessionInputTrim,
+): RequestBody | undefined {
+	if (!Array.isArray(body.input)) return undefined;
+	const trimmedInput =
+		trimInputForFastSession(body.input, trim.maxItems, {
+			preferLatestUserOnly: trim.preferLatestUserOnly,
+		}) ?? body.input;
+
+	return trimmedInput === body.input ? undefined : { ...body, input: trimmedInput };
+}
+
+/** Clamps a caller-supplied timeout; missing or non-finite values fall back to the default. */
+function resolveCompactionTimeoutMs(timeoutMs: number | undefined): number {
+	if (timeoutMs === undefined || !Number.isFinite(timeoutMs)) {
+		return DEFAULT_COMPACTION_TIMEOUT_MS;
+	}
+	return Math.max(MIN_COMPACTION_TIMEOUT_MS, timeoutMs);
+}
+
+/**
+ * Creates a signal that aborts after `timeoutMs` or as soon as `signal` aborts.
+ * `cleanup` clears the timer and detaches the listener; call it once the request settles.
+ */
+function createTimedAbortSignal(
+	signal: AbortSignal | null | undefined,
+	timeoutMs: number,
+): { signal: AbortSignal; cleanup: () => void } {
+	const controller = new AbortController();
+	const timeout = setTimeout(() => {
+		controller.abort(new Error("Response compaction timeout"));
+	}, timeoutMs);
+
+	const onAbort = () => {
+		controller.abort(signal?.reason ?? new Error("Aborted"));
+	};
+
+	if (signal?.aborted) {
+		onAbort();
+	} else if (signal) {
+		signal.addEventListener("abort", onAbort, { once: true });
+	}
+
+	return {
+		signal: controller.signal,
+		cleanup: () => {
+			clearTimeout(timeout);
+			signal?.removeEventListener("abort", onAbort);
+		},
+	};
+}
+
+/**
+ * Shortens `params.body.input`, preferring server-side compaction over the local trim.
+ *
+ * - Returns `"unchanged"` without any request when the local trim would not shorten the input.
+ * - Returns `"trimmed"` when the model lacks the compaction capability, the request
+ *   fails or times out, or the payload has no usable input array.
+ * - Returns `"compacted"` with the endpoint's items as the new input otherwise.
+ *
+ * @throws The caller's abort reason (or `Error("Aborted")`) when the request fails after `params.signal` aborted.
+ */
+export async function applyResponseCompaction(
+	params: ApplyResponseCompactionParams,
+): Promise<ResponseCompactionResult> {
+	const fallbackBody = createFallbackBody(params.body, params.trim);
+	if (!fallbackBody) {
+		return { body: params.body, mode: "unchanged" };
+	}
+
+	if (!getModelCapabilities(params.body.model).compaction) {
+		return { body: fallbackBody, mode: "trimmed" };
+	}
+
+	const compactionHeaders = new Headers(params.headers);
+	compactionHeaders.set("accept", "application/json");
+	compactionHeaders.set("content-type", "application/json");
+	const { signal, cleanup } = createTimedAbortSignal(
+		params.signal,
+		resolveCompactionTimeoutMs(params.timeoutMs),
+	);
+
+	try {
+		const response = await params.fetchImpl(buildCompactionUrl(params.requestUrl), {
+			method: "POST",
+			headers: compactionHeaders,
+			body: JSON.stringify({
+				model: params.body.model,
+				input: params.body.input,
+			}),
+			signal,
+		});
+
+		if (!response.ok) {
+			// The error body is never read; cancel it so the connection is not held open.
+			void response.body?.cancel().catch(() => undefined);
+			logWarn("Responses compaction request failed; using trim fallback.", {
+				status: response.status,
+				statusText: response.statusText,
+				model: params.body.model,
+			});
+			return { body: fallbackBody, mode: "trimmed" };
+		}
+
+		const payload = (await response.json()) as unknown;
+		const compactedInput = extractCompactedInput(payload);
+		if (!compactedInput || compactedInput.length === 0) {
+			logWarn("Responses compaction returned no reusable input; using trim fallback.", {
+				model: params.body.model,
+			});
+			return { body: fallbackBody, mode: "trimmed" };
+		}
+
+		logDebug("Applied server-side response compaction.", {
+			model: params.body.model,
+			originalInputLength: Array.isArray(params.body.input) ? params.body.input.length : 0,
+			compactedInputLength: compactedInput.length,
+		});
+		return { body: { ...params.body, input: compactedInput }, mode: "compacted" };
+	} catch (error) {
+		// Only a caller-initiated abort propagates; a timeout or network error degrades to the trim.
+		if (signal.aborted && params.signal?.aborted) {
+			throw params.signal.reason instanceof Error
+				? params.signal.reason
+				: new Error("Aborted");
+		}
+
+		logWarn("Responses compaction failed; using trim fallback.", {
+			model: params.body.model,
+			error: error instanceof Error ? error.message : String(error),
+		});
+		return { body: fallbackBody, mode: "trimmed" };
+	} finally {
+		cleanup();
+	}
+}
diff --git a/test/codex-manager-cli.test.ts b/test/codex-manager-cli.test.ts
index 613d6c93..ef1a6ead 100644
--- a/test/codex-manager-cli.test.ts
+++ b/test/codex-manager-cli.test.ts
@@ -5707,7 +5707,7 @@ describe("codex manager cli commands", () => {
 				normalized: string;
 				remapped: boolean;
 				promptFamily: string;
-				capabilities: { toolSearch: boolean; computerUse: boolean };
+				capabilities: { toolSearch: boolean; computerUse: boolean; compaction: boolean };
 			};
 		};
 		expect(payload.command).toBe("report");
@@ -5722,6 +5722,7 @@ describe("codex manager cli commands", () => {
 			capabilities: {
 				toolSearch: false,
 				computerUse: false,
+				compaction: false,
 			},
 		});
 	});
@@ -5760,7 +5761,7 @@ describe("codex manager cli commands", () => {
 				normalized: string;
 				remapped: boolean;
 				promptFamily: string;
-				capabilities: { toolSearch: boolean; computerUse: boolean };
+				capabilities: { toolSearch: boolean; computerUse: boolean; compaction: boolean };
 			};
 		};
 		expect(payload.modelSelection).toEqual({
@@ -5769,8 +5770,9 @@ describe("codex manager cli commands", () => {
 			remapped: true,
 			promptFamily: "gpt-5.2",
 			capabilities: {
-				toolSearch: false,
-				computerUse: false,
+				toolSearch: true,
+				computerUse: true,
+				compaction: true,
 			},
 		});
 	});
diff --git a/test/index.test.ts b/test/index.test.ts
index fb89f4e6..7882bc6f 100644
--- a/test/index.test.ts
+++ b/test/index.test.ts
@@ -136,9 +136,13 @@ vi.mock("../lib/live-account-sync.js", () => ({
 	LiveAccountSync: liveAccountSyncCtorMock,
 }));
 
-vi.mock("../lib/request/request-transformer.js", () => ({
-	applyFastSessionDefaults: <T>(config: T) => config,
-}));
+vi.mock("../lib/request/request-transformer.js", async () => {
+	const actual = await vi.importActual("../lib/request/request-transformer.js");
+	return {
+		...(actual as Record<string, unknown>),
+		applyFastSessionDefaults: <T>(config: T) => config,
+	};
+});
 
 vi.mock("../lib/logger.js", () => ({
 	initLogger: vi.fn(),
@@ -1431,6 +1435,59 @@ describe("OpenAIOAuthPlugin fetch handler", () => {
 		expect(secondBody?.previous_response_id).toBe("resp_explicit_456");
 	});
 
+	it("compacts fast-session input before sending the upstream request when compaction succeeds", async () => {
+		const fetchHelpers = await import("../lib/request/fetch-helpers.js");
+		const longInput = Array.from({ length: 12 }, (_value, index) => ({
+			type: "message",
+			role: index === 0 ? "developer" : "user",
+			content: index === 0 ? "system prompt" : `message-${index}`,
+		}));
+		const compactedInput = [
+			{
+				type: "message",
+				role: "assistant",
+				content: "compacted summary",
+			},
+		];
+
+		vi.mocked(fetchHelpers.transformRequestForCodex).mockResolvedValueOnce({
+			updatedInit: {
+				method: "POST",
+				body: JSON.stringify({ model: "gpt-5-mini", input: longInput }),
+			},
+			body: { model: "gpt-5-mini", input: longInput },
+			deferredFastSessionInputTrim: { maxItems: 8, preferLatestUserOnly: false },
+		});
+
+		globalThis.fetch = vi
+			.fn()
+			.mockResolvedValueOnce(
+				new Response(JSON.stringify({ output: compactedInput }), { status: 200 }),
+			)
+			.mockResolvedValueOnce(
+				new Response(JSON.stringify({ content: "ok" }), { status: 200 }),
+			);
+
+		const { sdk } = await setupPlugin();
+		const response = await sdk.fetch!("https://api.openai.com/v1/chat", {
+			method: "POST",
+			body: JSON.stringify({ model: "gpt-5-mini", input: longInput }),
+		});
+
+		expect(response.status).toBe(200);
+		expect(globalThis.fetch).toHaveBeenCalledTimes(2);
+		expect(vi.mocked(globalThis.fetch).mock.calls[0]?.[0]).toBe(
+			"https://api.openai.com/v1/chat/compact",
+		);
+
+		const upstreamInit = vi.mocked(globalThis.fetch).mock.calls[1]?.[1] as RequestInit;
+		const upstreamBody =
+			typeof upstreamInit.body === "string"
+				? (JSON.parse(upstreamInit.body) as { input?: unknown[] })
+				: {};
+		expect(upstreamBody.input).toEqual(compactedInput);
+	});
+
 	it("uses the refreshed token email when checking entitlement blocks", async () => {
 		const { AccountManager } = await import("../lib/accounts.js");
 		const manager = buildRoutingManager([
diff --git a/test/model-map.test.ts b/test/model-map.test.ts
index 6ad16967..7d2f8adb 100644
--- a/test/model-map.test.ts
+++ b/test/model-map.test.ts
@@ -84,14 +84,22 @@ describe("model map", () => {
 			expect(getModelCapabilities("gpt-5.4")).toEqual({
 				toolSearch: true,
 				computerUse: true,
+				compaction: true,
 			});
 			expect(getModelCapabilities("gpt-5.4-pro")).toEqual({
 				toolSearch: false,
 				computerUse: true,
+				compaction: true,
 			});
 			expect(getModelCapabilities("gpt-5-mini")).toEqual({
+				toolSearch: true,
+				computerUse: true,
+				compaction: true,
+			});
+			expect(getModelCapabilities("gpt-5-nano")).toEqual({
 				toolSearch: false,
 				computerUse: false,
+				compaction: true,
 			});
 		});
 	});
diff --git a/test/request-transformer.test.ts b/test/request-transformer.test.ts
index 51eb1214..17efbbcf 100644
--- a/test/request-transformer.test.ts
+++ b/test/request-transformer.test.ts
@@ -653,9 +653,31 @@ describe('Request Transformer Module', () => {
 						},
 					},
 				};
-				const result = await transformRequestBody(body, codexInstructions);
-				expect(result.text?.verbosity).toBe('medium');
-				expect(result.text?.format).toEqual(body.text?.format);
+			const result = await transformRequestBody(body, codexInstructions);
+			expect(result.text?.verbosity).toBe('medium');
+			expect(result.text?.format).toEqual(body.text?.format);
+		});
+
+			it('defers fast-session input trimming when requested for downstream compaction', async () => {
+				const body: RequestBody = {
+					model: 'gpt-5.4',
+					input: Array.from({ length: 12 }, (_value, index) => ({
+						type: 'message',
+						role: index === 0 ? 'developer' : 'user',
+						content: index === 0 ? 'system prompt' : `message-${index}`,
+					})),
+				};
+				const result = await transformRequestBody(
+					body,
+					codexInstructions,
+					{ global: {}, models: {} },
+					true,
+					true,
+					'always',
+					8,
+					true,
+				);
+				expect(result.input).toHaveLength(12);
 			});
 
 		it('should set required Codex fields', async () => {
diff --git a/test/response-compaction.test.ts b/test/response-compaction.test.ts
new file mode 100644
index 00000000..649532ee
--- /dev/null
+++ b/test/response-compaction.test.ts
@@ -0,0 +1,115 @@
+import { applyResponseCompaction } from "../lib/request/response-compaction.js";
+import type { RequestBody } from "../lib/types.js";
+
+function buildInput(length: number) {
+	return Array.from({ length }, (_value, index) => ({
+		type: "message",
+		role: index === 0 ? "developer" : "user",
+		content: index === 0 ? "system prompt" : `message-${index}`,
+	}));
+}
+
+describe("response compaction", () => {
+	it("returns unchanged when the fast-session trim would be a no-op", async () => {
+		const body: RequestBody = {
+			model: "gpt-5.4",
+			input: buildInput(2),
+		};
+		const fetchImpl = vi.fn<typeof fetch>();
+
+		const result = await applyResponseCompaction({
+			body,
+			requestUrl: "https://chatgpt.com/backend-api/codex/responses",
+			headers: new Headers(),
+			trim: { maxItems: 8, preferLatestUserOnly: false },
+			fetchImpl,
+		});
+
+		expect(result.mode).toBe("unchanged");
+		expect(fetchImpl).not.toHaveBeenCalled();
+		expect(result.body.input).toEqual(body.input);
+	});
+
+	it("falls back to local trimming when the model does not support compaction", async () => {
+		const body: RequestBody = {
+			model: "gpt-5-codex",
+			input: buildInput(10),
+		};
+		const fetchImpl = vi.fn<typeof fetch>();
+
+		const result = await applyResponseCompaction({
+			body,
+			requestUrl: "https://chatgpt.com/backend-api/codex/responses",
+			headers: new Headers(),
+			trim: { maxItems: 8, preferLatestUserOnly: false },
+			fetchImpl,
+		});
+
+		expect(result.mode).toBe("trimmed");
+		expect(fetchImpl).not.toHaveBeenCalled();
+		expect(result.body.input).toHaveLength(8);
+	});
+
+	it("replaces request input with server-compacted output when available", async () => {
+		const compactedOutput = [
+			{
+				type: "message",
+				role: "assistant",
+				content: "compacted summary",
+			},
+		];
+		const body: RequestBody = {
+			model: "gpt-5-mini",
+			input: buildInput(12),
+		};
+		const fetchImpl = vi.fn<typeof fetch>().mockResolvedValue(
+			new Response(JSON.stringify({ output: compactedOutput }), { status: 200 }),
+		);
+
+		const result = await applyResponseCompaction({
+			body,
+			requestUrl: "https://chatgpt.com/backend-api/codex/responses",
+			headers: new Headers({ accept: "text/event-stream" }),
+			trim: { maxItems: 8, preferLatestUserOnly: false },
+			fetchImpl,
+		});
+
+		expect(result.mode).toBe("compacted");
+		expect(result.body.input).toEqual(compactedOutput);
+		expect(fetchImpl).toHaveBeenCalledTimes(1);
+		expect(fetchImpl).toHaveBeenCalledWith(
+			"https://chatgpt.com/backend-api/codex/responses/compact",
+			expect.objectContaining({
+				method: "POST",
+				headers: expect.any(Headers),
+			}),
+		);
+
+		const requestInit = vi.mocked(fetchImpl).mock.calls[0]?.[1];
+		const headers = new Headers(requestInit?.headers);
+		expect(headers.get("accept")).toBe("application/json");
+		expect(headers.get("content-type")).toBe("application/json");
+	});
+
+	it("falls back to local trimming when the compaction request fails", async () => {
+		const body: RequestBody = {
+			model: "gpt-5.4",
+			input: buildInput(12),
+		};
+		const fetchImpl = vi.fn<typeof fetch>().mockResolvedValue(
+			new Response(JSON.stringify({ error: { message: "nope" } }), { status: 404 }),
+		);
+
+		const result = await applyResponseCompaction({
+			body,
+			requestUrl: "https://chatgpt.com/backend-api/codex/responses",
+			headers: new Headers(),
+			trim: { maxItems: 8, preferLatestUserOnly: false },
+			fetchImpl,
+		});
+
+		expect(result.mode).toBe("trimmed");
+		expect(result.body.input).toHaveLength(8);
+		expect(fetchImpl).toHaveBeenCalledTimes(1);
+	});
+});

From 21df8136a0a76323541e4d96fe1fbc9a972bce58 Mon Sep 17 00:00:00 2001
From: ndycode <noreply@users.noreply.github.com>
Date: Sun, 22 Mar 2026 16:40:41 +0800
Subject: [PATCH 3/5] type responses text format and prompt cache retention

---
 lib/request/request-transformer.ts | 18 +++++++++++++
 lib/types.ts                       | 10 ++++++-
 test/public-api-contract.test.ts   | 15 +++++++++++
 test/request-transformer.test.ts   | 42 ++++++++++++++++++++++++++++++
 4 files changed, 84 insertions(+), 1 deletion(-)

diff --git a/lib/request/request-transformer.ts b/lib/request/request-transformer.ts
index 3f6a3353..5a407f55 100644
--- a/lib/request/request-transformer.ts
+++ b/lib/request/request-transformer.ts
@@ -198,6 +198,18 @@ function resolveTextVerbosity(
 	);
 }
 
+/** Picks prompt cache retention from the request body, then provider options, then model config. */
+function resolvePromptCacheRetention(
+	modelConfig: ConfigOptions,
+	body: RequestBody,
+): RequestBody["prompt_cache_retention"] {
+	const fromBody = body.prompt_cache_retention;
+	if (fromBody != null) return fromBody;
+	const fromProvider = body.providerOptions?.openai?.promptCacheRetention;
+	if (fromProvider != null) return fromProvider;
+	return modelConfig.promptCacheRetention;
+}
+
 function resolveInclude(modelConfig: ConfigOptions, body: RequestBody): string[] {
 	const providerOpenAI = body.providerOptions?.openai;
 	const base =
@@ -899,11 +911,17 @@ export async function transformRequestBody(
 
 	// Configure text verbosity (support user config)
 	// Default: "medium" (matches Codex CLI default for all GPT-5 models)
+	// Preserve any structured-output `text.format` contract from the host.
 	body.text = {
 		...body.text,
 		verbosity: resolveTextVerbosity(modelConfig, body),
 	};
 
+	const promptCacheRetention = resolvePromptCacheRetention(modelConfig, body);
+	if (promptCacheRetention !== undefined) {
+		body.prompt_cache_retention = promptCacheRetention;
+	}
+
 	if (shouldApplyFastSessionTuning) {
 		// In fast-session mode, prioritize speed by clamping to minimum reasoning + verbosity.
 		// getReasoningConfig normalizes unsupported values per model family.
diff --git a/lib/types.ts b/lib/types.ts
index 1feeb8a9..17323401 100644
--- a/lib/types.ts
+++ b/lib/types.ts
@@ -24,9 +24,17 @@ export interface ConfigOptions {
 	reasoningEffort?: "none" | "minimal" | "low" | "medium" | "high" | "xhigh";
 	reasoningSummary?: "auto" | "concise" | "detailed" | "off" | "on";
 	textVerbosity?: "low" | "medium" | "high";
+	promptCacheRetention?: PromptCacheRetention;
 	include?: string[];
 }
 
+export type PromptCacheRetention = // named retention literals, plus any other string
+	| "5m"
+	| "1h"
+	| "24h"
+	| "7d"
+	| (string & {}); // NOTE(review): presumably the `string & {}` idiom that keeps literal suggestions while accepting arbitrary strings — confirm
+
 export interface ReasoningConfig {
 	effort: "none" | "minimal" | "low" | "medium" | "high" | "xhigh";
 	summary: "auto" | "concise" | "detailed";
@@ -131,7 +139,7 @@ export interface RequestBody {
 	/** Stable key to enable prompt-token caching on Codex backend */
 	prompt_cache_key?: string;
 	/** Retention mode for server-side prompt cache entries */
-	prompt_cache_retention?: string;
+	prompt_cache_retention?: PromptCacheRetention;
 	/** Resume a prior Responses API turn without resending the full transcript */
 	previous_response_id?: string;
 	max_output_tokens?: number;
diff --git a/test/public-api-contract.test.ts b/test/public-api-contract.test.ts
index 307093f3..89aa891a 100644
--- a/test/public-api-contract.test.ts
+++ b/test/public-api-contract.test.ts
@@ -116,6 +116,21 @@ describe("public api contract", () => {
 		const baseBody: RequestBody = {
 			model: "gpt-5-codex",
 			input: [{ type: "message", role: "user", content: "hi" }],
+			prompt_cache_retention: "24h",
+			text: {
+				format: {
+					type: "json_schema",
+					name: "compat_response",
+					schema: {
+						type: "object",
+						properties: {
+							answer: { type: "string" },
+						},
+						required: ["answer"],
+					},
+					strict: true,
+				},
+			},
 		};
 		const transformedPositional = await transformRequestBody(
 			JSON.parse(JSON.stringify(baseBody)) as RequestBody,
diff --git a/test/request-transformer.test.ts b/test/request-transformer.test.ts
index 17efbbcf..a43b3c1e 100644
--- a/test/request-transformer.test.ts
+++ b/test/request-transformer.test.ts
@@ -634,6 +634,35 @@ describe('Request Transformer Module', () => {
 				expect(result.prompt_cache_retention).toBe('24h');
 			});
 
+			it('uses prompt_cache_retention from providerOptions when body omits it', async () => {
+				const body: RequestBody = {
+					model: 'gpt-5.4',
+					input: [],
+					providerOptions: {
+						openai: {
+							promptCacheRetention: '1h',
+						},
+					},
+				};
+				const result = await transformRequestBody(body, codexInstructions);
+				expect(result.prompt_cache_retention).toBe('1h');
+			});
+
+			it('prefers body prompt_cache_retention over providerOptions', async () => {
+				const body: RequestBody = {
+					model: 'gpt-5.4',
+					input: [],
+					prompt_cache_retention: '24h',
+					providerOptions: {
+						openai: {
+							promptCacheRetention: '1h',
+						},
+					},
+				};
+				const result = await transformRequestBody(body, codexInstructions);
+				expect(result.prompt_cache_retention).toBe('24h');
+			});
+
 			it('preserves text.format when applying text verbosity defaults', async () => {
 				const body: RequestBody = {
 					model: 'gpt-5.4',
@@ -1254,6 +1283,19 @@ describe('Request Transformer Module', () => {
 			expect(result.text?.verbosity).toBe('low');
 		});
 
+		it('should inherit prompt_cache_retention from user config', async () => {
+			const body: RequestBody = {
+				model: 'gpt-5.4',
+				input: [],
+			};
+			const userConfig: UserConfig = {
+				global: { promptCacheRetention: '7d' },
+				models: {},
+			};
+			const result = await transformRequestBody(body, codexInstructions, userConfig);
+			expect(result.prompt_cache_retention).toBe('7d');
+		});
+
 		it('should prefer body text verbosity over providerOptions', async () => {
 			const body: RequestBody = {
 				model: 'gpt-5',

From 456cbaf3ea7d944580cfe5533dffe2df31adb098 Mon Sep 17 00:00:00 2001
From: ndycode <405533+ndycode@users.noreply.github.com>
Date: Sun, 22 Mar 2026 16:57:48 +0800
Subject: [PATCH 4/5] Add provider prompt cache precedence coverage

---
 test/request-transformer.test.ts | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/test/request-transformer.test.ts b/test/request-transformer.test.ts
index a43b3c1e..98dc4d2f 100644
--- a/test/request-transformer.test.ts
+++ b/test/request-transformer.test.ts
@@ -648,6 +648,24 @@ describe('Request Transformer Module', () => {
 				expect(result.prompt_cache_retention).toBe('1h');
 			});
 
+			it('prefers providerOptions prompt_cache_retention over user config defaults', async () => {
+				const body: RequestBody = {
+					model: 'gpt-5.4',
+					input: [],
+					providerOptions: {
+						openai: {
+							promptCacheRetention: '1h',
+						},
+					},
+				};
+				const userConfig: UserConfig = {
+					global: { promptCacheRetention: '7d' },
+					models: {},
+				};
+				const result = await transformRequestBody(body, codexInstructions, userConfig);
+				expect(result.prompt_cache_retention).toBe('1h');
+			});
+
 			it('prefers body prompt_cache_retention over providerOptions', async () => {
 				const body: RequestBody = {
 					model: 'gpt-5.4',

From a0e3841a540285a27cfa01b9e417460e48759a94 Mon Sep 17 00:00:00 2001
From: ndycode <405533+ndycode@users.noreply.github.com>
Date: Sun, 22 Mar 2026 18:48:25 +0800
Subject: [PATCH 5/5] add prompt cache retention regressions

---
 test/public-api-contract.test.ts |  4 ++++
 test/request-transformer.test.ts | 37 ++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+)

diff --git a/test/public-api-contract.test.ts b/test/public-api-contract.test.ts
index 89aa891a..a9d9a484 100644
--- a/test/public-api-contract.test.ts
+++ b/test/public-api-contract.test.ts
@@ -141,5 +141,9 @@ describe("public api contract", () => {
 			codexInstructions: "codex",
 		});
 		expect(transformedNamed).toEqual(transformedPositional);
+		expect(transformedPositional.prompt_cache_retention).toBe(baseBody.prompt_cache_retention);
+		expect(transformedNamed.prompt_cache_retention).toBe(baseBody.prompt_cache_retention);
+		expect(transformedPositional.text?.format).toEqual(baseBody.text?.format);
+		expect(transformedNamed.text?.format).toEqual(baseBody.text?.format);
 	});
 });
diff --git a/test/request-transformer.test.ts b/test/request-transformer.test.ts
index 98dc4d2f..af8d5c84 100644
--- a/test/request-transformer.test.ts
+++ b/test/request-transformer.test.ts
@@ -1314,6 +1314,43 @@ describe('Request Transformer Module', () => {
 			expect(result.prompt_cache_retention).toBe('7d');
 		});
 
+		it('should inherit prompt_cache_retention from model-specific user config', async () => {
+			const body: RequestBody = {
+				model: 'gpt-5.4',
+				input: [],
+			};
+			const userConfig: UserConfig = {
+				global: { promptCacheRetention: '7d' },
+				models: {
+					'gpt-5.4': {
+						options: { promptCacheRetention: '24h' },
+					},
+				},
+			};
+			const result = await transformRequestBody(body, codexInstructions, userConfig);
+			expect(result.prompt_cache_retention).toBe('24h');
+		});
+
+		it('should inherit model-specific prompt_cache_retention in named params overload', async () => {
+			const userConfig: UserConfig = {
+				global: { promptCacheRetention: '7d' },
+				models: {
+					'gpt-5.4': {
+						options: { promptCacheRetention: '24h' },
+					},
+				},
+			};
+			const result = await transformRequestBody({
+				body: {
+					model: 'gpt-5.4',
+					input: [],
+				},
+				codexInstructions,
+				userConfig,
+			});
+			expect(result.prompt_cache_retention).toBe('24h');
+		});
+
 		it('should prefer body text verbosity over providerOptions', async () => {
 			const body: RequestBody = {
 				model: 'gpt-5',