diff --git a/index.ts b/index.ts
index 13c03abb..42f02245 100644
--- a/index.ts
+++ b/index.ts
@@ -154,6 +154,7 @@ import {
 	isWorkspaceDisabledError,
 } from "./lib/request/fetch-helpers.js";
 import { applyFastSessionDefaults } from "./lib/request/request-transformer.js";
+import { applyResponseCompaction } from "./lib/request/response-compaction.js";
 import {
 	getRateLimitBackoff,
 	RATE_LIMIT_SHORT_RETRY_THRESHOLD_MS,
@@ -1351,10 +1352,13 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => {
 										fastSession: fastSessionEnabled,
 										fastSessionStrategy,
 										fastSessionMaxInputItems,
+										deferFastSessionInputTrimming: fastSessionEnabled,
 									},
 								);
 										let requestInit = transformation?.updatedInit ?? baseInit;
 										let transformedBody: RequestBody | undefined = transformation?.body;
+										let pendingFastSessionInputTrim =
+											transformation?.deferredFastSessionInputTrim;
 										const promptCacheKey = transformedBody?.prompt_cache_key;
 										let model = transformedBody?.model;
 										let modelFamily = model ? getModelFamily(model) : "gpt-5.1";
@@ -1672,6 +1676,38 @@ accountAttemptLoop: while (attempted.size < Math.max(1, accountCount)) {
 										promptCacheKey: effectivePromptCacheKey,
 									},
 								);
+								if (transformedBody && pendingFastSessionInputTrim) {
+									const activeFastSessionInputTrim = pendingFastSessionInputTrim;
+									pendingFastSessionInputTrim = undefined;
+									const compactionResult = await applyResponseCompaction({
+										body: transformedBody,
+										requestUrl: url,
+										headers,
+										trim: activeFastSessionInputTrim,
+										fetchImpl: async (requestUrl, requestInit) => {
+											const normalizedCompactionUrl =
+												typeof requestUrl === "string"
+													? requestUrl
+													: String(requestUrl);
+											return fetch(
+												normalizedCompactionUrl,
+												applyProxyCompatibleInit(
+													normalizedCompactionUrl,
+													requestInit,
+												),
+											);
+										},
+										signal: abortSignal,
+										timeoutMs: Math.min(fetchTimeoutMs, 4_000),
+									});
+									if (compactionResult.mode !== "unchanged") {
+										transformedBody = compactionResult.body;
+										requestInit = {
+											...(requestInit ?? {}),
+											body: JSON.stringify(transformedBody),
+										};
+									}
+								}
 								const quotaScheduleKey = `${entitlementAccountKey}:${model ?? modelFamily}`;
 								const capabilityModelKey = model ?? modelFamily;
 								const quotaDeferral = preemptiveQuotaScheduler.getDeferral(quotaScheduleKey);
diff --git a/lib/request/fetch-helpers.ts b/lib/request/fetch-helpers.ts
index 37043418..348ea4f8 100644
--- a/lib/request/fetch-helpers.ts
+++ b/lib/request/fetch-helpers.ts
@@ -8,7 +8,12 @@ import { ProxyAgent } from "undici";
 import { queuedRefresh } from "../refresh-queue.js";
 import { logRequest, logError, logWarn } from "../logger.js";
 import { getCodexInstructions, getModelFamily } from "../prompts/codex.js";
-import { transformRequestBody, normalizeModel } from "./request-transformer.js";
+import {
+	transformRequestBody,
+	normalizeModel,
+	resolveFastSessionInputTrimPlan,
+	type FastSessionInputTrimPlan,
+} from "./request-transformer.js";
 import {
 	attachResponseIdCapture,
 	convertSseToJson,
@@ -99,6 +104,12 @@ export interface ResolveUnsupportedCodexFallbackOptions {
 	customChain?: Record<string, string[]>;
 }
 
+export interface TransformRequestForCodexResult { // value resolved by transformRequestForCodex on success
+	body: RequestBody; // the transformed request body
+	updatedInit: RequestInit; // original init with `body` replaced by the JSON-serialized transformed body
+	deferredFastSessionInputTrim?: FastSessionInputTrimPlan["trim"]; // trim plan handed to the caller; populated only when deferFastSessionInputTrimming is true
+}
+
 function canonicalizeModelName(model: string | undefined): string | undefined {
 	if (!model) return undefined;
 	const trimmed = model.trim().toLowerCase();
@@ -651,8 +662,9 @@ export async function transformRequestForCodex(
 		fastSession?: boolean;
 		fastSessionStrategy?: "hybrid" | "always";
 		fastSessionMaxInputItems?: number;
+		deferFastSessionInputTrimming?: boolean;
 	},
-): Promise<{ body: RequestBody; updatedInit: RequestInit } | undefined> {
+): Promise<TransformRequestForCodexResult | undefined> {
 	const hasParsedBody =
 		parsedBody !== undefined &&
 		parsedBody !== null &&
@@ -670,6 +682,12 @@ export async function transformRequestForCodex(
 			body = JSON.parse(init.body) as RequestBody;
 		}
 		const originalModel = body.model;
+		const fastSessionInputTrimPlan = resolveFastSessionInputTrimPlan(
+			body,
+			options?.fastSession ?? false,
+			options?.fastSessionStrategy ?? "hybrid",
+			options?.fastSessionMaxInputItems ?? 30,
+		);
 
 		// Normalize model first to determine which instructions to fetch
 		// This ensures we get the correct model-specific prompt
@@ -700,6 +718,7 @@ export async function transformRequestForCodex(
 			options?.fastSession ?? false,
 			options?.fastSessionStrategy ?? "hybrid",
 			options?.fastSessionMaxInputItems ?? 30,
+			options?.deferFastSessionInputTrimming ?? false,
 		);
 
 		// Log transformed request
@@ -720,6 +739,10 @@ export async function transformRequestForCodex(
 			return {
 				body: transformedBody,
 				updatedInit: { ...(init ?? {}), body: JSON.stringify(transformedBody) },
+				deferredFastSessionInputTrim:
+					options?.deferFastSessionInputTrimming === true
+						? fastSessionInputTrimPlan.trim
+						: undefined,
 			};
 	} catch (e) {
 		logError(`${ERROR_MESSAGES.REQUEST_PARSE_ERROR}`, e);
diff --git a/lib/request/helpers/model-map.ts b/lib/request/helpers/model-map.ts
index 20a6832d..4f303ae0 100644
--- a/lib/request/helpers/model-map.ts
+++ b/lib/request/helpers/model-map.ts
@@ -25,6 +25,7 @@ export type PromptModelFamily =
 export interface ModelCapabilities {
 	toolSearch: boolean;
 	computerUse: boolean;
+	compaction: boolean; // gates the server-side `/compact` request in applyResponseCompaction
 }
 
 export interface ModelProfile {
@@ -48,14 +49,27 @@ const TOOL_CAPABILITIES = {
 	full: {
 		toolSearch: true,
 		computerUse: true,
+		compaction: true,
 	},
 	computerOnly: {
 		toolSearch: false,
 		computerUse: true,
+		compaction: false,
+	},
+	computerAndCompact: {
+		toolSearch: false,
+		computerUse: true,
+		compaction: true,
+	},
+	compactOnly: {
+		toolSearch: false,
+		computerUse: false,
+		compaction: true,
 	},
 	basic: {
 		toolSearch: false,
 		computerUse: false,
+		compaction: false,
 	},
 } as const satisfies Record<string, ModelCapabilities>;
 
@@ -103,7 +117,7 @@ export const MODEL_PROFILES: Record<string, ModelProfile> = {
 		promptFamily: "gpt-5.2",
 		defaultReasoningEffort: "high",
 		supportedReasoningEfforts: ["medium", "high", "xhigh"],
-		capabilities: TOOL_CAPABILITIES.computerOnly,
+		capabilities: TOOL_CAPABILITIES.computerAndCompact,
 	},
 	"gpt-5.2-pro": {
 		normalizedModel: "gpt-5.2-pro",
@@ -145,14 +159,14 @@ export const MODEL_PROFILES: Record<string, ModelProfile> = {
 		promptFamily: "gpt-5.2",
 		defaultReasoningEffort: "medium",
 		supportedReasoningEfforts: ["medium"],
-		capabilities: TOOL_CAPABILITIES.basic,
+		capabilities: TOOL_CAPABILITIES.compactOnly,
 	},
 	"gpt-5-nano": {
 		normalizedModel: "gpt-5-nano",
 		promptFamily: "gpt-5.2",
 		defaultReasoningEffort: "medium",
 		supportedReasoningEfforts: ["medium"],
-		capabilities: TOOL_CAPABILITIES.basic,
+		capabilities: TOOL_CAPABILITIES.compactOnly,
 	},
 } as const;
 
diff --git a/lib/request/request-transformer.ts b/lib/request/request-transformer.ts
index 6c002476..3f6a3353 100644
--- a/lib/request/request-transformer.ts
+++ b/lib/request/request-transformer.ts
@@ -33,6 +33,7 @@ export interface TransformRequestBodyParams {
 	fastSession?: boolean;
 	fastSessionStrategy?: FastSessionStrategy;
 	fastSessionMaxInputItems?: number;
+	deferFastSessionInputTrimming?: boolean;
 }
 
 const PLAN_MODE_ONLY_TOOLS = new Set(["request_user_input"]);
@@ -482,6 +483,15 @@ export function trimInputForFastSession(
 	return trimmed.slice(trimmed.length - safeMax);
 }
 
+export interface FastSessionInputTrimPlan { // returned by resolveFastSessionInputTrimPlan
+	shouldApply: boolean; // true when fast-session tuning applies to the request
+	isTrivialTurn: boolean; // isTrivialLatestPrompt verdict for the latest user text
+	trim?: { // undefined whenever shouldApply is false
+		maxItems: number; // the fastSessionMaxInputItems value passed in
+		preferLatestUserOnly: boolean; // true only when tuning applies and the turn is trivial
+	};
+}
+
 function isTrivialLatestPrompt(text: string): boolean {
 	const normalized = text.trim();
 	if (!normalized) return false;
@@ -540,6 +550,33 @@ function isComplexFastSessionRequest(
 	return false;
 }
 
+/**
+ * Decides whether fast-session tuning applies to `body` and, if so, which
+ * input-trim settings go with it.
+ *
+ * Tuning applies when fast session is on and either the strategy is "always"
+ * or the request is not classified as complex; `trim` is present only then.
+ */
+export function resolveFastSessionInputTrimPlan(
+	body: RequestBody,
+	fastSession: boolean,
+	fastSessionStrategy: FastSessionStrategy,
+	fastSessionMaxInputItems: number,
+): FastSessionInputTrimPlan {
+	const forceTuning = fastSessionStrategy === "always";
+	const shouldApply =
+		fastSession && (forceTuning || !isComplexFastSessionRequest(body, fastSessionMaxInputItems));
+	const isTrivialTurn = isTrivialLatestPrompt(getLatestUserText(body.input) ?? "");
+	if (!shouldApply) {
+		return { shouldApply, isTrivialTurn, trim: undefined };
+	}
+	return {
+		shouldApply,
+		isTrivialTurn,
+		trim: { maxItems: fastSessionMaxInputItems, preferLatestUserOnly: isTrivialTurn },
+	};
+}
+
 function getLatestUserText(input: InputItem[] | undefined): string | undefined {
 	if (!Array.isArray(input)) return undefined;
 	for (let i = input.length - 1; i >= 0; i--) {
@@ -672,6 +709,7 @@ export async function transformRequestBody(
 	fastSession?: boolean,
 	fastSessionStrategy?: FastSessionStrategy,
 	fastSessionMaxInputItems?: number,
+	deferFastSessionInputTrimming?: boolean,
 ): Promise<RequestBody>;
 export async function transformRequestBody(
 	bodyOrParams: RequestBody | TransformRequestBodyParams,
@@ -681,6 +719,7 @@ export async function transformRequestBody(
 	fastSession = false,
 	fastSessionStrategy: FastSessionStrategy = "hybrid",
 	fastSessionMaxInputItems = 30,
+	deferFastSessionInputTrimming = false,
 ): Promise<RequestBody> {
 	const useNamedParams =
 		typeof codexInstructions === "undefined" &&
@@ -695,6 +734,7 @@ export async function transformRequestBody(
 	let resolvedFastSession: boolean;
 	let resolvedFastSessionStrategy: FastSessionStrategy;
 	let resolvedFastSessionMaxInputItems: number;
+	let resolvedDeferFastSessionInputTrimming: boolean;
 
 	if (useNamedParams) {
 		const namedParams = bodyOrParams as TransformRequestBodyParams;
@@ -705,6 +745,8 @@ export async function transformRequestBody(
 		resolvedFastSession = namedParams.fastSession ?? false;
 		resolvedFastSessionStrategy = namedParams.fastSessionStrategy ?? "hybrid";
 		resolvedFastSessionMaxInputItems = namedParams.fastSessionMaxInputItems ?? 30;
+		resolvedDeferFastSessionInputTrimming =
+			namedParams.deferFastSessionInputTrimming ?? false;
 	} else {
 		body = bodyOrParams as RequestBody;
 		resolvedCodexInstructions = codexInstructions;
@@ -713,6 +755,7 @@ export async function transformRequestBody(
 		resolvedFastSession = fastSession;
 		resolvedFastSessionStrategy = fastSessionStrategy;
 		resolvedFastSessionMaxInputItems = fastSessionMaxInputItems;
+		resolvedDeferFastSessionInputTrimming = deferFastSessionInputTrimming;
 	}
 
 	if (!body || typeof body !== "object") {
@@ -747,17 +790,17 @@ export async function transformRequestBody(
 	const reasoningModel = shouldUseNormalizedReasoningModel
 		? normalizedModel
 		: lookupModel;
-	const shouldApplyFastSessionTuning =
-		resolvedFastSession &&
-		(resolvedFastSessionStrategy === "always" ||
-			!isComplexFastSessionRequest(body, resolvedFastSessionMaxInputItems));
-	const latestUserText = getLatestUserText(body.input);
-	const isTrivialTurn = isTrivialLatestPrompt(latestUserText ?? "");
+	const fastSessionInputTrimPlan = resolveFastSessionInputTrimPlan(
+		body,
+		resolvedFastSession,
+		resolvedFastSessionStrategy,
+		resolvedFastSessionMaxInputItems,
+	);
+	const shouldApplyFastSessionTuning = fastSessionInputTrimPlan.shouldApply;
+	const isTrivialTurn = fastSessionInputTrimPlan.isTrivialTurn;
 	const shouldDisableToolsForTrivialTurn =
 		shouldApplyFastSessionTuning &&
 		isTrivialTurn;
-	const shouldPreferLatestUserOnly =
-		shouldApplyFastSessionTuning && isTrivialTurn;
 
 	// Codex required fields
 	// ChatGPT backend REQUIRES store=false (confirmed via testing)
@@ -789,10 +832,11 @@ export async function transformRequestBody(
 	if (body.input && Array.isArray(body.input)) {
 		let inputItems: InputItem[] = body.input;
 
-			if (shouldApplyFastSessionTuning) {
+			if (shouldApplyFastSessionTuning && !resolvedDeferFastSessionInputTrimming) {
 				inputItems =
 						trimInputForFastSession(inputItems, resolvedFastSessionMaxInputItems, {
-							preferLatestUserOnly: shouldPreferLatestUserOnly,
+							preferLatestUserOnly:
+								fastSessionInputTrimPlan.trim?.preferLatestUserOnly ?? false,
 						}) ?? inputItems;
 			}
 
diff --git a/lib/request/response-compaction.ts b/lib/request/response-compaction.ts
new file mode 100644
index 00000000..82d6f5f8
--- /dev/null
+++ b/lib/request/response-compaction.ts
@@ -0,0 +1,163 @@
+import { logDebug, logWarn } from "../logger.js";
+import type { InputItem, RequestBody } from "../types.js";
+import { isRecord } from "../utils.js";
+import { getModelCapabilities } from "./helpers/model-map.js";
+import { trimInputForFastSession } from "./request-transformer.js";
+
+export interface DeferredFastSessionInputTrim { // local trim settings used when server-side compaction is skipped or fails
+	maxItems: number; // forwarded to trimInputForFastSession as its item limit
+	preferLatestUserOnly: boolean; // forwarded as the `preferLatestUserOnly` trim option
+}
+
+export interface ResponseCompactionResult {
+	body: RequestBody; // body to send upstream: the original, a locally trimmed copy, or a server-compacted copy
+	mode: "compacted" | "trimmed" | "unchanged"; // which of those three outcomes produced `body`
+}
+
+export interface ApplyResponseCompactionParams {
+	body: RequestBody; // request whose `input` should be shrunk
+	requestUrl: string; // upstream URL; `/compact` is appended to its path for the compaction call
+	headers: Headers; // copied for the compaction call, with accept/content-type forced to JSON
+	trim: DeferredFastSessionInputTrim; // local trim used when compaction is unavailable or fails
+	fetchImpl: typeof fetch; // fetch implementation used for the compaction call
+	signal?: AbortSignal | null; // caller abort; if it has fired when the compaction call fails, the abort is rethrown
+	timeoutMs?: number; // compaction time budget; defaults to 4000 ms and is raised to at least 250 ms
+}
+
+function isInputItemArray(value: unknown): value is InputItem[] {
+	return Array.isArray(value) ? !value.some((entry) => !isRecord(entry)) : false; // every element must be a record
+}
+
+/** Finds the first item array among `output`/`input` on the payload, then on its nested `response`. */
+function extractCompactedInput(payload: unknown): InputItem[] | undefined {
+	if (!isRecord(payload)) return undefined;
+	for (const holder of [payload, payload.response]) {
+		if (!isRecord(holder)) return undefined;
+		for (const field of [holder.output, holder.input]) {
+			if (isInputItemArray(field)) return field;
+		}
+	}
+	return undefined;
+}
+
+/** Appends `/compact` to the URL path (unless already present), keeping any query string after it. */
+function buildCompactionUrl(requestUrl: string): string {
+	const queryStart = requestUrl.indexOf("?");
+	const hasQuery = queryStart !== -1;
+	const path = hasQuery ? requestUrl.slice(0, queryStart) : requestUrl;
+	if (path.endsWith("/compact")) return requestUrl;
+	return hasQuery ? `${path}/compact${requestUrl.slice(queryStart)}` : `${path}/compact`;
+}
+
+/** Builds the locally trimmed body, or returns undefined when there is no input array or trimming changes nothing. */
+function createFallbackBody(
+	body: RequestBody,
+	trim: DeferredFastSessionInputTrim,
+): RequestBody | undefined {
+	const originalInput = body.input;
+	if (!Array.isArray(originalInput)) return undefined;
+	const { maxItems, preferLatestUserOnly } = trim;
+	const candidate = trimInputForFastSession(originalInput, maxItems, { preferLatestUserOnly });
+	// A missing result or the very same array reference means the trim was a no-op.
+	return candidate == null || candidate === originalInput ? undefined : { ...body, input: candidate };
+}
+
+/**
+ * Creates a signal that aborts when `timeoutMs` elapses or when the optional
+ * upstream `signal` aborts, whichever happens first.
+ * @returns The derived signal plus a `cleanup` callback that clears the timer
+ *   and detaches the upstream listener.
+ */
+function createTimedAbortSignal(
+	signal: AbortSignal | null | undefined,
+	timeoutMs: number,
+): { signal: AbortSignal; cleanup: () => void } {
+	const linked = new AbortController();
+	const timer = setTimeout(
+		() => linked.abort(new Error("Response compaction timeout")),
+		timeoutMs,
+	);
+	const forwardAbort = (): void => linked.abort(signal?.reason ?? new Error("Aborted"));
+
+	// An already-aborted upstream never fires "abort" again, so mirror it right away.
+	if (signal?.aborted) forwardAbort();
+	else signal?.addEventListener("abort", forwardAbort, { once: true });
+
+	const cleanup = (): void => {
+		clearTimeout(timer);
+		signal?.removeEventListener("abort", forwardAbort);
+	};
+	return { signal: linked.signal, cleanup };
+}
+
+/**
+ * Shrinks a fast-session request input, preferring server-side compaction and
+ * falling back to the local trim described by `params.trim`.
+ *
+ * @param params - Request body, target URL, headers, trim settings, fetch
+ *   implementation, optional caller abort signal, and optional timeout.
+ * @returns `unchanged` when the local trim would be a no-op, `compacted` when
+ *   the `/compact` endpoint returned a usable item array, otherwise `trimmed`.
+ * @throws The caller's abort reason (or `Error("Aborted")`) when `params.signal`
+ *   is aborted by the time the compaction request fails.
+ */
+export async function applyResponseCompaction(
+	params: ApplyResponseCompactionParams,
+): Promise<ResponseCompactionResult> {
+	const fallbackBody = createFallbackBody(params.body, params.trim);
+	if (!fallbackBody) return { body: params.body, mode: "unchanged" };
+	if (!getModelCapabilities(params.body.model).compaction) return { body: fallbackBody, mode: "trimmed" };
+
+	const compactionHeaders = new Headers(params.headers);
+	compactionHeaders.set("accept", "application/json");
+	compactionHeaders.set("content-type", "application/json");
+	const timeoutMs = Math.max(250, params.timeoutMs ?? 4_000);
+	const { signal, cleanup } = createTimedAbortSignal(params.signal, timeoutMs);
+
+	try {
+		const response = await params.fetchImpl(buildCompactionUrl(params.requestUrl), {
+			method: "POST",
+			headers: compactionHeaders,
+			body: JSON.stringify({ model: params.body.model, input: params.body.input }),
+			signal,
+		});
+
+		if (!response.ok) {
+			// Release the unread error body so the connection is not held until GC.
+			await response.body?.cancel().catch(() => undefined);
+			logWarn("Responses compaction request failed; using trim fallback.", {
+				status: response.status,
+				statusText: response.statusText,
+				model: params.body.model,
+			});
+			return { body: fallbackBody, mode: "trimmed" };
+		}
+
+		const compactedInput = extractCompactedInput((await response.json()) as unknown);
+		if (!compactedInput || compactedInput.length === 0) {
+			logWarn("Responses compaction returned no reusable input; using trim fallback.", {
+				model: params.body.model,
+			});
+			return { body: fallbackBody, mode: "trimmed" };
+		}
+
+		logDebug("Applied server-side response compaction.", {
+			model: params.body.model,
+			originalInputLength: Array.isArray(params.body.input) ? params.body.input.length : 0,
+			compactedInputLength: compactedInput.length,
+		});
+		return { body: { ...params.body, input: compactedInput }, mode: "compacted" };
+	} catch (error) {
+		// Only a caller-initiated abort propagates; timeouts and other failures use the trim.
+		if (signal.aborted && params.signal?.aborted) {
+			throw params.signal.reason instanceof Error ? params.signal.reason : new Error("Aborted");
+		}
+		logWarn("Responses compaction failed; using trim fallback.", {
+			model: params.body.model,
+			error: error instanceof Error ? error.message : String(error),
+		});
+		return { body: fallbackBody, mode: "trimmed" };
+	} finally {
+		cleanup();
+	}
+}
diff --git a/lib/session-affinity.ts b/lib/session-affinity.ts
index 9a90950f..1ce27e30 100644
--- a/lib/session-affinity.ts
+++ b/lib/session-affinity.ts
@@ -98,6 +98,14 @@ export class SessionAffinityStore {
 	 * This method does not create a new affinity entry; callers that need to
 	 * upsert continuation state should use `rememberWithResponseId`.
 	 */
+	rememberLastResponseId(
+		sessionKey: string | null | undefined,
+		responseId: string | null | undefined,
+		now = Date.now(),
+	): void { // thin alias that forwards every argument to updateLastResponseId
+		this.updateLastResponseId(sessionKey, responseId, now);
+	} // NOTE(review): this alias sits between updateLastResponseId and the JSDoc written for it; consider moving the alias so that comment stays attached to updateLastResponseId
+
 	updateLastResponseId(
 		sessionKey: string | null | undefined,
 		responseId: string | null | undefined,
diff --git a/test/codex-manager-cli.test.ts b/test/codex-manager-cli.test.ts
index 613d6c93..5d41d384 100644
--- a/test/codex-manager-cli.test.ts
+++ b/test/codex-manager-cli.test.ts
@@ -5707,7 +5707,7 @@ describe("codex manager cli commands", () => {
 				normalized: string;
 				remapped: boolean;
 				promptFamily: string;
-				capabilities: { toolSearch: boolean; computerUse: boolean };
+				capabilities: { toolSearch: boolean; computerUse: boolean; compaction: boolean };
 			};
 		};
 		expect(payload.command).toBe("report");
@@ -5722,6 +5722,7 @@ describe("codex manager cli commands", () => {
 			capabilities: {
 				toolSearch: false,
 				computerUse: false,
+				compaction: false,
 			},
 		});
 	});
@@ -5760,7 +5761,7 @@ describe("codex manager cli commands", () => {
 				normalized: string;
 				remapped: boolean;
 				promptFamily: string;
-				capabilities: { toolSearch: boolean; computerUse: boolean };
+				capabilities: { toolSearch: boolean; computerUse: boolean; compaction: boolean };
 			};
 		};
 		expect(payload.modelSelection).toEqual({
@@ -5771,6 +5772,7 @@ describe("codex manager cli commands", () => {
 			capabilities: {
 				toolSearch: false,
 				computerUse: false,
+				compaction: true,
 			},
 		});
 	});
diff --git a/test/index.test.ts b/test/index.test.ts
index 954e6621..95b8a0cb 100644
--- a/test/index.test.ts
+++ b/test/index.test.ts
@@ -136,9 +136,13 @@ vi.mock("../lib/live-account-sync.js", () => ({
 	LiveAccountSync: liveAccountSyncCtorMock,
 }));
 
-vi.mock("../lib/request/request-transformer.js", () => ({
-	applyFastSessionDefaults: <T>(config: T) => config,
-}));
+vi.mock("../lib/request/request-transformer.js", async () => {
+	const actual = await vi.importActual("../lib/request/request-transformer.js"); // real exports are kept: response-compaction.ts imports trimInputForFastSession from this module
+	return {
+		...(actual as Record<string, unknown>),
+		applyFastSessionDefaults: <T>(config: T) => config, // still stubbed as an identity function
+	};
+});
 
 vi.mock("../lib/logger.js", () => ({
 	initLogger: vi.fn(),
@@ -1636,6 +1640,149 @@ describe("OpenAIOAuthPlugin fetch handler", () => {
 		expect(thirdHeaders.get("x-test-access-token")).toBe("access-alpha");
 	});
 
+	it("compacts fast-session input before sending the upstream request when compaction succeeds", async () => {
+		const fetchHelpers = await import("../lib/request/fetch-helpers.js");
+		const longInput = Array.from({ length: 12 }, (_value, index) => ({
+			type: "message",
+			role: index === 0 ? "developer" : "user",
+			content: index === 0 ? "system prompt" : `message-${index}`,
+		}));
+		const compactedInput = [
+			{
+				type: "message",
+				role: "assistant",
+				content: "compacted summary",
+			},
+		];
+
+		vi.mocked(fetchHelpers.transformRequestForCodex).mockResolvedValueOnce({ // transformer result carries a deferred trim, which is what makes the handler attempt compaction
+			updatedInit: {
+				method: "POST",
+				body: JSON.stringify({ model: "gpt-5-mini", input: longInput }),
+			},
+			body: { model: "gpt-5-mini", input: longInput },
+			deferredFastSessionInputTrim: { maxItems: 8, preferLatestUserOnly: false },
+		});
+
+		globalThis.fetch = vi // first queued response answers the /compact call, the second answers the upstream request
+			.fn()
+			.mockResolvedValueOnce(
+				new Response(JSON.stringify({ output: compactedInput }), { status: 200 }),
+			)
+			.mockResolvedValueOnce(
+				new Response(JSON.stringify({ content: "ok" }), { status: 200 }),
+			);
+
+		const { sdk } = await setupPlugin();
+		const response = await sdk.fetch!("https://api.openai.com/v1/chat", {
+			method: "POST",
+			body: JSON.stringify({ model: "gpt-5-mini", input: longInput }),
+		});
+
+		expect(response.status).toBe(200);
+		expect(globalThis.fetch).toHaveBeenCalledTimes(2);
+		expect(vi.mocked(globalThis.fetch).mock.calls[0]?.[0]).toBe(
+			"https://api.openai.com/v1/chat/compact",
+		);
+
+		const upstreamInit = vi.mocked(globalThis.fetch).mock.calls[1]?.[1] as RequestInit;
+		const upstreamBody =
+			typeof upstreamInit.body === "string"
+				? (JSON.parse(upstreamInit.body) as { input?: unknown[] })
+				: {};
+		expect(upstreamBody.input).toEqual(compactedInput); // upstream request must carry the server-compacted items, not the original 12
+	});
+
+	it("does not rerun fast-session compaction after rotating to another account", async () => {
+		const { AccountManager } = await import("../lib/accounts.js");
+		const fetchHelpers = await import("../lib/request/fetch-helpers.js");
+		const longInput = Array.from({ length: 12 }, (_value, index) => ({
+			type: "message",
+			role: index === 0 ? "developer" : "user",
+			content: index === 0 ? "system prompt" : `message-${index}`,
+		}));
+		const partiallyCompactedInput = Array.from({ length: 10 }, (_value, index) => ({ // 10 items: still above maxItems (8), so a second pass would have something to trim
+			type: "message",
+			role: index === 0 ? "developer" : "user",
+			content: index === 0 ? "compacted system prompt" : `compacted-${index}`,
+		}));
+		const manager = buildRoutingManager([
+			{
+				index: 0,
+				accountId: "token-primary",
+				accountIdSource: "token",
+				email: "alpha@example.com",
+				refreshToken: "refresh-1",
+				accessToken: "access-alpha",
+			},
+			{
+				index: 1,
+				accountId: "workspace-fallback",
+				accountIdSource: "org",
+				email: "beta@example.com",
+				refreshToken: "refresh-2",
+				accessToken: "access-beta",
+			},
+		]);
+		vi.spyOn(AccountManager, "loadFromDisk").mockResolvedValueOnce(manager as never);
+		vi.mocked(fetchHelpers.transformRequestForCodex).mockResolvedValueOnce({
+			updatedInit: {
+				method: "POST",
+				body: JSON.stringify({ model: "gpt-5-mini", input: longInput }),
+			},
+			body: { model: "gpt-5-mini", input: longInput },
+			deferredFastSessionInputTrim: { maxItems: 8, preferLatestUserOnly: false },
+		});
+		vi.mocked(fetchHelpers.createCodexHeaders).mockImplementation( // NOTE(review): not a *Once mock — presumably reset between tests elsewhere; confirm it cannot leak into later cases
+			(_init, accountId, accessToken) =>
+				new Headers({
+					"x-test-account-id": String(accountId),
+					"x-test-access-token": String(accessToken),
+				}),
+		);
+		globalThis.fetch = vi.fn(async (requestUrl, init) => { // routes by URL suffix and access token instead of call order
+			const normalizedUrl =
+				typeof requestUrl === "string" ? requestUrl : String(requestUrl);
+			if (normalizedUrl.endsWith("/compact")) {
+				return new Response(JSON.stringify({ output: partiallyCompactedInput }), {
+					status: 200,
+				});
+			}
+
+			const headers = new Headers(init?.headers);
+			if (headers.get("x-test-access-token") === "access-alpha") {
+				throw new Error("Network timeout"); // the primary account's upstream call fails; the test then expects the fallback account to be used
+			}
+
+			return new Response(JSON.stringify({ content: "ok" }), { status: 200 });
+		});
+
+		const { sdk } = await setupPlugin();
+		const response = await sdk.fetch!("https://api.openai.com/v1/chat", {
+			method: "POST",
+			body: JSON.stringify({ model: "gpt-5-mini", input: longInput }),
+		});
+
+		expect(response.status).toBe(200);
+		const fetchCalls = vi.mocked(globalThis.fetch).mock.calls;
+		const compactionCalls = fetchCalls.filter(([requestUrl]) =>
+			String(requestUrl).endsWith("/compact"),
+		);
+		expect(compactionCalls).toHaveLength(1); // compaction ran once even though two accounts were attempted
+
+		const finalCall = fetchCalls[fetchCalls.length - 1];
+		const finalHeaders = new Headers(finalCall?.[1]?.headers);
+		expect(finalHeaders.get("x-test-account-id")).toBe("workspace-fallback");
+		expect(finalHeaders.get("x-test-access-token")).toBe("access-beta");
+
+		const finalUpstreamInit = finalCall?.[1] as RequestInit;
+		const finalUpstreamBody =
+			typeof finalUpstreamInit.body === "string"
+				? (JSON.parse(finalUpstreamInit.body) as { input?: unknown[] })
+				: {};
+		expect(finalUpstreamBody.input).toEqual(partiallyCompactedInput); // the fallback account receives the compacted input from the first attempt unchanged
+	});
+
 	it("uses the refreshed token email when checking entitlement blocks", async () => {
 		const { AccountManager } = await import("../lib/accounts.js");
 		const manager = buildRoutingManager([
diff --git a/test/model-map.test.ts b/test/model-map.test.ts
index 6ad16967..6fe07b56 100644
--- a/test/model-map.test.ts
+++ b/test/model-map.test.ts
@@ -84,14 +84,22 @@ describe("model map", () => {
 			expect(getModelCapabilities("gpt-5.4")).toEqual({
 				toolSearch: true,
 				computerUse: true,
+				compaction: true,
 			});
 			expect(getModelCapabilities("gpt-5.4-pro")).toEqual({
 				toolSearch: false,
 				computerUse: true,
+				compaction: true,
 			});
 			expect(getModelCapabilities("gpt-5-mini")).toEqual({
 				toolSearch: false,
 				computerUse: false,
+				compaction: true,
+			});
+			expect(getModelCapabilities("gpt-5-nano")).toEqual({
+				toolSearch: false,
+				computerUse: false,
+				compaction: true,
 			});
 		});
 	});
diff --git a/test/request-transformer.test.ts b/test/request-transformer.test.ts
index 51eb1214..17efbbcf 100644
--- a/test/request-transformer.test.ts
+++ b/test/request-transformer.test.ts
@@ -653,9 +653,31 @@ describe('Request Transformer Module', () => {
 						},
 					},
 				};
-				const result = await transformRequestBody(body, codexInstructions);
-				expect(result.text?.verbosity).toBe('medium');
-				expect(result.text?.format).toEqual(body.text?.format);
+			const result = await transformRequestBody(body, codexInstructions);
+			expect(result.text?.verbosity).toBe('medium');
+			expect(result.text?.format).toEqual(body.text?.format);
+		});
+
+			it('defers fast-session input trimming when requested for downstream compaction', async () => {
+				const body: RequestBody = {
+					model: 'gpt-5.4',
+					input: Array.from({ length: 12 }, (_value, index) => ({
+						type: 'message',
+						role: index === 0 ? 'developer' : 'user',
+						content: index === 0 ? 'system prompt' : `message-${index}`,
+					})),
+				};
+				const result = await transformRequestBody(
+					body,
+					codexInstructions,
+					{ global: {}, models: {} },
+					true, // positional flag preceding fastSession — presumably codexMode; TODO confirm against the signature
+					true, // fastSession
+					'always', // fastSessionStrategy
+					8, // fastSessionMaxInputItems
+					true, // deferFastSessionInputTrimming
+				);
+				expect(result.input).toHaveLength(12); // all 12 items survive despite the limit of 8 because trimming was deferred
 			});
 
 		it('should set required Codex fields', async () => {
diff --git a/test/response-compaction.test.ts b/test/response-compaction.test.ts
new file mode 100644
index 00000000..38cf7261
--- /dev/null
+++ b/test/response-compaction.test.ts
@@ -0,0 +1,138 @@
+import { applyResponseCompaction } from "../lib/request/response-compaction.js";
+import type { RequestBody } from "../lib/types.js";
+
+/** Builds `length` message items: one developer "system prompt" followed by numbered user messages. */
+function buildInput(length: number) {
+	return Array.from({ length }, (_unused, position) => {
+		const isFirst = position === 0;
+		return { type: "message", role: isFirst ? "developer" : "user", content: isFirst ? "system prompt" : `message-${position}` };
+	});
+}
+
+describe("response compaction", () => {
+	it("returns unchanged when the fast-session trim would be a no-op", async () => {
+		const body: RequestBody = {
+			model: "gpt-5.4",
+			input: buildInput(2), // 2 items against maxItems 8: the test expects the trim to be a no-op
+		};
+		const fetchImpl = vi.fn<typeof fetch>();
+
+		const result = await applyResponseCompaction({
+			body,
+			requestUrl: "https://chatgpt.com/backend-api/codex/responses",
+			headers: new Headers(),
+			trim: { maxItems: 8, preferLatestUserOnly: false },
+			fetchImpl,
+		});
+
+		expect(result.mode).toBe("unchanged");
+		expect(fetchImpl).not.toHaveBeenCalled(); // no network call when there is nothing to shrink
+		expect(result.body.input).toEqual(body.input);
+	});
+
+	it("falls back to local trimming when the model does not support compaction", async () => {
+		const body: RequestBody = {
+			model: "gpt-5-codex", // per the test name, expected to lack the compaction capability
+			input: buildInput(10),
+		};
+		const fetchImpl = vi.fn<typeof fetch>();
+
+		const result = await applyResponseCompaction({
+			body,
+			requestUrl: "https://chatgpt.com/backend-api/codex/responses",
+			headers: new Headers(),
+			trim: { maxItems: 8, preferLatestUserOnly: false },
+			fetchImpl,
+		});
+
+		expect(result.mode).toBe("trimmed");
+		expect(fetchImpl).not.toHaveBeenCalled();
+		expect(result.body.input).toHaveLength(8);
+	});
+
+	it("replaces request input with server-compacted output when available", async () => {
+		const compactedOutput = [
+			{
+				type: "message",
+				role: "assistant",
+				content: "compacted summary",
+			},
+		];
+		const body: RequestBody = {
+			model: "gpt-5-mini",
+			input: buildInput(12),
+		};
+		const fetchImpl = vi.fn<typeof fetch>().mockResolvedValue(
+			new Response(JSON.stringify({ output: compactedOutput }), { status: 200 }),
+		);
+
+		const result = await applyResponseCompaction({
+			body,
+			requestUrl: "https://chatgpt.com/backend-api/codex/responses",
+			headers: new Headers({ accept: "text/event-stream" }), // asserted below to be overridden with application/json on the compaction call
+			trim: { maxItems: 8, preferLatestUserOnly: false },
+			fetchImpl,
+		});
+
+		expect(result.mode).toBe("compacted");
+		expect(result.body.input).toEqual(compactedOutput);
+		expect(fetchImpl).toHaveBeenCalledTimes(1);
+		expect(fetchImpl).toHaveBeenCalledWith(
+			"https://chatgpt.com/backend-api/codex/responses/compact",
+			expect.objectContaining({
+				method: "POST",
+				headers: expect.any(Headers),
+			}),
+		);
+
+		const requestInit = vi.mocked(fetchImpl).mock.calls[0]?.[1];
+		const headers = new Headers(requestInit?.headers);
+		expect(headers.get("accept")).toBe("application/json");
+		expect(headers.get("content-type")).toBe("application/json");
+	});
+
+	it("inserts /compact before query params in the compaction request URL", async () => {
+		const body: RequestBody = {
+			model: "gpt-5-mini",
+			input: buildInput(12),
+		};
+		const fetchImpl = vi.fn<typeof fetch>().mockResolvedValue(
+			new Response(JSON.stringify({ output: buildInput(8) }), { status: 200 }),
+		);
+
+		await applyResponseCompaction({
+			body,
+			requestUrl: "https://chatgpt.com/backend-api/codex/responses?stream=true",
+			headers: new Headers(),
+			trim: { maxItems: 8, preferLatestUserOnly: false },
+			fetchImpl,
+		});
+
+		expect(fetchImpl).toHaveBeenCalledWith(
+			"https://chatgpt.com/backend-api/codex/responses/compact?stream=true", // the query string stays after the appended path segment
+			expect.any(Object),
+		);
+	});
+
+	it("falls back to local trimming when the compaction request fails", async () => {
+		const body: RequestBody = {
+			model: "gpt-5.4",
+			input: buildInput(12),
+		};
+		const fetchImpl = vi.fn<typeof fetch>().mockResolvedValue(
+			new Response(JSON.stringify({ error: { message: "nope" } }), { status: 404 }), // non-OK status exercises the `!response.ok` fallback branch
+		);
+
+		const result = await applyResponseCompaction({
+			body,
+			requestUrl: "https://chatgpt.com/backend-api/codex/responses",
+			headers: new Headers(),
+			trim: { maxItems: 8, preferLatestUserOnly: false },
+			fetchImpl,
+		});
+
+		expect(result.mode).toBe("trimmed");
+		expect(result.body.input).toHaveLength(8);
+		expect(fetchImpl).toHaveBeenCalledTimes(1); // the compaction call was attempted exactly once before falling back
+	});
+});