diff --git a/index.ts b/index.ts index 13c03abb..42f02245 100644 --- a/index.ts +++ b/index.ts @@ -154,6 +154,7 @@ import { isWorkspaceDisabledError, } from "./lib/request/fetch-helpers.js"; import { applyFastSessionDefaults } from "./lib/request/request-transformer.js"; +import { applyResponseCompaction } from "./lib/request/response-compaction.js"; import { getRateLimitBackoff, RATE_LIMIT_SHORT_RETRY_THRESHOLD_MS, @@ -1351,10 +1352,13 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => { fastSession: fastSessionEnabled, fastSessionStrategy, fastSessionMaxInputItems, + deferFastSessionInputTrimming: fastSessionEnabled, }, ); let requestInit = transformation?.updatedInit ?? baseInit; let transformedBody: RequestBody | undefined = transformation?.body; + let pendingFastSessionInputTrim = + transformation?.deferredFastSessionInputTrim; const promptCacheKey = transformedBody?.prompt_cache_key; let model = transformedBody?.model; let modelFamily = model ? getModelFamily(model) : "gpt-5.1"; @@ -1672,6 +1676,38 @@ accountAttemptLoop: while (attempted.size < Math.max(1, accountCount)) { promptCacheKey: effectivePromptCacheKey, }, ); + if (transformedBody && pendingFastSessionInputTrim) { + const activeFastSessionInputTrim = pendingFastSessionInputTrim; + pendingFastSessionInputTrim = undefined; + const compactionResult = await applyResponseCompaction({ + body: transformedBody, + requestUrl: url, + headers, + trim: activeFastSessionInputTrim, + fetchImpl: async (requestUrl, requestInit) => { + const normalizedCompactionUrl = + typeof requestUrl === "string" + ? requestUrl + : String(requestUrl); + return fetch( + normalizedCompactionUrl, + applyProxyCompatibleInit( + normalizedCompactionUrl, + requestInit, + ), + ); + }, + signal: abortSignal, + timeoutMs: Math.min(fetchTimeoutMs, 4_000), + }); + if (compactionResult.mode !== "unchanged") { + transformedBody = compactionResult.body; + requestInit = { + ...(requestInit ?? 
{}), + body: JSON.stringify(transformedBody), + }; + } + } const quotaScheduleKey = `${entitlementAccountKey}:${model ?? modelFamily}`; const capabilityModelKey = model ?? modelFamily; const quotaDeferral = preemptiveQuotaScheduler.getDeferral(quotaScheduleKey); diff --git a/lib/request/fetch-helpers.ts b/lib/request/fetch-helpers.ts index 37043418..348ea4f8 100644 --- a/lib/request/fetch-helpers.ts +++ b/lib/request/fetch-helpers.ts @@ -8,7 +8,12 @@ import { ProxyAgent } from "undici"; import { queuedRefresh } from "../refresh-queue.js"; import { logRequest, logError, logWarn } from "../logger.js"; import { getCodexInstructions, getModelFamily } from "../prompts/codex.js"; -import { transformRequestBody, normalizeModel } from "./request-transformer.js"; +import { + transformRequestBody, + normalizeModel, + resolveFastSessionInputTrimPlan, + type FastSessionInputTrimPlan, +} from "./request-transformer.js"; import { attachResponseIdCapture, convertSseToJson, @@ -99,6 +104,12 @@ export interface ResolveUnsupportedCodexFallbackOptions { customChain?: Record; } +export interface TransformRequestForCodexResult { + body: RequestBody; + updatedInit: RequestInit; + deferredFastSessionInputTrim?: FastSessionInputTrimPlan["trim"]; +} + function canonicalizeModelName(model: string | undefined): string | undefined { if (!model) return undefined; const trimmed = model.trim().toLowerCase(); @@ -651,8 +662,9 @@ export async function transformRequestForCodex( fastSession?: boolean; fastSessionStrategy?: "hybrid" | "always"; fastSessionMaxInputItems?: number; + deferFastSessionInputTrimming?: boolean; }, -): Promise<{ body: RequestBody; updatedInit: RequestInit } | undefined> { +): Promise { const hasParsedBody = parsedBody !== undefined && parsedBody !== null && @@ -670,6 +682,12 @@ export async function transformRequestForCodex( body = JSON.parse(init.body) as RequestBody; } const originalModel = body.model; + const fastSessionInputTrimPlan = resolveFastSessionInputTrimPlan( + 
body, + options?.fastSession ?? false, + options?.fastSessionStrategy ?? "hybrid", + options?.fastSessionMaxInputItems ?? 30, + ); // Normalize model first to determine which instructions to fetch // This ensures we get the correct model-specific prompt @@ -700,6 +718,7 @@ export async function transformRequestForCodex( options?.fastSession ?? false, options?.fastSessionStrategy ?? "hybrid", options?.fastSessionMaxInputItems ?? 30, + options?.deferFastSessionInputTrimming ?? false, ); // Log transformed request @@ -720,6 +739,10 @@ export async function transformRequestForCodex( return { body: transformedBody, updatedInit: { ...(init ?? {}), body: JSON.stringify(transformedBody) }, + deferredFastSessionInputTrim: + options?.deferFastSessionInputTrimming === true + ? fastSessionInputTrimPlan.trim + : undefined, }; } catch (e) { logError(`${ERROR_MESSAGES.REQUEST_PARSE_ERROR}`, e); diff --git a/lib/request/helpers/model-map.ts b/lib/request/helpers/model-map.ts index 20a6832d..4f303ae0 100644 --- a/lib/request/helpers/model-map.ts +++ b/lib/request/helpers/model-map.ts @@ -25,6 +25,7 @@ export type PromptModelFamily = export interface ModelCapabilities { toolSearch: boolean; computerUse: boolean; + compaction: boolean; } export interface ModelProfile { @@ -48,14 +49,27 @@ const TOOL_CAPABILITIES = { full: { toolSearch: true, computerUse: true, + compaction: true, }, computerOnly: { toolSearch: false, computerUse: true, + compaction: false, + }, + computerAndCompact: { + toolSearch: false, + computerUse: true, + compaction: true, + }, + compactOnly: { + toolSearch: false, + computerUse: false, + compaction: true, }, basic: { toolSearch: false, computerUse: false, + compaction: false, }, } as const satisfies Record; @@ -103,7 +117,7 @@ export const MODEL_PROFILES: Record = { promptFamily: "gpt-5.2", defaultReasoningEffort: "high", supportedReasoningEfforts: ["medium", "high", "xhigh"], - capabilities: TOOL_CAPABILITIES.computerOnly, + capabilities: 
/**
 * Work out whether fast-session tuning applies to a request and, when it
 * does, which input-trim settings go with it.
 *
 * Tuning applies only when `fastSession` is on and either the strategy is
 * `"always"` or the request is not classified as complex by
 * `isComplexFastSessionRequest`. The "trivial turn" flag is computed from the
 * latest user text regardless of whether tuning applies.
 *
 * @param body - Request body whose `input` is inspected.
 * @param fastSession - Whether fast-session mode is enabled.
 * @param fastSessionStrategy - `"always"` bypasses the complexity check.
 * @param fastSessionMaxInputItems - Item cap, reported back as `trim.maxItems`.
 * @returns The plan; `trim` is `undefined` when tuning does not apply.
 */
export function resolveFastSessionInputTrimPlan(
	body: RequestBody,
	fastSession: boolean,
	fastSessionStrategy: FastSessionStrategy,
	fastSessionMaxInputItems: number,
): FastSessionInputTrimPlan {
	// The complexity check is only consulted when fast-session is enabled and
	// the strategy has not already forced tuning on.
	let shouldApply = false;
	if (fastSession) {
		shouldApply =
			fastSessionStrategy === "always" ||
			!isComplexFastSessionRequest(body, fastSessionMaxInputItems);
	}

	const newestUserText = getLatestUserText(body.input);
	const isTrivialTurn = isTrivialLatestPrompt(newestUserText ?? "");

	if (!shouldApply) {
		return { shouldApply, isTrivialTurn, trim: undefined };
	}

	return {
		shouldApply,
		isTrivialTurn,
		trim: {
			maxItems: fastSessionMaxInputItems,
			// Tuning is known to apply here, so this reduces to the trivial-turn flag.
			preferLatestUserOnly: isTrivialTurn,
		},
	};
}
false; } else { body = bodyOrParams as RequestBody; resolvedCodexInstructions = codexInstructions; @@ -713,6 +755,7 @@ export async function transformRequestBody( resolvedFastSession = fastSession; resolvedFastSessionStrategy = fastSessionStrategy; resolvedFastSessionMaxInputItems = fastSessionMaxInputItems; + resolvedDeferFastSessionInputTrimming = deferFastSessionInputTrimming; } if (!body || typeof body !== "object") { @@ -747,17 +790,17 @@ export async function transformRequestBody( const reasoningModel = shouldUseNormalizedReasoningModel ? normalizedModel : lookupModel; - const shouldApplyFastSessionTuning = - resolvedFastSession && - (resolvedFastSessionStrategy === "always" || - !isComplexFastSessionRequest(body, resolvedFastSessionMaxInputItems)); - const latestUserText = getLatestUserText(body.input); - const isTrivialTurn = isTrivialLatestPrompt(latestUserText ?? ""); + const fastSessionInputTrimPlan = resolveFastSessionInputTrimPlan( + body, + resolvedFastSession, + resolvedFastSessionStrategy, + resolvedFastSessionMaxInputItems, + ); + const shouldApplyFastSessionTuning = fastSessionInputTrimPlan.shouldApply; + const isTrivialTurn = fastSessionInputTrimPlan.isTrivialTurn; const shouldDisableToolsForTrivialTurn = shouldApplyFastSessionTuning && isTrivialTurn; - const shouldPreferLatestUserOnly = - shouldApplyFastSessionTuning && isTrivialTurn; // Codex required fields // ChatGPT backend REQUIRES store=false (confirmed via testing) @@ -789,10 +832,11 @@ export async function transformRequestBody( if (body.input && Array.isArray(body.input)) { let inputItems: InputItem[] = body.input; - if (shouldApplyFastSessionTuning) { + if (shouldApplyFastSessionTuning && !resolvedDeferFastSessionInputTrimming) { inputItems = trimInputForFastSession(inputItems, resolvedFastSessionMaxInputItems, { - preferLatestUserOnly: shouldPreferLatestUserOnly, + preferLatestUserOnly: + fastSessionInputTrimPlan.trim?.preferLatestUserOnly ?? false, }) ?? 
/**
 * Derive the `/compact` endpoint URL from the URL of the request being sent.
 *
 * `/compact` is appended to the path portion only, so any query string or
 * fragment stays at the end of the result. Trailing slashes on the path are
 * dropped before appending. A URL whose path already ends in `/compact`
 * (with or without a trailing slash) is returned untouched.
 *
 * @param requestUrl - URL of the upstream request.
 * @returns The URL with `/compact` appended to its path.
 */
function buildCompactionUrl(requestUrl: string): string {
	// The path ends at the first `?` or `#`, whichever comes first; a `?` that
	// appears after `#` belongs to the fragment, not to a query string.
	const suffixStart = requestUrl.search(/[?#]/);
	const path = suffixStart === -1 ? requestUrl : requestUrl.slice(0, suffixStart);
	const suffix = suffixStart === -1 ? "" : requestUrl.slice(suffixStart);

	// Strip trailing slashes so ".../responses/" does not become
	// ".../responses//compact".
	let basePath = path;
	while (basePath.endsWith("/")) {
		basePath = basePath.slice(0, -1);
	}

	if (basePath.endsWith("/compact")) return requestUrl;
	return `${basePath}/compact${suffix}`;
}
4_000), + ); + + try { + const response = await params.fetchImpl(buildCompactionUrl(params.requestUrl), { + method: "POST", + headers: compactionHeaders, + body: JSON.stringify({ + model: params.body.model, + input: params.body.input, + }), + signal, + }); + + if (!response.ok) { + logWarn("Responses compaction request failed; using trim fallback.", { + status: response.status, + statusText: response.statusText, + model: params.body.model, + }); + return { body: fallbackBody, mode: "trimmed" }; + } + + const payload = (await response.json()) as unknown; + const compactedInput = extractCompactedInput(payload); + if (!compactedInput || compactedInput.length === 0) { + logWarn("Responses compaction returned no reusable input; using trim fallback.", { + model: params.body.model, + }); + return { body: fallbackBody, mode: "trimmed" }; + } + + logDebug("Applied server-side response compaction.", { + model: params.body.model, + originalInputLength: Array.isArray(params.body.input) ? params.body.input.length : 0, + compactedInputLength: compactedInput.length, + }); + return { body: { ...params.body, input: compactedInput }, mode: "compacted" }; + } catch (error) { + if (signal.aborted && params.signal?.aborted) { + throw params.signal.reason instanceof Error + ? params.signal.reason + : new Error("Aborted"); + } + + logWarn("Responses compaction failed; using trim fallback.", { + model: params.body.model, + error: error instanceof Error ? error.message : String(error), + }); + return { body: fallbackBody, mode: "trimmed" }; + } finally { + cleanup(); + } +} diff --git a/lib/session-affinity.ts b/lib/session-affinity.ts index 9a90950f..1ce27e30 100644 --- a/lib/session-affinity.ts +++ b/lib/session-affinity.ts @@ -98,6 +98,14 @@ export class SessionAffinityStore { * This method does not create a new affinity entry; callers that need to * upsert continuation state should use `rememberWithResponseId`. 
rememberLastResponseId(
	sessionKey: string | null | undefined,
	responseId: string | null | undefined,
	now = Date.now(),
): void {
	// Alias for updateLastResponseId: all three arguments are forwarded
	// unchanged, so calling either name has the same effect.
	this.updateLastResponseId(sessionKey, responseId, now);
}
applyFastSessionDefaults: (config: T) => config, + }; +}); vi.mock("../lib/logger.js", () => ({ initLogger: vi.fn(), @@ -1636,6 +1640,149 @@ describe("OpenAIOAuthPlugin fetch handler", () => { expect(thirdHeaders.get("x-test-access-token")).toBe("access-alpha"); }); + it("compacts fast-session input before sending the upstream request when compaction succeeds", async () => { + const fetchHelpers = await import("../lib/request/fetch-helpers.js"); + const longInput = Array.from({ length: 12 }, (_value, index) => ({ + type: "message", + role: index === 0 ? "developer" : "user", + content: index === 0 ? "system prompt" : `message-${index}`, + })); + const compactedInput = [ + { + type: "message", + role: "assistant", + content: "compacted summary", + }, + ]; + + vi.mocked(fetchHelpers.transformRequestForCodex).mockResolvedValueOnce({ + updatedInit: { + method: "POST", + body: JSON.stringify({ model: "gpt-5-mini", input: longInput }), + }, + body: { model: "gpt-5-mini", input: longInput }, + deferredFastSessionInputTrim: { maxItems: 8, preferLatestUserOnly: false }, + }); + + globalThis.fetch = vi + .fn() + .mockResolvedValueOnce( + new Response(JSON.stringify({ output: compactedInput }), { status: 200 }), + ) + .mockResolvedValueOnce( + new Response(JSON.stringify({ content: "ok" }), { status: 200 }), + ); + + const { sdk } = await setupPlugin(); + const response = await sdk.fetch!("https://api.openai.com/v1/chat", { + method: "POST", + body: JSON.stringify({ model: "gpt-5-mini", input: longInput }), + }); + + expect(response.status).toBe(200); + expect(globalThis.fetch).toHaveBeenCalledTimes(2); + expect(vi.mocked(globalThis.fetch).mock.calls[0]?.[0]).toBe( + "https://api.openai.com/v1/chat/compact", + ); + + const upstreamInit = vi.mocked(globalThis.fetch).mock.calls[1]?.[1] as RequestInit; + const upstreamBody = + typeof upstreamInit.body === "string" + ? 
(JSON.parse(upstreamInit.body) as { input?: unknown[] }) + : {}; + expect(upstreamBody.input).toEqual(compactedInput); + }); + + it("does not rerun fast-session compaction after rotating to another account", async () => { + const { AccountManager } = await import("../lib/accounts.js"); + const fetchHelpers = await import("../lib/request/fetch-helpers.js"); + const longInput = Array.from({ length: 12 }, (_value, index) => ({ + type: "message", + role: index === 0 ? "developer" : "user", + content: index === 0 ? "system prompt" : `message-${index}`, + })); + const partiallyCompactedInput = Array.from({ length: 10 }, (_value, index) => ({ + type: "message", + role: index === 0 ? "developer" : "user", + content: index === 0 ? "compacted system prompt" : `compacted-${index}`, + })); + const manager = buildRoutingManager([ + { + index: 0, + accountId: "token-primary", + accountIdSource: "token", + email: "alpha@example.com", + refreshToken: "refresh-1", + accessToken: "access-alpha", + }, + { + index: 1, + accountId: "workspace-fallback", + accountIdSource: "org", + email: "beta@example.com", + refreshToken: "refresh-2", + accessToken: "access-beta", + }, + ]); + vi.spyOn(AccountManager, "loadFromDisk").mockResolvedValueOnce(manager as never); + vi.mocked(fetchHelpers.transformRequestForCodex).mockResolvedValueOnce({ + updatedInit: { + method: "POST", + body: JSON.stringify({ model: "gpt-5-mini", input: longInput }), + }, + body: { model: "gpt-5-mini", input: longInput }, + deferredFastSessionInputTrim: { maxItems: 8, preferLatestUserOnly: false }, + }); + vi.mocked(fetchHelpers.createCodexHeaders).mockImplementation( + (_init, accountId, accessToken) => + new Headers({ + "x-test-account-id": String(accountId), + "x-test-access-token": String(accessToken), + }), + ); + globalThis.fetch = vi.fn(async (requestUrl, init) => { + const normalizedUrl = + typeof requestUrl === "string" ? 
requestUrl : String(requestUrl); + if (normalizedUrl.endsWith("/compact")) { + return new Response(JSON.stringify({ output: partiallyCompactedInput }), { + status: 200, + }); + } + + const headers = new Headers(init?.headers); + if (headers.get("x-test-access-token") === "access-alpha") { + throw new Error("Network timeout"); + } + + return new Response(JSON.stringify({ content: "ok" }), { status: 200 }); + }); + + const { sdk } = await setupPlugin(); + const response = await sdk.fetch!("https://api.openai.com/v1/chat", { + method: "POST", + body: JSON.stringify({ model: "gpt-5-mini", input: longInput }), + }); + + expect(response.status).toBe(200); + const fetchCalls = vi.mocked(globalThis.fetch).mock.calls; + const compactionCalls = fetchCalls.filter(([requestUrl]) => + String(requestUrl).endsWith("/compact"), + ); + expect(compactionCalls).toHaveLength(1); + + const finalCall = fetchCalls[fetchCalls.length - 1]; + const finalHeaders = new Headers(finalCall?.[1]?.headers); + expect(finalHeaders.get("x-test-account-id")).toBe("workspace-fallback"); + expect(finalHeaders.get("x-test-access-token")).toBe("access-beta"); + + const finalUpstreamInit = finalCall?.[1] as RequestInit; + const finalUpstreamBody = + typeof finalUpstreamInit.body === "string" + ? 
(JSON.parse(finalUpstreamInit.body) as { input?: unknown[] }) + : {}; + expect(finalUpstreamBody.input).toEqual(partiallyCompactedInput); + }); + it("uses the refreshed token email when checking entitlement blocks", async () => { const { AccountManager } = await import("../lib/accounts.js"); const manager = buildRoutingManager([ diff --git a/test/model-map.test.ts b/test/model-map.test.ts index 6ad16967..6fe07b56 100644 --- a/test/model-map.test.ts +++ b/test/model-map.test.ts @@ -84,14 +84,22 @@ describe("model map", () => { expect(getModelCapabilities("gpt-5.4")).toEqual({ toolSearch: true, computerUse: true, + compaction: true, }); expect(getModelCapabilities("gpt-5.4-pro")).toEqual({ toolSearch: false, computerUse: true, + compaction: true, }); expect(getModelCapabilities("gpt-5-mini")).toEqual({ toolSearch: false, computerUse: false, + compaction: true, + }); + expect(getModelCapabilities("gpt-5-nano")).toEqual({ + toolSearch: false, + computerUse: false, + compaction: true, }); }); }); diff --git a/test/request-transformer.test.ts b/test/request-transformer.test.ts index 51eb1214..17efbbcf 100644 --- a/test/request-transformer.test.ts +++ b/test/request-transformer.test.ts @@ -653,9 +653,31 @@ describe('Request Transformer Module', () => { }, }, }; - const result = await transformRequestBody(body, codexInstructions); - expect(result.text?.verbosity).toBe('medium'); - expect(result.text?.format).toEqual(body.text?.format); + const result = await transformRequestBody(body, codexInstructions); + expect(result.text?.verbosity).toBe('medium'); + expect(result.text?.format).toEqual(body.text?.format); + }); + + it('defers fast-session input trimming when requested for downstream compaction', async () => { + const body: RequestBody = { + model: 'gpt-5.4', + input: Array.from({ length: 12 }, (_value, index) => ({ + type: 'message', + role: index === 0 ? 'developer' : 'user', + content: index === 0 ? 
/**
 * Build a synthetic conversation of `length` message items for the tests.
 *
 * Item 0 is a developer message with content "system prompt"; every later
 * item is a user message whose content is `message-<index>`.
 */
function buildInput(length: number) {
	const toMessage = (position: number) => {
		const isLeadingItem = position === 0;
		return {
			type: "message",
			role: isLeadingItem ? "developer" : "user",
			content: isLeadingItem ? "system prompt" : `message-${position}`,
		};
	};

	return Array.from({ length }, (_unused, position) => toMessage(position));
}
expect(result.body.input).toHaveLength(8); + }); + + it("replaces request input with server-compacted output when available", async () => { + const compactedOutput = [ + { + type: "message", + role: "assistant", + content: "compacted summary", + }, + ]; + const body: RequestBody = { + model: "gpt-5-mini", + input: buildInput(12), + }; + const fetchImpl = vi.fn().mockResolvedValue( + new Response(JSON.stringify({ output: compactedOutput }), { status: 200 }), + ); + + const result = await applyResponseCompaction({ + body, + requestUrl: "https://chatgpt.com/backend-api/codex/responses", + headers: new Headers({ accept: "text/event-stream" }), + trim: { maxItems: 8, preferLatestUserOnly: false }, + fetchImpl, + }); + + expect(result.mode).toBe("compacted"); + expect(result.body.input).toEqual(compactedOutput); + expect(fetchImpl).toHaveBeenCalledTimes(1); + expect(fetchImpl).toHaveBeenCalledWith( + "https://chatgpt.com/backend-api/codex/responses/compact", + expect.objectContaining({ + method: "POST", + headers: expect.any(Headers), + }), + ); + + const requestInit = vi.mocked(fetchImpl).mock.calls[0]?.[1]; + const headers = new Headers(requestInit?.headers); + expect(headers.get("accept")).toBe("application/json"); + expect(headers.get("content-type")).toBe("application/json"); + }); + + it("inserts /compact before query params in the compaction request URL", async () => { + const body: RequestBody = { + model: "gpt-5-mini", + input: buildInput(12), + }; + const fetchImpl = vi.fn().mockResolvedValue( + new Response(JSON.stringify({ output: buildInput(8) }), { status: 200 }), + ); + + await applyResponseCompaction({ + body, + requestUrl: "https://chatgpt.com/backend-api/codex/responses?stream=true", + headers: new Headers(), + trim: { maxItems: 8, preferLatestUserOnly: false }, + fetchImpl, + }); + + expect(fetchImpl).toHaveBeenCalledWith( + "https://chatgpt.com/backend-api/codex/responses/compact?stream=true", + expect.any(Object), + ); + }); + + it("falls back to 
local trimming when the compaction request fails", async () => { + const body: RequestBody = { + model: "gpt-5.4", + input: buildInput(12), + }; + const fetchImpl = vi.fn().mockResolvedValue( + new Response(JSON.stringify({ error: { message: "nope" } }), { status: 404 }), + ); + + const result = await applyResponseCompaction({ + body, + requestUrl: "https://chatgpt.com/backend-api/codex/responses", + headers: new Headers(), + trim: { maxItems: 8, preferLatestUserOnly: false }, + fetchImpl, + }); + + expect(result.mode).toBe("trimmed"); + expect(result.body.input).toHaveLength(8); + expect(fetchImpl).toHaveBeenCalledTimes(1); + }); +});