Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ import {
isWorkspaceDisabledError,
} from "./lib/request/fetch-helpers.js";
import { applyFastSessionDefaults } from "./lib/request/request-transformer.js";
import { applyResponseCompaction } from "./lib/request/response-compaction.js";
import {
getRateLimitBackoff,
RATE_LIMIT_SHORT_RETRY_THRESHOLD_MS,
Expand Down Expand Up @@ -1351,10 +1352,13 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => {
fastSession: fastSessionEnabled,
fastSessionStrategy,
fastSessionMaxInputItems,
deferFastSessionInputTrimming: fastSessionEnabled,
},
);
let requestInit = transformation?.updatedInit ?? baseInit;
let transformedBody: RequestBody | undefined = transformation?.body;
let pendingFastSessionInputTrim =
transformation?.deferredFastSessionInputTrim;
const promptCacheKey = transformedBody?.prompt_cache_key;
let model = transformedBody?.model;
let modelFamily = model ? getModelFamily(model) : "gpt-5.1";
Expand Down Expand Up @@ -1672,6 +1676,38 @@ accountAttemptLoop: while (attempted.size < Math.max(1, accountCount)) {
promptCacheKey: effectivePromptCacheKey,
},
);
if (transformedBody && pendingFastSessionInputTrim) {
const activeFastSessionInputTrim = pendingFastSessionInputTrim;
pendingFastSessionInputTrim = undefined;
const compactionResult = await applyResponseCompaction({
body: transformedBody,
requestUrl: url,
headers,
trim: activeFastSessionInputTrim,
fetchImpl: async (requestUrl, requestInit) => {
const normalizedCompactionUrl =
typeof requestUrl === "string"
? requestUrl
: String(requestUrl);
return fetch(
normalizedCompactionUrl,
applyProxyCompatibleInit(
normalizedCompactionUrl,
requestInit,
),
);
},
signal: abortSignal,
timeoutMs: Math.min(fetchTimeoutMs, 4_000),
});
if (compactionResult.mode !== "unchanged") {
transformedBody = compactionResult.body;
requestInit = {
...(requestInit ?? {}),
body: JSON.stringify(transformedBody),
};
}
}
const quotaScheduleKey = `${entitlementAccountKey}:${model ?? modelFamily}`;
const capabilityModelKey = model ?? modelFamily;
const quotaDeferral = preemptiveQuotaScheduler.getDeferral(quotaScheduleKey);
Expand Down
27 changes: 25 additions & 2 deletions lib/request/fetch-helpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,12 @@ import { ProxyAgent } from "undici";
import { queuedRefresh } from "../refresh-queue.js";
import { logRequest, logError, logWarn } from "../logger.js";
import { getCodexInstructions, getModelFamily } from "../prompts/codex.js";
import { transformRequestBody, normalizeModel } from "./request-transformer.js";
import {
transformRequestBody,
normalizeModel,
resolveFastSessionInputTrimPlan,
type FastSessionInputTrimPlan,
} from "./request-transformer.js";
import {
attachResponseIdCapture,
convertSseToJson,
Expand Down Expand Up @@ -99,6 +104,12 @@ export interface ResolveUnsupportedCodexFallbackOptions {
customChain?: Record<string, string[]>;
}

/**
 * Result of `transformRequestForCodex`: the rewritten request body together
 * with a `RequestInit` whose `body` is the JSON-serialized form of `body`.
 */
export interface TransformRequestForCodexResult {
// Transformed request payload.
body: RequestBody;
// Original init with its `body` replaced by the serialized transformed body.
updatedInit: RequestInit;
// Fast-session trim settings to apply later; set only when the caller passed
// `deferFastSessionInputTrimming: true`, otherwise undefined.
deferredFastSessionInputTrim?: FastSessionInputTrimPlan["trim"];
}

function canonicalizeModelName(model: string | undefined): string | undefined {
if (!model) return undefined;
const trimmed = model.trim().toLowerCase();
Expand Down Expand Up @@ -651,8 +662,9 @@ export async function transformRequestForCodex(
fastSession?: boolean;
fastSessionStrategy?: "hybrid" | "always";
fastSessionMaxInputItems?: number;
deferFastSessionInputTrimming?: boolean;
},
): Promise<{ body: RequestBody; updatedInit: RequestInit } | undefined> {
): Promise<TransformRequestForCodexResult | undefined> {
const hasParsedBody =
parsedBody !== undefined &&
parsedBody !== null &&
Expand All @@ -670,6 +682,12 @@ export async function transformRequestForCodex(
body = JSON.parse(init.body) as RequestBody;
}
const originalModel = body.model;
const fastSessionInputTrimPlan = resolveFastSessionInputTrimPlan(
body,
options?.fastSession ?? false,
options?.fastSessionStrategy ?? "hybrid",
options?.fastSessionMaxInputItems ?? 30,
);

// Normalize model first to determine which instructions to fetch
// This ensures we get the correct model-specific prompt
Expand Down Expand Up @@ -700,6 +718,7 @@ export async function transformRequestForCodex(
options?.fastSession ?? false,
options?.fastSessionStrategy ?? "hybrid",
options?.fastSessionMaxInputItems ?? 30,
options?.deferFastSessionInputTrimming ?? false,
);

// Log transformed request
Expand All @@ -720,6 +739,10 @@ export async function transformRequestForCodex(
return {
body: transformedBody,
updatedInit: { ...(init ?? {}), body: JSON.stringify(transformedBody) },
deferredFastSessionInputTrim:
options?.deferFastSessionInputTrimming === true
? fastSessionInputTrimPlan.trim
: undefined,
};
} catch (e) {
logError(`${ERROR_MESSAGES.REQUEST_PARSE_ERROR}`, e);
Expand Down
20 changes: 17 additions & 3 deletions lib/request/helpers/model-map.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ export type PromptModelFamily =
/**
 * Feature flags describing what a model supports. Instances are grouped into
 * named tiers in `TOOL_CAPABILITIES` and referenced by `MODEL_PROFILES`.
 */
export interface ModelCapabilities {
// Model supports the tool-search capability.
toolSearch: boolean;
// Model supports computer-use tooling.
computerUse: boolean;
// Model supports response/input compaction.
compaction: boolean;
}

export interface ModelProfile {
Expand All @@ -48,14 +49,27 @@ const TOOL_CAPABILITIES = {
full: {
toolSearch: true,
computerUse: true,
compaction: true,
},
computerOnly: {
toolSearch: false,
computerUse: true,
compaction: false,
},
computerAndCompact: {
toolSearch: false,
computerUse: true,
compaction: true,
},
compactOnly: {
toolSearch: false,
computerUse: false,
compaction: true,
},
basic: {
toolSearch: false,
computerUse: false,
compaction: false,
},
} as const satisfies Record<string, ModelCapabilities>;

Expand Down Expand Up @@ -103,7 +117,7 @@ export const MODEL_PROFILES: Record<string, ModelProfile> = {
promptFamily: "gpt-5.2",
defaultReasoningEffort: "high",
supportedReasoningEfforts: ["medium", "high", "xhigh"],
capabilities: TOOL_CAPABILITIES.computerOnly,
capabilities: TOOL_CAPABILITIES.computerAndCompact,
},
"gpt-5.2-pro": {
normalizedModel: "gpt-5.2-pro",
Expand Down Expand Up @@ -145,14 +159,14 @@ export const MODEL_PROFILES: Record<string, ModelProfile> = {
promptFamily: "gpt-5.2",
defaultReasoningEffort: "medium",
supportedReasoningEfforts: ["medium"],
capabilities: TOOL_CAPABILITIES.basic,
capabilities: TOOL_CAPABILITIES.compactOnly,
},
"gpt-5-nano": {
normalizedModel: "gpt-5-nano",
promptFamily: "gpt-5.2",
defaultReasoningEffort: "medium",
supportedReasoningEfforts: ["medium"],
capabilities: TOOL_CAPABILITIES.basic,
capabilities: TOOL_CAPABILITIES.compactOnly,
},
} as const;

Expand Down
64 changes: 54 additions & 10 deletions lib/request/request-transformer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ export interface TransformRequestBodyParams {
fastSession?: boolean;
fastSessionStrategy?: FastSessionStrategy;
fastSessionMaxInputItems?: number;
deferFastSessionInputTrimming?: boolean;
}

const PLAN_MODE_ONLY_TOOLS = new Set(["request_user_input"]);
Expand Down Expand Up @@ -482,6 +483,15 @@ export function trimInputForFastSession(
return trimmed.slice(trimmed.length - safeMax);
}

/**
 * Decision produced by `resolveFastSessionInputTrimPlan` describing whether
 * and how a request's input should be trimmed for a fast session.
 */
export interface FastSessionInputTrimPlan {
// True when fast-session tuning applies to this request.
shouldApply: boolean;
// True when the latest user prompt was judged trivial.
isTrivialTurn: boolean;
// Concrete trim settings; present only when `shouldApply` is true.
trim?: {
// Maximum number of input items to retain.
maxItems: number;
// When true, trimming may keep only the latest user message.
preferLatestUserOnly: boolean;
};
}

function isTrivialLatestPrompt(text: string): boolean {
const normalized = text.trim();
if (!normalized) return false;
Expand Down Expand Up @@ -540,6 +550,33 @@ function isComplexFastSessionRequest(
return false;
}

export function resolveFastSessionInputTrimPlan(
body: RequestBody,
fastSession: boolean,
fastSessionStrategy: FastSessionStrategy,
fastSessionMaxInputItems: number,
): FastSessionInputTrimPlan {
const shouldApplyFastSessionTuning =
fastSession &&
(fastSessionStrategy === "always" ||
!isComplexFastSessionRequest(body, fastSessionMaxInputItems));
const latestUserText = getLatestUserText(body.input);
const isTrivialTurn = isTrivialLatestPrompt(latestUserText ?? "");
const shouldPreferLatestUserOnly =
shouldApplyFastSessionTuning && isTrivialTurn;

return {
shouldApply: shouldApplyFastSessionTuning,
isTrivialTurn,
trim: shouldApplyFastSessionTuning
? {
maxItems: fastSessionMaxInputItems,
preferLatestUserOnly: shouldPreferLatestUserOnly,
}
: undefined,
};
}

function getLatestUserText(input: InputItem[] | undefined): string | undefined {
if (!Array.isArray(input)) return undefined;
for (let i = input.length - 1; i >= 0; i--) {
Expand Down Expand Up @@ -672,6 +709,7 @@ export async function transformRequestBody(
fastSession?: boolean,
fastSessionStrategy?: FastSessionStrategy,
fastSessionMaxInputItems?: number,
deferFastSessionInputTrimming?: boolean,
): Promise<RequestBody>;
export async function transformRequestBody(
bodyOrParams: RequestBody | TransformRequestBodyParams,
Expand All @@ -681,6 +719,7 @@ export async function transformRequestBody(
fastSession = false,
fastSessionStrategy: FastSessionStrategy = "hybrid",
fastSessionMaxInputItems = 30,
deferFastSessionInputTrimming = false,
): Promise<RequestBody> {
const useNamedParams =
typeof codexInstructions === "undefined" &&
Expand All @@ -695,6 +734,7 @@ export async function transformRequestBody(
let resolvedFastSession: boolean;
let resolvedFastSessionStrategy: FastSessionStrategy;
let resolvedFastSessionMaxInputItems: number;
let resolvedDeferFastSessionInputTrimming: boolean;

if (useNamedParams) {
const namedParams = bodyOrParams as TransformRequestBodyParams;
Expand All @@ -705,6 +745,8 @@ export async function transformRequestBody(
resolvedFastSession = namedParams.fastSession ?? false;
resolvedFastSessionStrategy = namedParams.fastSessionStrategy ?? "hybrid";
resolvedFastSessionMaxInputItems = namedParams.fastSessionMaxInputItems ?? 30;
resolvedDeferFastSessionInputTrimming =
namedParams.deferFastSessionInputTrimming ?? false;
} else {
body = bodyOrParams as RequestBody;
resolvedCodexInstructions = codexInstructions;
Expand All @@ -713,6 +755,7 @@ export async function transformRequestBody(
resolvedFastSession = fastSession;
resolvedFastSessionStrategy = fastSessionStrategy;
resolvedFastSessionMaxInputItems = fastSessionMaxInputItems;
resolvedDeferFastSessionInputTrimming = deferFastSessionInputTrimming;
}

if (!body || typeof body !== "object") {
Expand Down Expand Up @@ -747,17 +790,17 @@ export async function transformRequestBody(
const reasoningModel = shouldUseNormalizedReasoningModel
? normalizedModel
: lookupModel;
const shouldApplyFastSessionTuning =
resolvedFastSession &&
(resolvedFastSessionStrategy === "always" ||
!isComplexFastSessionRequest(body, resolvedFastSessionMaxInputItems));
const latestUserText = getLatestUserText(body.input);
const isTrivialTurn = isTrivialLatestPrompt(latestUserText ?? "");
const fastSessionInputTrimPlan = resolveFastSessionInputTrimPlan(
body,
resolvedFastSession,
resolvedFastSessionStrategy,
resolvedFastSessionMaxInputItems,
);
const shouldApplyFastSessionTuning = fastSessionInputTrimPlan.shouldApply;
const isTrivialTurn = fastSessionInputTrimPlan.isTrivialTurn;
const shouldDisableToolsForTrivialTurn =
shouldApplyFastSessionTuning &&
isTrivialTurn;
const shouldPreferLatestUserOnly =
shouldApplyFastSessionTuning && isTrivialTurn;

// Codex required fields
// ChatGPT backend REQUIRES store=false (confirmed via testing)
Expand Down Expand Up @@ -789,10 +832,11 @@ export async function transformRequestBody(
if (body.input && Array.isArray(body.input)) {
let inputItems: InputItem[] = body.input;

if (shouldApplyFastSessionTuning) {
if (shouldApplyFastSessionTuning && !resolvedDeferFastSessionInputTrimming) {
inputItems =
trimInputForFastSession(inputItems, resolvedFastSessionMaxInputItems, {
preferLatestUserOnly: shouldPreferLatestUserOnly,
preferLatestUserOnly:
fastSessionInputTrimPlan.trim?.preferLatestUserOnly ?? false,
}) ?? inputItems;
}

Expand Down
Loading