diff --git a/src/common/utils/ai/modelCapabilities.test.ts b/src/common/utils/ai/modelCapabilities.test.ts index a902801b38..d665ae7c92 100644 --- a/src/common/utils/ai/modelCapabilities.test.ts +++ b/src/common/utils/ai/modelCapabilities.test.ts @@ -1,5 +1,10 @@ import { describe, expect, it } from "bun:test"; -import { getModelCapabilities, getSupportedInputMediaTypes } from "./modelCapabilities"; +import { + getModelCapabilities, + getSupportedEndpoints, + getSupportedEndpointsResolved, + getSupportedInputMediaTypes, +} from "./modelCapabilities"; describe("getModelCapabilities", () => { it("returns capabilities for known models", () => { @@ -51,6 +56,15 @@ describe("getModelCapabilities", () => { it("returns null for unknown models", () => { expect(getModelCapabilities("anthropic:this-model-does-not-exist")).toBeNull(); }); + + it("inherits bare-model capabilities when provider-scoped entry omits them", () => { + // github_copilot/gpt-4o in models.json lacks supports_pdf_input, + // but bare gpt-4o has it. The merge-across-keys strategy must fill + // in the missing field from the bare model. 
+ const caps = getModelCapabilities("github-copilot:gpt-4o"); + expect(caps).not.toBeNull(); + expect(caps?.supportsPdfInput).toBe(true); + }); }); describe("getSupportedInputMediaTypes", () => { @@ -66,3 +80,97 @@ describe("getSupportedInputMediaTypes", () => { expect(supported?.has("pdf")).toBe(true); }); }); + +describe("getSupportedEndpoints", () => { + it("returns endpoints for a responses-only model", () => { + // gpt-5.4-pro in models-extra has supported_endpoints: ["/v1/responses"] + const endpoints = getSupportedEndpoints("openai:gpt-5.4-pro"); + expect(endpoints).toEqual(["/v1/responses"]); + }); + + it("returns endpoints for a chat-only Copilot model", () => { + // github_copilot/claude-sonnet-4 in models.json has supported_endpoints: ["/v1/chat/completions"] + const endpoints = getSupportedEndpoints("github-copilot:claude-sonnet-4"); + expect(endpoints).toEqual(["/v1/chat/completions"]); + }); + + it("returns both endpoints for a model supporting chat and responses", () => { + // gpt-5.4 in models-extra has supported_endpoints: ["/v1/chat/completions", "/v1/responses"] + const endpoints = getSupportedEndpoints("openai:gpt-5.4"); + expect(endpoints).toContain("/v1/chat/completions"); + expect(endpoints).toContain("/v1/responses"); + }); + + it("returns endpoints for Copilot model using provider alias lookup", () => { + // github_copilot/gpt-5.2 in models.json has both endpoints + const endpoints = getSupportedEndpoints("github-copilot:gpt-5.2"); + expect(endpoints).toContain("/v1/chat/completions"); + expect(endpoints).toContain("/v1/responses"); + }); + + it("prefers provider-scoped endpoints over bare model endpoints", () => { + // bare "gpt-5.2" includes /v1/batch, but github_copilot/gpt-5.2 does not. + // The provider-scoped entry should win when queried with a provider prefix. 
+ const endpoints = getSupportedEndpoints("github-copilot:gpt-5.2"); + expect(endpoints).not.toContain("/v1/batch"); + + // Sanity: the bare model does include /v1/batch + const bareEndpoints = getSupportedEndpoints("gpt-5.2"); + expect(bareEndpoints).toContain("/v1/batch"); + }); + + it("returns null when model metadata exists but has no endpoint info", () => { + // claude-opus-4-5 in models-extra has no supported_endpoints + const endpoints = getSupportedEndpoints("anthropic:claude-opus-4-5"); + expect(endpoints).toBeNull(); + }); + + it("returns null for completely unknown models", () => { + expect(getSupportedEndpoints("unknown:does-not-exist")).toBeNull(); + }); +}); + +describe("getSupportedEndpointsResolved", () => { + it("resolves Copilot model with provider-scoped metadata", () => { + // github_copilot/gpt-5.1-codex-max in models.json has supported_endpoints: ["/v1/responses"] + const endpoints = getSupportedEndpointsResolved("github-copilot:gpt-5.1-codex-max", null); + expect(endpoints).toEqual(["/v1/responses"]); + }); + + it("prefers provider-scoped endpoints over bare model in resolved path", () => { + // github_copilot/gpt-5.2 restricts to chat+responses (no /v1/batch), + // while bare gpt-5.2 includes /v1/batch. Provider-scoped must win. + const endpoints = getSupportedEndpointsResolved("github-copilot:gpt-5.2", null); + expect(endpoints).toContain("/v1/chat/completions"); + expect(endpoints).toContain("/v1/responses"); + expect(endpoints).not.toContain("/v1/batch"); + }); + + it("falls back to bare model name when provider-scoped entry is missing", () => { + // github_copilot/gpt-5.4 does NOT exist in models.json, but + // bare "gpt-5.4" in models-extra has supported_endpoints. 
+ const endpoints = getSupportedEndpointsResolved("github-copilot:gpt-5.4", null); + expect(endpoints).toContain("/v1/responses"); + }); + + it("resolves endpoints via config-based mappedToModel alias", () => { + // "custom-copilot-alias" has no provider-scoped or bare-model metadata, + // but the providers config maps it to gpt-5.4 which has known endpoints. + const config = { + "github-copilot": { + models: [{ id: "custom-copilot-alias", mappedToModel: "gpt-5.4" }], + }, + }; + const endpoints = getSupportedEndpointsResolved("github-copilot:custom-copilot-alias", config); + expect(endpoints).toContain("/v1/responses"); + }); + + it("returns null for unknown model when config has no mapping", () => { + // Without config, the same unknown model returns null. + expect(getSupportedEndpointsResolved("github-copilot:custom-copilot-alias", null)).toBeNull(); + }); + + it("returns null for unknown model without any metadata", () => { + expect(getSupportedEndpointsResolved("github-copilot:totally-fake-model", null)).toBeNull(); + }); +}); diff --git a/src/common/utils/ai/modelCapabilities.ts b/src/common/utils/ai/modelCapabilities.ts index 952d840050..f919a5cfa6 100644 --- a/src/common/utils/ai/modelCapabilities.ts +++ b/src/common/utils/ai/modelCapabilities.ts @@ -1,4 +1,4 @@ -import type { ProvidersConfigMap } from "@/common/orpc/types"; +import type { ProvidersConfigWithModels } from "@/common/utils/providers/modelEntries"; import { resolveModelForMetadata } from "@/common/utils/providers/modelEntries"; import modelsData from "../tokens/models.json"; import { modelsExtra } from "../tokens/models-extra"; @@ -11,6 +11,7 @@ interface RawModelCapabilitiesData { supports_video_input?: boolean; max_pdf_size_mb?: number; litellm_provider?: string; + supported_endpoints?: string[]; [key: string]: unknown; } @@ -41,11 +42,12 @@ function generateLookupKeys(modelString: string): string[] { const modelName = colonIndex !== -1 ? 
modelString.slice(colonIndex + 1) : modelString; const litellmProvider = PROVIDER_KEY_ALIASES[provider] ?? provider; - const keys: string[] = [ - modelName, // Direct model name (e.g., "claude-opus-4-5") - ]; + const keys: string[] = []; if (provider) { + // Provider-scoped keys first so provider-specific metadata (e.g. + // `github_copilot/gpt-5.2` restricting `/v1/batch`) wins over the + // generic bare-model entry. keys.push( `${litellmProvider}/${modelName}`, // "ollama/gpt-oss:20b" `${litellmProvider}/${modelName}-cloud` // "ollama/gpt-oss:20b-cloud" (LiteLLM convention) @@ -59,6 +61,9 @@ function generateLookupKeys(modelString: string): string[] { } } + // Bare model name is the last-resort fallback. + keys.push(modelName); + return keys; } @@ -90,25 +95,34 @@ export function getModelCapabilities(modelString: string): ModelCapabilities | n const modelsExtraRecord = modelsExtra as unknown as Record; const modelsDataRecord = modelsData as unknown as Record; - // Merge models.json (upstream) + models-extra.ts (local overrides). Extras win. - // This avoids wiping capabilities (e.g. PDF support) when modelsExtra only overrides - // pricing/token limits. + // Merge across ALL matching lookup keys so provider-scoped entries (first + // in lookup order) override specific fields while bare-model entries fill + // in capabilities the provider-scoped entry omits (e.g. github_copilot/gpt-4o + // lacks supports_pdf_input but bare gpt-4o has it). + // Within each key, modelsExtra wins over modelsData (upstream). + let merged: RawModelCapabilitiesData | null = null; for (const key of lookupKeys) { const base = modelsDataRecord[key]; const extra = modelsExtraRecord[key]; if (base || extra) { - const merged: RawModelCapabilitiesData = { ...(base ?? {}), ...(extra ?? {}) }; - return extractModelCapabilities(merged); + const keyData: RawModelCapabilitiesData = Object.assign({}, base ?? {}, extra ?? 
{}); + if (merged != null) { + // Earlier keys (provider-scoped) take priority; later keys (bare model) + // fill gaps but don't override. + merged = Object.assign({}, keyData, merged); + } else { + merged = keyData; + } } } - return null; + return merged ? extractModelCapabilities(merged) : null; } export function getModelCapabilitiesResolved( modelString: string, - providersConfig: ProvidersConfigMap | null + providersConfig: ProvidersConfigWithModels | null ): ModelCapabilities | null { const metadataModel = resolveModelForMetadata(modelString, providersConfig); return getModelCapabilities(metadataModel); @@ -127,3 +141,57 @@ export function getSupportedInputMediaTypes( if (caps.supportsVideoInput) result.add("video"); return result; } + +/** + * Resolve supported API endpoints for a model string from static metadata. + * + * Returns the `supported_endpoints` array (e.g. `["/v1/responses"]`) when + * found in models-extra or models.json, or `null` when no metadata exists + * or the first matching entry lacks endpoint information. + */ +export function getSupportedEndpoints(modelString: string): string[] | null { + const normalized = normalizeToCanonical(modelString); + const lookupKeys = generateLookupKeys(normalized); + + const modelsExtraRecord = modelsExtra as unknown as Record; + const modelsDataRecord = modelsData as unknown as Record; + + for (const key of lookupKeys) { + const base = modelsDataRecord[key]; + const extra = modelsExtraRecord[key]; + + if (base || extra) { + // Extra wins for the same field; merge so we don't lose base-only endpoints. + const merged: RawModelCapabilitiesData = { ...(base ?? {}), ...(extra ?? {}) }; + return merged.supported_endpoints ?? null; + } + } + + return null; +} + +/** + * Like `getSupportedEndpoints`, but falls back to config aliases + * (e.g. `mappedToModel`) so gateway-scoped model IDs inherit metadata + * from the underlying model when the direct lookup yields no endpoints. 
+ */ +export function getSupportedEndpointsResolved( + modelString: string, + providersConfig: ProvidersConfigWithModels | null +): string[] | null { + // Try the raw (possibly gateway-scoped) key first so provider-specific + // endpoint overrides (e.g. `github_copilot/gpt-5.2`) take priority. + const direct = getSupportedEndpoints(modelString); + if (direct != null) { + return direct; + } + + // Fall back to the metadata-resolved alias (e.g. mappedToModel) so + // models without a provider-scoped entry inherit from the bare model. + const metadataModel = resolveModelForMetadata(modelString, providersConfig); + if (metadataModel !== modelString) { + return getSupportedEndpoints(metadataModel); + } + + return null; +} diff --git a/src/common/utils/providers/modelEntries.ts b/src/common/utils/providers/modelEntries.ts index 03841c57d6..54588d02be 100644 --- a/src/common/utils/providers/modelEntries.ts +++ b/src/common/utils/providers/modelEntries.ts @@ -1,6 +1,16 @@ -import type { ProviderModelEntry, ProvidersConfigMap } from "@/common/orpc/types"; +import type { ProviderModelEntry } from "@/common/orpc/types"; import { normalizeToCanonical } from "@/common/utils/ai/models"; +/** + * Minimal providers-config shape needed for model-entry lookup. + * Both the raw disk config (`ProvidersConfig`) and the API-facing map + * (`ProvidersConfigMap`) satisfy this, so callers don't need to convert. 
+ */ +export type ProvidersConfigWithModels = Record< + string, + { models?: ProviderModelEntry[] } | undefined +>; + interface ParsedProviderModelId { provider: string; modelId: string; @@ -37,7 +47,7 @@ function parseProviderModelId(fullModelId: string): ParsedProviderModelId | null } function findProviderModelEntry( - providersConfig: ProvidersConfigMap | null, + providersConfig: ProvidersConfigWithModels | null, provider: string, modelId: string ): ProviderModelEntry | null { @@ -65,7 +75,7 @@ function findProviderModelEntry( */ function findProviderModelEntryScoped( fullModelId: string, - providersConfig: ProvidersConfigMap | null + providersConfig: ProvidersConfigWithModels | null ): ProviderModelEntry | null { const rawParsed = parseProviderModelId(fullModelId); if (rawParsed) { @@ -94,7 +104,7 @@ function findProviderModelEntryScoped( export function getModelContextWindowOverride( fullModelId: string, - providersConfig: ProvidersConfigMap | null + providersConfig: ProvidersConfigWithModels | null ): number | null { const entry = findProviderModelEntryScoped(fullModelId, providersConfig); return entry ? getProviderModelEntryContextWindowTokens(entry) : null; @@ -102,7 +112,7 @@ export function getModelContextWindowOverride( export function resolveModelForMetadata( fullModelId: string, - providersConfig: ProvidersConfigMap | null + providersConfig: ProvidersConfigWithModels | null ): string { const entry = findProviderModelEntryScoped(fullModelId, providersConfig); return (entry ? getProviderModelEntryMappedTo(entry) : null) ?? 
fullModelId; diff --git a/src/common/utils/tokens/models-extra.ts b/src/common/utils/tokens/models-extra.ts index 9c417abc2c..9177977233 100644 --- a/src/common/utils/tokens/models-extra.ts +++ b/src/common/utils/tokens/models-extra.ts @@ -113,6 +113,7 @@ export const modelsExtra: Record = { supports_vision: true, supports_reasoning: true, supports_response_schema: true, + supported_endpoints: ["/v1/chat/completions", "/v1/responses"], knowledge_cutoff: "2025-08-31", }, diff --git a/src/node/services/__tests__/copilotResponsesModel.test.ts b/src/node/services/__tests__/copilotResponsesModel.test.ts new file mode 100644 index 0000000000..68de7beef5 --- /dev/null +++ b/src/node/services/__tests__/copilotResponsesModel.test.ts @@ -0,0 +1,1100 @@ +import { describe, expect, it } from "bun:test"; +import type { + LanguageModelV3, + LanguageModelV3CallOptions, + LanguageModelV3FinishReason, + LanguageModelV3StreamPart, + LanguageModelV3StreamResult, + LanguageModelV3Usage, +} from "@ai-sdk/provider"; +import { wrapCopilotResponsesModel } from "../copilotResponsesModel"; + +// --------------------------------------------------------------------------- +// Test helpers +// --------------------------------------------------------------------------- + +/** Minimal stub LanguageModelV3 whose doStream returns a hand-crafted stream. 
*/ +function createStubModel( + streamParts: LanguageModelV3StreamPart[] +): LanguageModelV3 & { lastStreamOptions: LanguageModelV3CallOptions | null } { + const stub = { + specificationVersion: "v3" as const, + provider: "github-copilot.responses", + modelId: "gpt-5.2", + supportedUrls: {}, + lastStreamOptions: null as LanguageModelV3CallOptions | null, + + doGenerate: () => { + throw new Error("doGenerate not implemented in stub"); + }, + + doStream(options: LanguageModelV3CallOptions): PromiseLike { + stub.lastStreamOptions = options; + return Promise.resolve({ + stream: new ReadableStream({ + start(controller) { + for (const part of streamParts) { + controller.enqueue(part); + } + controller.close(); + }, + }), + }); + }, + }; + return stub; +} + +/** Collect all parts from a ReadableStream. */ +async function collectStream( + stream: ReadableStream +): Promise { + const parts: LanguageModelV3StreamPart[] = []; + const reader = stream.getReader(); + for (;;) { + const { done, value } = await reader.read(); + if (done) break; + parts.push(value); + } + return parts; +} + +/** Build a raw chunk wrapping a Responses SSE event. */ +function raw(event: Record): LanguageModelV3StreamPart { + return { type: "raw", rawValue: event }; +} + +/** Build a V3-compliant usage object for tests. */ +function makeUsage(inputTotal: number, outputTotal: number): LanguageModelV3Usage { + return { + inputTokens: { + total: inputTotal, + noCache: undefined, + cacheRead: undefined, + cacheWrite: undefined, + }, + outputTokens: { total: outputTotal, text: undefined, reasoning: undefined }, + }; +} + +/** Build a finish stream part with V3-compliant usage. */ +function finish( + reason: LanguageModelV3FinishReason["unified"], + inputTotal: number, + outputTotal: number +): LanguageModelV3StreamPart { + // Use a typed variable to satisfy consistent-type-assertions lint rule. 
+ const part: LanguageModelV3StreamPart = { + type: "finish", + finishReason: { unified: reason, raw: reason }, + usage: makeUsage(inputTotal, outputTotal), + }; + return part; +} + +// --------------------------------------------------------------------------- +// Minimal stub call options +// --------------------------------------------------------------------------- +const baseOptions: LanguageModelV3CallOptions = { + prompt: [], +}; + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe("wrapCopilotResponsesModel", () => { + describe("text lifecycle", () => { + it("rebuilds coherent text-start / text-delta / text-end from raw events", async () => { + const messageItemId = "msg_001"; + const stub = createStubModel([ + raw({ + type: "response.output_item.added", + item: { id: messageItemId, type: "message" }, + output_index: 0, + }), + raw({ + type: "response.output_text.delta", + item_id: messageItemId, + content_index: 0, + delta: "Hello", + }), + raw({ + type: "response.output_text.delta", + item_id: messageItemId, + content_index: 0, + delta: " world", + }), + raw({ + type: "response.output_item.done", + item: { id: messageItemId, type: "message" }, + output_index: 0, + }), + finish("stop", 10, 5), + ]); + + const wrapped = wrapCopilotResponsesModel(stub); + const result = await wrapped.doStream(baseOptions); + const parts = await collectStream(result.stream); + + // Should see coherent lifecycle from our wrapper + const types = parts.map((p) => p.type); + expect(types).toEqual([ + "text-start", // from response.output_item.added (message) + "text-delta", // from response.output_text.delta "Hello" + "text-delta", // from response.output_text.delta " world" + "text-end", // from response.output_item.done (message) + "finish", + ]); + + // Verify text content + const deltas = parts.filter((p) => p.type === "text-delta") as Array<{ + 
type: "text-delta"; + delta: string; + }>; + expect(deltas.map((d) => d.delta)).toEqual(["Hello", " world"]); + }); + + it("handles orphaned text deltas (delta before output_item.added)", async () => { + const messageItemId = "msg_orphan"; + const stub = createStubModel([ + // Copilot sends text delta BEFORE output_item.added — the wrapper should + // defensively emit text-start when it sees the first delta for an unknown item. + raw({ + type: "response.output_text.delta", + item_id: messageItemId, + content_index: 0, + delta: "Early", + }), + raw({ + type: "response.output_item.added", + item: { id: messageItemId, type: "message" }, + output_index: 0, + }), + raw({ + type: "response.output_text.delta", + item_id: messageItemId, + content_index: 0, + delta: " text", + }), + raw({ + type: "response.output_item.done", + item: { id: messageItemId, type: "message" }, + output_index: 0, + }), + finish("stop", 5, 3), + ]); + + const wrapped = wrapCopilotResponsesModel(stub); + const result = await wrapped.doStream(baseOptions); + const parts = await collectStream(result.stream); + + const types = parts.map((p) => p.type); + // The first delta triggers a defensive text-start. + // The output_item.added for message also triggers text-start (for a new key). + // But both share the same itemId, so output_item.done closes them. + expect(types).toContain("text-start"); + expect(types).toContain("text-delta"); + expect(types).toContain("text-end"); + expect(types).toContain("finish"); + + // The deltas should both be present + const deltas = parts.filter((p) => p.type === "text-delta") as Array<{ + type: "text-delta"; + delta: string; + }>; + expect(deltas.map((d) => d.delta)).toContain("Early"); + expect(deltas.map((d) => d.delta)).toContain(" text"); + }); + + // Regression: flush() must emit text-end with the same composite id used + // by text-start, so downstream consumers (e.g. DevTools) can match them. 
+ it("flush emits text-end with the same composite id as text-start", async () => { + const messageItemId = "msg_flush_regression"; + const expectedId = `${messageItemId}:0`; + // Stream that opens a text part but never closes it — no content_part.done + // or output_item.done, just a finish event followed by stream close. + const stub = createStubModel([ + raw({ + type: "response.output_item.added", + item: { id: messageItemId, type: "message" }, + output_index: 0, + }), + raw({ + type: "response.output_text.delta", + item_id: messageItemId, + content_index: 0, + delta: "Hello world", + }), + // Stream ends here — no content_part.done, no output_item.done. + finish("stop", 10, 5), + ]); + + const wrapped = wrapCopilotResponsesModel(stub); + const result = await wrapped.doStream(baseOptions); + const parts = await collectStream(result.stream); + + // The flush fallback must close the open text part. + const textEnds = parts.filter((p) => p.type === "text-end") as Array<{ + type: "text-end"; + id: string; + }>; + expect(textEnds).toHaveLength(1); + expect(textEnds[0].id).toBe(expectedId); + + // Verify the full lifecycle is coherent: text-start, text-delta, text-end + // all use the same composite id. + const textStarts = parts.filter((p) => p.type === "text-start") as Array<{ + type: "text-start"; + id: string; + }>; + expect(textStarts).toHaveLength(1); + expect(textStarts[0].id).toBe(expectedId); + + const textDeltas = parts.filter((p) => p.type === "text-delta") as Array<{ + type: "text-delta"; + id: string; + delta: string; + }>; + expect(textDeltas).toHaveLength(1); + expect(textDeltas[0].id).toBe(expectedId); + expect(textDeltas[0].delta).toBe("Hello world"); + }); + }); + + describe("inner semantic part suppression", () => { + it("suppresses inner text-start/text-delta/text-end and rebuilds from raw", async () => { + // Simulates what the real inner model emits: both raw events AND semantic parts. 
+ // The wrapper should suppress the semantic parts and only emit its own rebuilds. + const innerTextStart: LanguageModelV3StreamPart = { type: "text-start", id: "msg_sup" }; + const innerTextDelta: LanguageModelV3StreamPart = { + type: "text-delta", + id: "msg_sup", + delta: "Hi", + }; + const innerTextEnd: LanguageModelV3StreamPart = { type: "text-end", id: "msg_sup" }; + const stub = createStubModel([ + raw({ + type: "response.output_item.added", + item: { id: "msg_sup", type: "message" }, + output_index: 0, + }), + innerTextStart, + raw({ + type: "response.output_text.delta", + item_id: "msg_sup", + content_index: 0, + delta: "Hi", + }), + innerTextDelta, + raw({ + type: "response.output_item.done", + item: { id: "msg_sup", type: "message" }, + output_index: 0, + }), + innerTextEnd, + finish("stop", 1, 1), + ]); + + const wrapped = wrapCopilotResponsesModel(stub); + const result = await wrapped.doStream(baseOptions); + const parts = await collectStream(result.stream); + + // Should have exactly one text-start, one text-delta, one text-end — no duplicates + const types = parts.map((p) => p.type); + expect(types).toEqual(["text-start", "text-delta", "text-end", "finish"]); + }); + + it("suppresses inner tool-input-start/delta/end/tool-call and rebuilds from raw", async () => { + const innerToolStart: LanguageModelV3StreamPart = { + type: "tool-input-start", + id: "call_sup", + toolName: "bash", + }; + const innerToolDelta: LanguageModelV3StreamPart = { + type: "tool-input-delta", + id: "call_sup", + delta: "{}", + }; + const innerToolEnd: LanguageModelV3StreamPart = { type: "tool-input-end", id: "call_sup" }; + const innerToolCall: LanguageModelV3StreamPart = { + type: "tool-call", + toolCallId: "call_sup", + toolName: "bash", + input: "{}", + }; + + const stub = createStubModel([ + raw({ + type: "response.output_item.added", + item: { id: "fc_sup", type: "function_call", call_id: "call_sup", name: "bash" }, + output_index: 0, + }), + innerToolStart, + raw({ 
type: "response.function_call_arguments.delta", output_index: 0, delta: "{}" }), + innerToolDelta, + raw({ + type: "response.output_item.done", + item: { + id: "fc_sup", + type: "function_call", + call_id: "call_sup", + name: "bash", + arguments: "{}", + }, + output_index: 0, + }), + innerToolEnd, + innerToolCall, + finish("tool-calls", 1, 1), + ]); + + const wrapped = wrapCopilotResponsesModel(stub); + const result = await wrapped.doStream(baseOptions); + const parts = await collectStream(result.stream); + + // Should have exactly one of each — no duplicates from inner parts + const types = parts.map((p) => p.type); + expect(types).toEqual([ + "tool-input-start", + "tool-input-delta", + "tool-input-end", + "tool-call", + "finish", + ]); + }); + }); + + describe("tool call lifecycle", () => { + it("rebuilds tool-input-start / delta / end / tool-call from raw events", async () => { + const callId = "call_abc123"; + const toolName = "bash"; + const stub = createStubModel([ + raw({ + type: "response.output_item.added", + item: { id: "fc_001", type: "function_call", call_id: callId, name: toolName }, + output_index: 0, + }), + raw({ type: "response.function_call_arguments.delta", output_index: 0, delta: '{"script' }), + raw({ type: "response.function_call_arguments.delta", output_index: 0, delta: '":"ls"}' }), + raw({ + type: "response.output_item.done", + item: { + id: "fc_001", + type: "function_call", + call_id: callId, + name: toolName, + arguments: '{"script":"ls"}', + }, + output_index: 0, + }), + finish("tool-calls", 20, 10), + ]); + + const wrapped = wrapCopilotResponsesModel(stub); + const result = await wrapped.doStream(baseOptions); + const parts = await collectStream(result.stream); + + const types = parts.map((p) => p.type); + expect(types).toEqual([ + "tool-input-start", + "tool-input-delta", + "tool-input-delta", + "tool-input-end", + "tool-call", + "finish", + ]); + + // Verify tool-call has correct args from output_item.done + const toolCall = 
parts.find((p) => p.type === "tool-call") as { + type: "tool-call"; + toolCallId: string; + toolName: string; + input: string; + }; + expect(toolCall.toolCallId).toBe(callId); + expect(toolCall.toolName).toBe(toolName); + expect(toolCall.input).toBe('{"script":"ls"}'); + }); + }); + + describe("finish / usage passthrough", () => { + it("passes through finish and response-metadata unchanged", async () => { + const responseMetadata: LanguageModelV3StreamPart = { + type: "response-metadata", + id: "resp_001", + timestamp: new Date("2024-01-01"), + modelId: "gpt-5.2", + }; + const stub = createStubModel([responseMetadata, finish("stop", 100, 50)]); + + const wrapped = wrapCopilotResponsesModel(stub); + const result = await wrapped.doStream(baseOptions); + const parts = await collectStream(result.stream); + + expect(parts).toHaveLength(2); + expect(parts[0].type).toBe("response-metadata"); + expect(parts[1].type).toBe("finish"); + + const finishPart = parts[1] as { type: "finish"; usage: unknown; finishReason: unknown }; + expect(finishPart.usage).toEqual(makeUsage(100, 50)); + }); + }); + + describe("terminal failure", () => { + it("emits error part from response.failed raw event", async () => { + const stub = createStubModel([ + raw({ + type: "response.failed", + response: { error: { message: "Rate limit exceeded", code: "rate_limit" } }, + }), + finish("error", 0, 0), + ]); + + const wrapped = wrapCopilotResponsesModel(stub); + const result = await wrapped.doStream(baseOptions); + const parts = await collectStream(result.stream); + + const errorPart = parts.find((p) => p.type === "error"); + expect(errorPart).toBeDefined(); + expect((errorPart as { type: "error"; error: unknown }).error).toEqual({ + message: "Rate limit exceeded", + code: "rate_limit", + }); + }); + }); + + describe("raw chunk passthrough", () => { + it("forwards raw chunks when outer caller requested them", async () => { + const stub = createStubModel([ + raw({ + type: "response.output_item.added", 
+ item: { id: "msg_r", type: "message" }, + output_index: 0, + }), + raw({ + type: "response.output_text.delta", + item_id: "msg_r", + content_index: 0, + delta: "Hi", + }), + raw({ + type: "response.output_item.done", + item: { id: "msg_r", type: "message" }, + output_index: 0, + }), + finish("stop", 1, 1), + ]); + + const wrapped = wrapCopilotResponsesModel(stub); + const result = await wrapped.doStream({ ...baseOptions, includeRawChunks: true }); + const parts = await collectStream(result.stream); + + // Should include raw chunks alongside rebuilt semantic parts + const rawParts = parts.filter((p) => p.type === "raw"); + expect(rawParts.length).toBeGreaterThanOrEqual(3); + + // Should also include rebuilt semantic parts + const types = parts.filter((p) => p.type !== "raw").map((p) => p.type); + expect(types).toEqual(["text-start", "text-delta", "text-end", "finish"]); + }); + + it("suppresses raw chunks when outer caller did not request them", async () => { + const stub = createStubModel([ + raw({ + type: "response.output_item.added", + item: { id: "msg_s", type: "message" }, + output_index: 0, + }), + raw({ + type: "response.output_text.delta", + item_id: "msg_s", + content_index: 0, + delta: "Hi", + }), + raw({ + type: "response.output_item.done", + item: { id: "msg_s", type: "message" }, + output_index: 0, + }), + finish("stop", 1, 1), + ]); + + const wrapped = wrapCopilotResponsesModel(stub); + const result = await wrapped.doStream(baseOptions); // no includeRawChunks + const parts = await collectStream(result.stream); + + // Should NOT include any raw chunks + expect(parts.filter((p) => p.type === "raw")).toHaveLength(0); + + // Should include rebuilt semantic parts + const types = parts.map((p) => p.type); + expect(types).toEqual(["text-start", "text-delta", "text-end", "finish"]); + }); + }); + + describe("includeRawChunks forwarding", () => { + it("always passes includeRawChunks: true to the inner model", async () => { + const stub = 
createStubModel([finish("stop", 0, 0)]); + + const wrapped = wrapCopilotResponsesModel(stub); + + // Call without includeRawChunks + await wrapped.doStream(baseOptions); + expect(stub.lastStreamOptions?.includeRawChunks).toBe(true); + }); + }); + + describe("doGenerate passthrough", () => { + it("delegates doGenerate to the inner model unchanged", async () => { + const expectedResult = { + content: [], + text: "test", + usage: makeUsage(5, 3), + finishReason: { unified: "stop" as const, raw: "stop" }, + response: {}, + warnings: [], + }; + + const stub = createStubModel([]); + stub.doGenerate = () => Promise.resolve(expectedResult) as never; + + const wrapped = wrapCopilotResponsesModel(stub); + const result = await wrapped.doGenerate(baseOptions); + expect(result).toBe(expectedResult); + }); + }); + + describe("content_part lifecycle events", () => { + it("rebuilds text lifecycle from content_part.added / output_text.done / content_part.done", async () => { + const messageItemId = "msg_cp"; + const stub = createStubModel([ + // output_item.added for the message + raw({ + type: "response.output_item.added", + item: { id: messageItemId, type: "message" }, + output_index: 0, + }), + // content_part.added with output_text type — should emit text-start + // (but output_item.added already started one at index 0, so this is + // for a second content part at index 1) + raw({ + type: "response.content_part.added", + item_id: messageItemId, + content_index: 1, + part: { type: "output_text", text: "" }, + }), + // Text deltas for second content part + raw({ + type: "response.output_text.delta", + item_id: messageItemId, + content_index: 1, + delta: "Hello", + }), + raw({ + type: "response.output_text.delta", + item_id: messageItemId, + content_index: 1, + delta: " world", + }), + // output_text.done reconciliation + raw({ + type: "response.output_text.done", + item_id: messageItemId, + content_index: 1, + text: "Hello world", + }), + // content_part.done closes the text part 
+ raw({ + type: "response.content_part.done", + item_id: messageItemId, + content_index: 1, + part: { type: "output_text", text: "Hello world" }, + }), + // Close any remaining text from output_item + raw({ + type: "response.output_item.done", + item: { id: messageItemId, type: "message" }, + output_index: 0, + }), + finish("stop", 10, 5), + ]); + + const wrapped = wrapCopilotResponsesModel(stub); + const result = await wrapped.doStream(baseOptions); + const parts = await collectStream(result.stream); + + const types = parts.map((p) => p.type); + + // First text-start from output_item.added (index 0), second from content_part.added (index 1) + expect(types).toEqual([ + "text-start", // from output_item.added (message, content_index 0) + "text-start", // from content_part.added (output_text, content_index 1) + "text-delta", // "Hello" + "text-delta", // " world" + "text-end", // from content_part.done (content_index 1) + "text-end", // from output_item.done fallback (content_index 0) + "finish", + ]); + + // Each content index gets a unique composite id so downstream consumers + // (e.g. DevTools) can track them independently. 
+ const textStarts = parts.filter((p) => p.type === "text-start") as Array<{ + type: "text-start"; + id: string; + }>; + expect(textStarts[0].id).toBe(`${messageItemId}:0`); + expect(textStarts[1].id).toBe(`${messageItemId}:1`); + + const deltas = parts.filter((p) => p.type === "text-delta") as Array<{ + type: "text-delta"; + id: string; + delta: string; + }>; + expect(deltas.map((d) => d.delta)).toEqual(["Hello", " world"]); + // Deltas for content_index 1 + expect(deltas[0].id).toBe(`${messageItemId}:1`); + expect(deltas[1].id).toBe(`${messageItemId}:1`); + }); + + it("content_part.added emits text-start and initial delta when part carries text", async () => { + const messageItemId = "msg_cp_init"; + const stub = createStubModel([ + raw({ + type: "response.content_part.added", + item_id: messageItemId, + content_index: 0, + part: { type: "output_text", text: "Initial" }, + }), + raw({ + type: "response.output_text.delta", + item_id: messageItemId, + content_index: 0, + delta: " more", + }), + raw({ + type: "response.content_part.done", + item_id: messageItemId, + content_index: 0, + part: { type: "output_text", text: "Initial more" }, + }), + finish("stop", 5, 3), + ]); + + const wrapped = wrapCopilotResponsesModel(stub); + const result = await wrapped.doStream(baseOptions); + const parts = await collectStream(result.stream); + + const types = parts.map((p) => p.type); + expect(types).toEqual([ + "text-start", // from content_part.added + "text-delta", // "Initial" (initial text from content_part.added) + "text-delta", // " more" (from output_text.delta) + "text-end", // from content_part.done + "finish", + ]); + + const deltas = parts.filter((p) => p.type === "text-delta") as Array<{ + type: "text-delta"; + delta: string; + }>; + expect(deltas.map((d) => d.delta)).toEqual(["Initial", " more"]); + }); + + it("ignores content_part.added for non-output_text types", async () => { + const messageItemId = "msg_cp_other"; + const stub = createStubModel([ + // A 
content_part.added with an unsupported type should not trigger text-start + raw({ + type: "response.content_part.added", + item_id: messageItemId, + content_index: 0, + part: { type: "refusal", refusal: "I cannot help with that" }, + }), + finish("stop", 5, 1), + ]); + + const wrapped = wrapCopilotResponsesModel(stub); + const result = await wrapped.doStream(baseOptions); + const parts = await collectStream(result.stream); + + const types = parts.map((p) => p.type); + expect(types).toEqual(["finish"]); + }); + }); + + describe("output_text.done reconciliation", () => { + it("emits trailing text delta when output_text.done has more text than accumulated deltas", async () => { + const messageItemId = "msg_reconcile"; + const stub = createStubModel([ + raw({ + type: "response.output_item.added", + item: { id: messageItemId, type: "message" }, + output_index: 0, + }), + raw({ + type: "response.output_text.delta", + item_id: messageItemId, + content_index: 0, + delta: "Hello", + }), + // output_text.done has final text that includes trailing content not in deltas + raw({ + type: "response.output_text.done", + item_id: messageItemId, + content_index: 0, + text: "Hello world", + }), + raw({ + type: "response.output_item.done", + item: { id: messageItemId, type: "message" }, + output_index: 0, + }), + finish("stop", 10, 5), + ]); + + const wrapped = wrapCopilotResponsesModel(stub); + const result = await wrapped.doStream(baseOptions); + const parts = await collectStream(result.stream); + + const types = parts.map((p) => p.type); + expect(types).toEqual([ + "text-start", + "text-delta", // "Hello" from delta + "text-delta", // " world" reconciled from output_text.done + "text-end", // from output_item.done fallback + "finish", + ]); + + const deltas = parts.filter((p) => p.type === "text-delta") as Array<{ + type: "text-delta"; + delta: string; + }>; + expect(deltas.map((d) => d.delta)).toEqual(["Hello", " world"]); + }); + + it("does not emit extra delta when 
output_text.done matches accumulated text", async () => { + const messageItemId = "msg_no_reconcile"; + const stub = createStubModel([ + raw({ + type: "response.output_item.added", + item: { id: messageItemId, type: "message" }, + output_index: 0, + }), + raw({ + type: "response.output_text.delta", + item_id: messageItemId, + content_index: 0, + delta: "Hello world", + }), + // output_text.done matches exactly — no reconciliation needed + raw({ + type: "response.output_text.done", + item_id: messageItemId, + content_index: 0, + text: "Hello world", + }), + raw({ + type: "response.output_item.done", + item: { id: messageItemId, type: "message" }, + output_index: 0, + }), + finish("stop", 10, 5), + ]); + + const wrapped = wrapCopilotResponsesModel(stub); + const result = await wrapped.doStream(baseOptions); + const parts = await collectStream(result.stream); + + const types = parts.map((p) => p.type); + // Only one text-delta — no extra reconciliation delta + expect(types).toEqual(["text-start", "text-delta", "text-end", "finish"]); + + const deltas = parts.filter((p) => p.type === "text-delta") as Array<{ + type: "text-delta"; + delta: string; + }>; + expect(deltas).toHaveLength(1); + expect(deltas[0].delta).toBe("Hello world"); + }); + }); + + describe("function_call_arguments.done lifecycle", () => { + it("finalizes tool call from arguments.done, making output_item.done a no-op", async () => { + const callId = "call_argsdone"; + const toolName = "bash"; + const stub = createStubModel([ + raw({ + type: "response.output_item.added", + item: { id: "fc_ad", type: "function_call", call_id: callId, name: toolName }, + output_index: 0, + }), + raw({ type: "response.function_call_arguments.delta", output_index: 0, delta: '{"script' }), + raw({ type: "response.function_call_arguments.delta", output_index: 0, delta: '":"ls"}' }), + // function_call_arguments.done delivers final args and finalizes the tool + raw({ + type: "response.function_call_arguments.done", + 
output_index: 0, + arguments: '{"script":"ls"}', + }), + // output_item.done should be a no-op since args-done already finalized + raw({ + type: "response.output_item.done", + item: { + id: "fc_ad", + type: "function_call", + call_id: callId, + name: toolName, + arguments: '{"script":"ls"}', + }, + output_index: 0, + }), + finish("tool-calls", 20, 10), + ]); + + const wrapped = wrapCopilotResponsesModel(stub); + const result = await wrapped.doStream(baseOptions); + const parts = await collectStream(result.stream); + + const types = parts.map((p) => p.type); + // Only one tool-input-end and one tool-call — not doubled by output_item.done + expect(types).toEqual([ + "tool-input-start", + "tool-input-delta", + "tool-input-delta", + "tool-input-end", + "tool-call", + "finish", + ]); + + const toolCall = parts.find((p) => p.type === "tool-call") as { + type: "tool-call"; + toolCallId: string; + toolName: string; + input: string; + }; + expect(toolCall.toolCallId).toBe(callId); + expect(toolCall.toolName).toBe(toolName); + expect(toolCall.input).toBe('{"script":"ls"}'); + }); + + it("reconciles arguments from done event when deltas were incomplete", async () => { + const callId = "call_reconcile_args"; + const toolName = "file_read"; + const stub = createStubModel([ + raw({ + type: "response.output_item.added", + item: { id: "fc_ra", type: "function_call", call_id: callId, name: toolName }, + output_index: 0, + }), + // Only partial arguments via delta + raw({ type: "response.function_call_arguments.delta", output_index: 0, delta: '{"path":' }), + // Done event carries the full arguments + raw({ + type: "response.function_call_arguments.done", + output_index: 0, + arguments: '{"path":"src/test.ts"}', + }), + finish("tool-calls", 10, 5), + ]); + + const wrapped = wrapCopilotResponsesModel(stub); + const result = await wrapped.doStream(baseOptions); + const parts = await collectStream(result.stream); + + const toolCall = parts.find((p) => p.type === "tool-call") as { + 
type: "tool-call"; + input: string; + }; + // Final input should be from the done event, not the incomplete deltas + expect(toolCall.input).toBe('{"path":"src/test.ts"}'); + }); + }); + + describe("content_part.done closes text before output_item.done", () => { + it("content_part.done emits text-end so output_item.done does not double-close", async () => { + const messageItemId = "msg_cp_first"; + const stub = createStubModel([ + raw({ + type: "response.output_item.added", + item: { id: messageItemId, type: "message" }, + output_index: 0, + }), + raw({ + type: "response.output_text.delta", + item_id: messageItemId, + content_index: 0, + delta: "Hi", + }), + // content_part.done closes the text part + raw({ + type: "response.content_part.done", + item_id: messageItemId, + content_index: 0, + part: { type: "output_text", text: "Hi" }, + }), + // output_item.done should not double-close + raw({ + type: "response.output_item.done", + item: { id: messageItemId, type: "message" }, + output_index: 0, + }), + finish("stop", 5, 2), + ]); + + const wrapped = wrapCopilotResponsesModel(stub); + const result = await wrapped.doStream(baseOptions); + const parts = await collectStream(result.stream); + + const types = parts.map((p) => p.type); + // Exactly one text-end, not two + expect(types).toEqual(["text-start", "text-delta", "text-end", "finish"]); + }); + }); + + describe("full plan lifecycle ordering", () => { + it("handles the complete content_part event sequence from the plan", async () => { + // Simulates the exact sequence from the accepted plan: + // content_part.added → output_text.delta → output_text.done → content_part.done → output_item.done + // This is the ordering that would previously trip "text part not found". 
+ const messageItemId = "msg_plan"; + const stub = createStubModel([ + raw({ + type: "response.output_item.added", + item: { id: messageItemId, type: "message" }, + output_index: 0, + }), + raw({ + type: "response.content_part.added", + item_id: messageItemId, + content_index: 0, + part: { type: "output_text", text: "" }, + }), + raw({ + type: "response.output_text.delta", + item_id: messageItemId, + content_index: 0, + delta: "Test ", + }), + raw({ + type: "response.output_text.delta", + item_id: messageItemId, + content_index: 0, + delta: "output", + }), + raw({ + type: "response.output_text.done", + item_id: messageItemId, + content_index: 0, + text: "Test output", + }), + raw({ + type: "response.content_part.done", + item_id: messageItemId, + content_index: 0, + part: { type: "output_text", text: "Test output" }, + }), + raw({ + type: "response.output_item.done", + item: { id: messageItemId, type: "message" }, + output_index: 0, + }), + finish("stop", 15, 8), + ]); + + const wrapped = wrapCopilotResponsesModel(stub); + const result = await wrapped.doStream(baseOptions); + const parts = await collectStream(result.stream); + + const types = parts.map((p) => p.type); + // output_item.added emits text-start for index 0, + // content_part.added sees key already exists so does NOT emit duplicate text-start, + // deltas emit text-delta, output_text.done is a no-op (matches accumulated), + // content_part.done emits text-end, output_item.done is a no-op (already closed) + expect(types).toEqual([ + "text-start", + "text-delta", // "Test " + "text-delta", // "output" + "text-end", + "finish", + ]); + + const deltas = parts.filter((p) => p.type === "text-delta") as Array<{ + type: "text-delta"; + delta: string; + }>; + expect(deltas.map((d) => d.delta)).toEqual(["Test ", "output"]); + }); + }); + describe("mixed text and tool calls", () => { + it("handles interleaved text and tool call events", async () => { + const msgId = "msg_mixed"; + const callId = "call_mixed"; + 
const stub = createStubModel([ + // Text message starts + raw({ + type: "response.output_item.added", + item: { id: msgId, type: "message" }, + output_index: 0, + }), + raw({ + type: "response.output_text.delta", + item_id: msgId, + content_index: 0, + delta: "Let me ", + }), + raw({ + type: "response.output_text.delta", + item_id: msgId, + content_index: 0, + delta: "check.", + }), + raw({ + type: "response.output_item.done", + item: { id: msgId, type: "message" }, + output_index: 0, + }), + // Tool call + raw({ + type: "response.output_item.added", + item: { id: "fc_m", type: "function_call", call_id: callId, name: "file_read" }, + output_index: 1, + }), + raw({ + type: "response.function_call_arguments.delta", + output_index: 1, + delta: '{"path":"test.ts"}', + }), + raw({ + type: "response.output_item.done", + item: { + id: "fc_m", + type: "function_call", + call_id: callId, + name: "file_read", + arguments: '{"path":"test.ts"}', + }, + output_index: 1, + }), + finish("tool-calls", 30, 15), + ]); + + const wrapped = wrapCopilotResponsesModel(stub); + const result = await wrapped.doStream(baseOptions); + const parts = await collectStream(result.stream); + + const types = parts.map((p) => p.type); + expect(types).toEqual([ + "text-start", + "text-delta", + "text-delta", + "text-end", + "tool-input-start", + "tool-input-delta", + "tool-input-end", + "tool-call", + "finish", + ]); + }); + }); +}); diff --git a/src/node/services/copilotResponsesModel.ts b/src/node/services/copilotResponsesModel.ts new file mode 100644 index 0000000000..139c9d82f4 --- /dev/null +++ b/src/node/services/copilotResponsesModel.ts @@ -0,0 +1,551 @@ +/** + * Copilot-only Responses streaming adapter. 
/**
 * GitHub Copilot's /v1/responses endpoint can emit Responses API events
 * in an order that differs from the standard OpenAI Responses API, causing
 * the upstream SDK's V3 stream part translation to produce broken lifecycles
 * (e.g., orphaned text-deltas, missing text-start/text-end pairs, or
 * tool-call parts arriving before tool-input-end).
 *
 * This module wraps a Responses-capable LanguageModelV3 and rebuilds
 * coherent V3 stream parts from raw SSE events. It only touches the
 * `doStream` path; `doGenerate` is delegated unchanged.
 *
 * The wrapper:
 * - Forces `includeRawChunks: true` on the inner model
 * - Suppresses the inner model's text/tool semantic parts (which may be broken)
 * - Rebuilds text-start/text-delta/text-end and tool-input-start/delta/end/tool-call
 *   from raw Responses events keyed by item_id + content_index / output_index
 * - Passes through everything else unchanged (finish, response-metadata,
 *   reasoning-*, source, raw, etc.)
 */

import type {
  LanguageModelV3,
  LanguageModelV3CallOptions,
  LanguageModelV3StreamPart,
  LanguageModelV3StreamResult,
} from "@ai-sdk/provider";

// ---------------------------------------------------------------------------
// Raw Responses event type guards
// ---------------------------------------------------------------------------

/** Minimal shape shared by every raw Responses SSE event: a string `type` plus arbitrary fields. */
interface RawResponsesEvent {
  type: string;
  [key: string]: unknown;
}

/**
 * Narrow an arbitrary value to a plain property bag.
 *
 * Raw events come straight from the wire, so nested fields such as `item` and
 * `part` are checked at runtime instead of being force-cast.
 *
 * @returns the same value typed as a record, or `undefined` for primitives and `null`.
 */
function asRecord(value: unknown): Record<string, unknown> | undefined {
  return typeof value === "object" && value !== null
    ? (value as Record<string, unknown>)
    : undefined;
}

/** True when `value` is a non-null object carrying a string `type` field. */
function isRawEvent(value: unknown): value is RawResponsesEvent {
  return typeof asRecord(value)?.type === "string";
}

/** response.output_item.added — item: { type: "message" | "function_call" | ... } */
function isOutputItemAdded(ev: RawResponsesEvent): boolean {
  return ev.type === "response.output_item.added";
}

/** response.output_item.done */
function isOutputItemDone(ev: RawResponsesEvent): boolean {
  return ev.type === "response.output_item.done";
}

/** response.output_text.delta */
function isTextDelta(ev: RawResponsesEvent): boolean {
  return ev.type === "response.output_text.delta";
}

/** response.function_call_arguments.delta */
function isFunctionCallArgsDelta(ev: RawResponsesEvent): boolean {
  return ev.type === "response.function_call_arguments.delta";
}

/** response.content_part.added */
function isContentPartAdded(ev: RawResponsesEvent): boolean {
  return ev.type === "response.content_part.added";
}

/** response.content_part.done */
function isContentPartDone(ev: RawResponsesEvent): boolean {
  return ev.type === "response.content_part.done";
}

/** response.output_text.done */
function isTextDone(ev: RawResponsesEvent): boolean {
  return ev.type === "response.output_text.done";
}

/** response.function_call_arguments.done */
function isFunctionCallArgsDone(ev: RawResponsesEvent): boolean {
  return ev.type === "response.function_call_arguments.done";
}

/** response.failed */
function isResponseFailed(ev: RawResponsesEvent): boolean {
  return ev.type === "response.failed";
}

// ---------------------------------------------------------------------------
// Helpers to extract fields from raw events
// ---------------------------------------------------------------------------

/**
 * Item id of an event.
 *
 * Some events use a top-level `item_id`; output_item events nest it under
 * `item.id`. The top-level field wins when both are present.
 */
function getItemId(ev: RawResponsesEvent): string | undefined {
  if (typeof ev.item_id === "string") return ev.item_id;
  const nestedId = asRecord(ev.item)?.id;
  return typeof nestedId === "string" ? nestedId : undefined;
}

/** `item.type` of an output_item event, or `undefined` when absent or not a string. */
function getItemType(ev: RawResponsesEvent): string | undefined {
  const itemType = asRecord(ev.item)?.type;
  return typeof itemType === "string" ? itemType : undefined;
}

/** `output_index` of the event, defaulting to 0 when missing or not a number. */
function getOutputIndex(ev: RawResponsesEvent): number {
  return typeof ev.output_index === "number" ? ev.output_index : 0;
}

/** `content_index` of the event, defaulting to 0 when missing or not a number. */
function getContentIndex(ev: RawResponsesEvent): number {
  return typeof ev.content_index === "number" ? ev.content_index : 0;
}

/** `delta` payload of the event, or the empty string when missing or not a string. */
function getDelta(ev: RawResponsesEvent): string {
  return typeof ev.delta === "string" ? ev.delta : "";
}

/** `part.type` of a content_part event, or `undefined` when absent or not a string. */
function getPartType(ev: RawResponsesEvent): string | undefined {
  const partType = asRecord(ev.part)?.type;
  return typeof partType === "string" ? partType : undefined;
}

/** `part.text` of a content_part event (may be the empty string), or `undefined`. */
function getPartText(ev: RawResponsesEvent): string | undefined {
  const partText = asRecord(ev.part)?.text;
  return typeof partText === "string" ? partText : undefined;
}

/** Top-level `text` field (used by output_text.done), or `undefined`. */
function getText(ev: RawResponsesEvent): string | undefined {
  return typeof ev.text === "string" ? ev.text : undefined;
}

/** Top-level `arguments` field (used by function_call_arguments.done), or `undefined`. */
function getArguments(ev: RawResponsesEvent): string | undefined {
  return typeof ev.arguments === "string" ? ev.arguments : undefined;
}

// ---------------------------------------------------------------------------
// Semantic part types suppressed from the inner stream
// ---------------------------------------------------------------------------

/** Parts the wrapper rebuilds from raw events — suppress the inner model's versions. */
const SUPPRESSED_INNER_TYPES: ReadonlySet<string> = new Set<string>([
  "text-start",
  "text-delta",
  "text-end",
  "tool-input-start",
  "tool-input-delta",
  "tool-input-end",
  "tool-call",
]);
// ---------------------------------------------------------------------------
// Tracking state for open text / tool parts
// ---------------------------------------------------------------------------

/** A text content part for which `text-start` has been emitted but `text-end` has not. */
interface OpenTextPart {
  /** Composite id emitted on text-start/delta/end (`${itemId}:${contentIndex}`). */
  externalId: string;
  started: boolean;
  /** Accumulated text from deltas, used for reconciliation in output_text.done. */
  accum: string;
}

/** A function call announced by output_item.added, tracked until its `tool-call` part is emitted. */
interface OpenToolCall {
  itemId: string;
  /** Id used on every emitted tool part (`call_id`, falling back to the item id). */
  callId: string;
  toolName: string;
  /** Argument JSON accumulated from deltas, replaced by the final value when a done event carries one. */
  args: string;
  started: boolean;
  /** Set once tool-input-end + tool-call were emitted; guards against double finalization. */
  finalized: boolean;
}

/** Controller type shared by every handler inside the transform. */
type CopilotPartController = TransformStreamDefaultController<LanguageModelV3StreamPart>;

// ---------------------------------------------------------------------------
// Stream transformer
// ---------------------------------------------------------------------------

/**
 * Build the TransformStream that rebuilds text/tool stream parts from raw
 * Responses events.
 *
 * - `raw` chunks are forwarded only when the outer caller asked for them, and
 *   are always inspected to drive the rebuilt lifecycle.
 * - The inner model's own text/tool parts (SUPPRESSED_INNER_TYPES) are dropped.
 * - Everything else is passed through unchanged.
 * - Any text or tool part still open when `finish` arrives is closed *before*
 *   `finish` is forwarded, so lifecycle parts never trail the terminal part.
 *   `flush` repeats the same cleanup for streams that end without `finish`.
 *
 * @param outerWantsRaw whether the caller of `doStream` requested raw chunks.
 */
function createCopilotResponsesTransform(
  outerWantsRaw: boolean
): TransformStream<LanguageModelV3StreamPart, LanguageModelV3StreamPart> {
  // Open text parts keyed by `${itemId}:${contentIndex}`.
  const openTexts = new Map<string, OpenTextPart>();
  // Open tool calls keyed by output_index.
  const openTools = new Map<number, OpenToolCall>();

  // -------------------------------------------
  // Shared helpers
  // -------------------------------------------

  /** Composite text key for events that carry a non-empty top-level `item_id`; otherwise undefined. */
  function textKeyOf(ev: RawResponsesEvent): string | undefined {
    const itemId = typeof ev.item_id === "string" ? ev.item_id : undefined;
    if (!itemId) return undefined;
    return `${itemId}:${getContentIndex(ev)}`;
  }

  /**
   * Return the open text part for `key`, emitting `text-start` first when it is
   * not open yet. Idempotent, so whichever of output_item.added,
   * content_part.added or a delta arrives first opens the part exactly once.
   */
  function ensureOpenText(key: string, controller: CopilotPartController): OpenTextPart {
    const existing = openTexts.get(key);
    if (existing) return existing;

    const opened: OpenTextPart = { externalId: key, started: true, accum: "" };
    openTexts.set(key, opened);
    controller.enqueue({ type: "text-start", id: key });
    return opened;
  }

  /** Emit tool-input-end + tool-call for `tool` once; later calls are no-ops. */
  function finalizeToolCall(tool: OpenToolCall, controller: CopilotPartController): void {
    if (tool.finalized) return;
    tool.finalized = true;

    controller.enqueue({ type: "tool-input-end", id: tool.callId });
    controller.enqueue({
      type: "tool-call",
      toolCallId: tool.callId,
      toolName: tool.toolName,
      input: tool.args,
    });
  }

  /**
   * Close every text part and finalize every tool call that is still open.
   * Defensive — in a well-formed stream the done events have already done this.
   */
  function closeDanglingParts(controller: CopilotPartController): void {
    for (const text of openTexts.values()) {
      if (text.started) {
        controller.enqueue({ type: "text-end", id: text.externalId });
      }
    }
    openTexts.clear();

    for (const tool of openTools.values()) {
      finalizeToolCall(tool, controller);
    }
    openTools.clear();
  }

  // -------------------------------------------
  // Raw event processing
  // -------------------------------------------

  /** Dispatch one raw Responses event to its handler; unrecognized events are ignored. */
  function processRawEvent(ev: RawResponsesEvent, controller: CopilotPartController): void {
    if (isOutputItemAdded(ev)) {
      handleOutputItemAdded(ev, controller);
    } else if (isContentPartAdded(ev)) {
      handleContentPartAdded(ev, controller);
    } else if (isTextDelta(ev)) {
      handleTextDelta(ev, controller);
    } else if (isTextDone(ev)) {
      handleTextDone(ev, controller);
    } else if (isContentPartDone(ev)) {
      handleContentPartDone(ev, controller);
    } else if (isFunctionCallArgsDelta(ev)) {
      handleFunctionCallArgsDelta(ev, controller);
    } else if (isFunctionCallArgsDone(ev)) {
      handleFunctionCallArgsDone(ev, controller);
    } else if (isOutputItemDone(ev)) {
      handleOutputItemDone(ev, controller);
    } else if (isResponseFailed(ev)) {
      handleResponseFailed(ev, controller);
    }
    // response.created, response.completed, response.incomplete,
    // response.reasoning_summary_*, response.output_text.annotation.*,
    // etc. are either already handled by passthrough parts (finish,
    // response-metadata, reasoning-*, source) or don't need semantic translation.
  }

  /**
   * response.output_item.added — opens a text part for message items and a
   * tool call for function_call items.
   */
  function handleOutputItemAdded(ev: RawResponsesEvent, controller: CopilotPartController): void {
    const itemType = getItemType(ev);
    const itemId = getItemId(ev);
    if (!itemId) return;

    if (itemType === "message") {
      // A message item can have multiple content parts, but typically starts
      // with index 0, so text-start is emitted eagerly. The composite key is
      // also the external id so each content part gets a unique identity.
      // ensureOpenText keeps this a no-op when a content_part.added or delta
      // for the same key arrived first (out-of-order stream).
      ensureOpenText(`${itemId}:${getContentIndex(ev)}`, controller);
    } else if (itemType === "function_call") {
      // getItemType returned a string, so `ev.item` is a non-null object here.
      const item = ev.item as Record<string, unknown>;
      const callId = typeof item.call_id === "string" ? item.call_id : itemId;
      const toolName = typeof item.name === "string" ? item.name : "unknown";

      openTools.set(getOutputIndex(ev), {
        itemId,
        callId,
        toolName,
        args: "",
        started: true,
        finalized: false,
      });

      controller.enqueue({ type: "tool-input-start", id: callId, toolName });
    }
  }

  /** response.output_text.delta — emits text-delta, opening the part first if needed. */
  function handleTextDelta(ev: RawResponsesEvent, controller: CopilotPartController): void {
    const key = textKeyOf(ev);
    if (key === undefined) return;

    // Ensure text-start was emitted (defensive against out-of-order events).
    const text = ensureOpenText(key, controller);

    const delta = getDelta(ev);
    if (delta.length > 0) {
      text.accum += delta;
      controller.enqueue({ type: "text-delta", id: text.externalId, delta });
    }
  }

  /**
   * response.content_part.added — emitted when a new content part (e.g., output_text)
   * is added to a message item. Triggers text-start if not already started, plus an
   * optional initial text-delta if the part carries inline text.
   */
  function handleContentPartAdded(ev: RawResponsesEvent, controller: CopilotPartController): void {
    const key = textKeyOf(ev);
    if (key === undefined) return;

    // Only output_text content parts map onto text stream parts.
    if (getPartType(ev) !== "output_text") return;

    const text = ensureOpenText(key, controller);

    // Some servers include initial text inline in the content_part.added event.
    const initialText = getPartText(ev);
    if (initialText && initialText.length > 0) {
      text.accum += initialText;
      controller.enqueue({ type: "text-delta", id: text.externalId, delta: initialText });
    }
  }

  /**
   * response.output_text.done — emitted when the full text for a content part
   * is finalized. Reconciles any trailing text that was not delivered via deltas.
   */
  function handleTextDone(ev: RawResponsesEvent, controller: CopilotPartController): void {
    const key = textKeyOf(ev);
    if (key === undefined) return;

    const finalText = getText(ev);
    const text = openTexts.get(key);
    if (text && finalText != null && finalText.length > text.accum.length) {
      // Emit only the tail that the deltas missed.
      const trailing = finalText.slice(text.accum.length);
      text.accum = finalText;
      controller.enqueue({ type: "text-delta", id: text.externalId, delta: trailing });
    }
  }

  /**
   * response.content_part.done — emitted when a content part is fully done.
   * Closes the text part with text-end.
   */
  function handleContentPartDone(ev: RawResponsesEvent, controller: CopilotPartController): void {
    const key = textKeyOf(ev);
    if (key === undefined) return;

    const text = openTexts.get(key);
    if (text?.started) {
      controller.enqueue({ type: "text-end", id: text.externalId });
      openTexts.delete(key);
    }
  }

  /** response.function_call_arguments.delta — accumulates args and emits tool-input-delta. */
  function handleFunctionCallArgsDelta(
    ev: RawResponsesEvent,
    controller: CopilotPartController
  ): void {
    const tool = openTools.get(getOutputIndex(ev));
    if (!tool) return;

    const delta = getDelta(ev);
    tool.args += delta;

    if (delta.length > 0) {
      controller.enqueue({ type: "tool-input-delta", id: tool.callId, delta });
    }
  }

  /**
   * response.function_call_arguments.done — emitted when the final arguments
   * for a function call are available. Reconciles accumulated args and finalizes
   * the tool call (tool-input-end + tool-call), so output_item.done becomes
   * a no-op fallback for this tool.
   */
  function handleFunctionCallArgsDone(
    ev: RawResponsesEvent,
    controller: CopilotPartController
  ): void {
    const outputIndex = getOutputIndex(ev);
    const tool = openTools.get(outputIndex);
    if (!tool || tool.finalized) return;

    // The done event is authoritative over whatever the deltas delivered.
    const finalArgs = getArguments(ev);
    if (finalArgs != null) {
      tool.args = finalArgs;
    }

    finalizeToolCall(tool, controller);
    openTools.delete(outputIndex);
  }

  /**
   * response.output_item.done — fallback finalization.
   * For message items: closes any still-open text parts that content_part.done did not
   * already close (e.g., when the server omits content_part events).
   * For function_call items: finalizes only if function_call_arguments.done was not
   * received (the tool will already be deleted from openTools if args-done ran).
   */
  function handleOutputItemDone(ev: RawResponsesEvent, controller: CopilotPartController): void {
    const itemType = getItemType(ev);
    const itemId = getItemId(ev);
    if (!itemId) return;

    if (itemType === "message") {
      const keyPrefix = `${itemId}:`;
      for (const [key, text] of openTexts) {
        if (text.externalId.startsWith(keyPrefix) && text.started) {
          controller.enqueue({ type: "text-end", id: text.externalId });
          openTexts.delete(key);
        }
      }
    } else if (itemType === "function_call") {
      const outputIndex = getOutputIndex(ev);
      const tool = openTools.get(outputIndex);
      if (tool && !tool.finalized) {
        // output_item.done for function_call contains the final arguments.
        // getItemType returned a string, so `ev.item` is a non-null object here.
        const item = ev.item as Record<string, unknown>;
        if (typeof item.arguments === "string") {
          tool.args = item.arguments;
        }
        finalizeToolCall(tool, controller);
        openTools.delete(outputIndex);
      }
    }
  }

  /** response.failed — surfaces the failure as an `error` stream part. */
  function handleResponseFailed(ev: RawResponsesEvent, controller: CopilotPartController): void {
    const response = ev.response as Record<string, unknown> | undefined;
    const error = response?.error ?? ev.error ?? "Copilot Responses request failed";
    controller.enqueue({ type: "error", error });
  }

  return new TransformStream<LanguageModelV3StreamPart, LanguageModelV3StreamPart>({
    transform(chunk, controller) {
      if (chunk.type === "raw") {
        // Forward raw chunks only if the outer caller wanted them.
        if (outerWantsRaw) {
          controller.enqueue(chunk);
        }

        // Always inspect the raw event to rebuild semantic parts.
        const rawValue = chunk.rawValue;
        if (isRawEvent(rawValue)) {
          processRawEvent(rawValue, controller);
        }
        return;
      }

      // Suppress the inner model's semantic text/tool parts — we rebuild from raw.
      if (SUPPRESSED_INNER_TYPES.has(chunk.type)) {
        return;
      }

      // `finish` is the terminal part: close anything still open first so no
      // text-end / tool-call is emitted after it.
      if (chunk.type === "finish") {
        closeDanglingParts(controller);
      }

      // Pass through everything else: finish, response-metadata, reasoning-*,
      // source, error, stream-start, etc.
      controller.enqueue(chunk);
    },

    flush(controller) {
      // Fallback for streams that end without a `finish` part; a no-op otherwise.
      closeDanglingParts(controller);
    },
  });
}

// ---------------------------------------------------------------------------
// Public wrapper
// ---------------------------------------------------------------------------

/**
 * Wrap a Responses-capable LanguageModelV3 for use with GitHub Copilot.
 *
 * The wrapper rebuilds coherent V3 text/tool stream parts from raw Responses
 * events, compensating for ordering differences in Copilot's /v1/responses.
 *
 * - `doGenerate` is delegated unchanged.
 * - `doStream` forces `includeRawChunks: true`, suppresses the inner model's
 *   text/tool semantic parts, and rebuilds them from the raw events. Raw chunks
 *   are only forwarded to the caller when the caller asked for them.
 *
 * @param inner the Responses-capable model to wrap.
 * @returns a model exposing the same identity fields as `inner`.
 */
export function wrapCopilotResponsesModel(inner: LanguageModelV3): LanguageModelV3 {
  return {
    specificationVersion: inner.specificationVersion,
    provider: inner.provider,
    modelId: inner.modelId,
    supportedUrls: inner.supportedUrls,

    doGenerate: (options: LanguageModelV3CallOptions) => inner.doGenerate(options),

    async doStream(options: LanguageModelV3CallOptions): Promise<LanguageModelV3StreamResult> {
      const outerWantsRaw = options.includeRawChunks === true;

      // Force raw chunks so we can rebuild semantics from SSE events.
      const innerOptions: LanguageModelV3CallOptions = {
        ...options,
        includeRawChunks: true,
      };

      const result = await inner.doStream(innerOptions);

      return {
        ...result,
        stream: result.stream.pipeThrough(createCopilotResponsesTransform(outerWantsRaw)),
      };
    },
  };
}
"@/node/utils/messages/normalizeToolCallIds"; import { log } from "./log"; /** Options for the full message preparation pipeline. */ @@ -81,9 +82,10 @@ export interface PrepareMessagesOptions { * 10. Rewriting data-URI file parts to SDK-safe inline base64 * 11. Converting to Vercel AI SDK ModelMessage format * 12. Self-healing: filtering empty/whitespace assistant messages - * 13. Applying provider-specific message transforms - * 14. Applying cache control headers - * 15. Validating Anthropic compliance (logs warnings only) + * 13. Normalizing oversized tool call IDs (>64 chars) for OpenAI compatibility + * 14. Applying provider-specific message transforms + * 15. Applying cache control headers + * 16. Validating Anthropic compliance (logs warnings only) */ export async function prepareMessagesForProvider( opts: PrepareMessagesOptions @@ -184,10 +186,15 @@ export async function prepareMessagesForProvider( const modelMessages = sanitizeAssistantModelMessages(rawModelMessages, workspaceId); - log.debug_obj(`${workspaceId}/2_model_messages.json`, modelMessages); + // Normalize oversized tool call IDs (>64 chars) that would be rejected by + // OpenAI's Responses API. Uses deterministic hashing so paired tool-call and + // tool-result parts stay matched. Request-only — does not mutate history. 
+ const normalizedMessages = normalizeToolCallIds(modelMessages); + + log.debug_obj(`${workspaceId}/2_model_messages.json`, normalizedMessages); // Apply ModelMessage transforms based on provider requirements - const transformedMessages = transformModelMessages(modelMessages, providerForMessages, { + const transformedMessages = transformModelMessages(normalizedMessages, providerForMessages, { anthropicThinkingEnabled: providerForMessages === "anthropic" && effectiveThinkingLevel !== "off", }); diff --git a/src/node/services/providerModelFactory.test.ts b/src/node/services/providerModelFactory.test.ts index 06dc98525a..a1a1f44481 100644 --- a/src/node/services/providerModelFactory.test.ts +++ b/src/node/services/providerModelFactory.test.ts @@ -1,4 +1,4 @@ -import { describe, expect, it } from "bun:test"; +import { describe, expect, it, spyOn } from "bun:test"; import * as fs from "fs"; import * as os from "os"; import * as path from "path"; @@ -13,6 +13,7 @@ import { resolveAIProviderHeaderSource, } from "./providerModelFactory"; import { ProviderService } from "./providerService"; +import * as copilotResponsesModule from "./copilotResponsesModel"; async function withTempConfig( run: (config: Config, factory: ProviderModelFactory) => Promise | void @@ -451,6 +452,178 @@ describe("ProviderModelFactory routing", () => { }); }); +describe("ProviderModelFactory Copilot endpoint selection", () => { + // Helper: extract the SDK provider identifier from a LanguageModel. + // At runtime createModel always returns an object model (LanguageModelV3), + // but the TS union type includes string literals so we narrow here. 
+ function getModelProvider(model: unknown): string { + return (model as { provider: string }).provider; + } + + it("uses responses endpoint for a Copilot model with /v1/responses support", async () => { + await withTempConfig(async (config, factory) => { + config.saveProvidersConfig({ + "github-copilot": { + apiKey: "ghu_test", + }, + }); + + // gpt-5.2 has Copilot-scoped metadata with both /v1/chat/completions and /v1/responses + const result = await factory.createModel("github-copilot:gpt-5.2"); + expect(result.success).toBe(true); + if (!result.success) return; + + // When responses endpoint is supported, model provider should include "responses" + expect(getModelProvider(result.data)).toContain("responses"); + }); + }); + + it("uses responses endpoint for a responses-only Copilot model", async () => { + await withTempConfig(async (config, factory) => { + config.saveProvidersConfig({ + "github-copilot": { + apiKey: "ghu_test", + }, + }); + + // gpt-5.1-codex-max in models.json has supported_endpoints: ["/v1/responses"] only + const result = await factory.createModel("github-copilot:gpt-5.1-codex-max"); + expect(result.success).toBe(true); + if (!result.success) return; + + expect(getModelProvider(result.data)).toContain("responses"); + }); + }); + + it("uses chat endpoint for a chat-only Copilot model", async () => { + await withTempConfig(async (config, factory) => { + config.saveProvidersConfig({ + "github-copilot": { + apiKey: "ghu_test", + }, + }); + + // claude-sonnet-4 in models.json has supported_endpoints: ["/v1/chat/completions"] + const result = await factory.createModel("github-copilot:claude-sonnet-4"); + expect(result.success).toBe(true); + if (!result.success) return; + + expect(getModelProvider(result.data)).toContain("chat"); + expect(getModelProvider(result.data)).not.toContain("responses"); + }); + }); + + it("falls back to chat endpoint when endpoint metadata is absent", async () => { + await withTempConfig(async (config, factory) => { + 
config.saveProvidersConfig({ + "github-copilot": { + apiKey: "ghu_test", + }, + }); + + // gpt-4o has no supported_endpoints in either copilot-scoped or bare metadata + const result = await factory.createModel("github-copilot:gpt-4o"); + expect(result.success).toBe(true); + if (!result.success) return; + + expect(getModelProvider(result.data)).toContain("chat"); + expect(getModelProvider(result.data)).not.toContain("responses"); + }); + }); + + it("inherits bare-model endpoint metadata when provider-scoped entry is missing", async () => { + await withTempConfig(async (config, factory) => { + config.saveProvidersConfig({ + "github-copilot": { + apiKey: "ghu_test", + }, + }); + + // github_copilot/gpt-5.4 does NOT exist in models.json, + // but bare "gpt-5.4" in models-extra has supported_endpoints with /v1/responses. + const result = await factory.createModel("github-copilot:gpt-5.4"); + expect(result.success).toBe(true); + if (!result.success) return; + + expect(getModelProvider(result.data)).toContain("responses"); + }); + }); +}); + +describe("ProviderModelFactory Copilot wrapper invocation", () => { + it("invokes wrapCopilotResponsesModel for a responses-capable Copilot model", async () => { + const wrapSpy = spyOn(copilotResponsesModule, "wrapCopilotResponsesModel"); + try { + await withTempConfig(async (config, factory) => { + config.saveProvidersConfig({ + "github-copilot": { + apiKey: "ghu_test", + }, + }); + + wrapSpy.mockClear(); + + // gpt-5.2 has Copilot-scoped metadata with /v1/responses support + const result = await factory.createModel("github-copilot:gpt-5.2"); + expect(result.success).toBe(true); + + // The wrapper must have been called exactly once for the responses branch + expect(wrapSpy).toHaveBeenCalledTimes(1); + }); + } finally { + wrapSpy.mockRestore(); + } + }); + + it("does not invoke wrapCopilotResponsesModel for a chat-only Copilot model", async () => { + const wrapSpy = spyOn(copilotResponsesModule, "wrapCopilotResponsesModel"); + try { 
+ await withTempConfig(async (config, factory) => { + config.saveProvidersConfig({ + "github-copilot": { + apiKey: "ghu_test", + }, + }); + + wrapSpy.mockClear(); + + // claude-sonnet-4 has supported_endpoints: ["/v1/chat/completions"] only + const result = await factory.createModel("github-copilot:claude-sonnet-4"); + expect(result.success).toBe(true); + + // The wrapper should NOT be called for chat-only models + expect(wrapSpy).not.toHaveBeenCalled(); + }); + } finally { + wrapSpy.mockRestore(); + } + }); + + it("does not invoke wrapCopilotResponsesModel when endpoint metadata is absent", async () => { + const wrapSpy = spyOn(copilotResponsesModule, "wrapCopilotResponsesModel"); + try { + await withTempConfig(async (config, factory) => { + config.saveProvidersConfig({ + "github-copilot": { + apiKey: "ghu_test", + }, + }); + + wrapSpy.mockClear(); + + // gpt-4o has no supported_endpoints metadata — should fall back to chat + const result = await factory.createModel("github-copilot:gpt-4o"); + expect(result.success).toBe(true); + + // No wrapper invocation for metadata-missing models + expect(wrapSpy).not.toHaveBeenCalled(); + }); + } finally { + wrapSpy.mockRestore(); + } + }); +}); + describe("classifyCopilotInitiator", () => { it("returns 'user' when last message role is user", () => { const body = JSON.stringify({ messages: [{ role: "user", content: "hello" }] }); diff --git a/src/node/services/providerModelFactory.ts b/src/node/services/providerModelFactory.ts index acd1d9961f..b077a0a93d 100644 --- a/src/node/services/providerModelFactory.ts +++ b/src/node/services/providerModelFactory.ts @@ -34,12 +34,14 @@ import { normalizeToCanonical, } from "@/common/utils/ai/models"; import type { AnthropicCacheTtl } from "@/common/utils/ai/cacheStrategy"; +import { getSupportedEndpointsResolved } from "@/common/utils/ai/modelCapabilities"; import { MUX_APP_ATTRIBUTION_TITLE, MUX_APP_ATTRIBUTION_URL } from "@/constants/appAttribution"; import { 
resolveProviderCredentials } from "@/node/utils/providerRequirements"; import { normalizeGatewayStreamUsage, normalizeGatewayGenerateResult, } from "@/node/utils/gatewayStreamNormalization"; +import { wrapCopilotResponsesModel } from "@/node/services/copilotResponsesModel"; import { EnvHttpProxyAgent, type Dispatcher } from "undici"; import packageJson from "../../../package.json"; @@ -1488,8 +1490,6 @@ export class ProviderModelFactory { return Err({ type: "api_key_not_found", provider: providerName }); } - const { createOpenAICompatible } = await PROVIDER_REGISTRY["github-copilot"](); - const baseFetch = getProviderFetch(providerConfig); const copilotFetchFn = async ( input: Parameters[0], @@ -1528,6 +1528,34 @@ export class ProviderModelFactory { const providerFetch = copilotFetch; const baseURL = providerConfig.baseURL ?? "https://api.githubcopilot.com"; + + // Determine whether the Copilot model supports the Responses API. + // Check provider-scoped metadata first (github_copilot/model), then fall + // back to bare-model metadata via alias resolution. Conservative default: + // use chat/completions when no endpoint metadata exists. + const fullCopilotModelId = `github-copilot:${modelId}`; + const endpoints = getSupportedEndpointsResolved(fullCopilotModelId, providersConfig); + const supportsResponses = endpoints?.includes("/v1/responses") ?? false; + // Prefer responses when available; fall back to chat (current behavior) + // when only chat is supported or endpoint metadata is absent entirely. + const useCopilotResponses = supportsResponses; + + if (useCopilotResponses) { + // Use the full OpenAI SDK provider which supports .responses() — + // createOpenAICompatible only exposes .chatModel(). + // Keep provider name as "github-copilot" so the SDK reads provider + // options from the correct namespace (matching buildProviderOptions output). 
+ const { createOpenAI } = await PROVIDER_REGISTRY.openai(); + const provider = createOpenAI({ + name: "github-copilot", + baseURL, + apiKey: "copilot", // placeholder — actual auth via custom fetch + fetch: providerFetch, + }); + return Ok(wrapCopilotResponsesModel(provider.responses(modelId))); + } + + const { createOpenAICompatible } = await PROVIDER_REGISTRY["github-copilot"](); const provider = createOpenAICompatible({ name: "github-copilot", baseURL, diff --git a/src/node/utils/messages/normalizeToolCallIds.test.ts b/src/node/utils/messages/normalizeToolCallIds.test.ts new file mode 100644 index 0000000000..13c54d8dfc --- /dev/null +++ b/src/node/utils/messages/normalizeToolCallIds.test.ts @@ -0,0 +1,188 @@ +import { describe, it, expect } from "@jest/globals"; +import type { ModelMessage } from "ai"; +import { normalizeToolCallIds, shortenToolCallId } from "./normalizeToolCallIds"; + +/** Helper: a string of exactly `n` characters. */ +function makeId(n: number, prefix = "call_"): string { + return prefix + "x".repeat(Math.max(0, n - prefix.length)); +} + +describe("shortenToolCallId", () => { + it("returns a string of exactly 64 characters", () => { + const result = shortenToolCallId("a".repeat(100)); + expect(result.length).toBe(64); + }); + + it("starts with call_ prefix", () => { + const result = shortenToolCallId("something-very-long"); + expect(result.startsWith("call_")).toBe(true); + }); + + it("is deterministic — same input gives same output", () => { + const longId = "x".repeat(200); + expect(shortenToolCallId(longId)).toBe(shortenToolCallId(longId)); + }); + + it("produces different outputs for different inputs", () => { + const a = shortenToolCallId("a".repeat(100)); + const b = shortenToolCallId("b".repeat(100)); + expect(a).not.toBe(b); + }); +}); + +describe("normalizeToolCallIds", () => { + const shortId = makeId(64); // exactly at the limit — should NOT be rewritten + const longId = makeId(65); // one over the limit — should be rewritten + 
const veryLongId = makeId(200); // well over the limit + + it("returns the same array reference when no IDs exceed 64 chars", () => { + const messages: ModelMessage[] = [ + { + role: "assistant", + content: [{ type: "tool-call", toolCallId: shortId, toolName: "bash", input: {} }], + }, + { + role: "tool", + content: [ + { + type: "tool-result", + toolCallId: shortId, + toolName: "bash", + output: { type: "text" as const, value: "ok" }, + }, + ], + }, + ]; + + const result = normalizeToolCallIds(messages); + // Same reference means no unnecessary cloning + expect(result).toBe(messages); + }); + + it("rewrites oversized IDs in both tool-call and tool-result parts", () => { + const messages: ModelMessage[] = [ + { + role: "assistant", + content: [{ type: "tool-call", toolCallId: longId, toolName: "bash", input: {} }], + }, + { + role: "tool", + content: [ + { + type: "tool-result", + toolCallId: longId, + toolName: "bash", + output: { type: "text" as const, value: "ok" }, + }, + ], + }, + ]; + + const result = normalizeToolCallIds(messages); + + // Both should have the same normalized ID + const assistantContent = result[0].content as Array<{ toolCallId: string }>; + const toolContent = result[1].content as Array<{ toolCallId: string }>; + + expect(assistantContent[0].toolCallId.length).toBeLessThanOrEqual(64); + expect(toolContent[0].toolCallId.length).toBeLessThanOrEqual(64); + expect(assistantContent[0].toolCallId).toBe(toolContent[0].toolCallId); + }); + + it("preserves IDs that are within the limit alongside oversized ones", () => { + const messages: ModelMessage[] = [ + { + role: "assistant", + content: [ + { type: "tool-call", toolCallId: shortId, toolName: "bash", input: {} }, + { type: "tool-call", toolCallId: veryLongId, toolName: "edit", input: {} }, + ], + }, + { + role: "tool", + content: [ + { + type: "tool-result", + toolCallId: shortId, + toolName: "bash", + output: { type: "text" as const, value: "ok" }, + }, + ], + }, + { + role: "tool", + content: [ 
+ { + type: "tool-result", + toolCallId: veryLongId, + toolName: "edit", + output: { type: "text" as const, value: "done" }, + }, + ], + }, + ]; + + const result = normalizeToolCallIds(messages); + + const assistantContent = result[0].content as Array<{ toolCallId: string }>; + const toolContent0 = result[1].content as Array<{ toolCallId: string }>; + const toolContent1 = result[2].content as Array<{ toolCallId: string }>; + + // Short ID untouched + expect(assistantContent[0].toolCallId).toBe(shortId); + expect(toolContent0[0].toolCallId).toBe(shortId); + + // Long ID rewritten + expect(assistantContent[1].toolCallId).not.toBe(veryLongId); + expect(assistantContent[1].toolCallId.length).toBeLessThanOrEqual(64); + // Paired result matches + expect(toolContent1[0].toolCallId).toBe(assistantContent[1].toolCallId); + }); + + it("does not mutate original messages", () => { + const originalCallId = longId; + const messages: ModelMessage[] = [ + { + role: "assistant", + content: [{ type: "tool-call", toolCallId: originalCallId, toolName: "bash", input: {} }], + }, + { + role: "tool", + content: [ + { + type: "tool-result", + toolCallId: originalCallId, + toolName: "bash", + output: { type: "text" as const, value: "ok" }, + }, + ], + }, + ]; + + normalizeToolCallIds(messages); + + // Original messages untouched + const assistantContent = messages[0].content as Array<{ toolCallId: string }>; + const toolContent = messages[1].content as Array<{ toolCallId: string }>; + expect(assistantContent[0].toolCallId).toBe(originalCallId); + expect(toolContent[0].toolCallId).toBe(originalCallId); + }); + + it("passes through non-assistant/tool messages unchanged", () => { + const userMsg: ModelMessage = { role: "user", content: [{ type: "text", text: "hello" }] }; + const systemMsg: ModelMessage = { role: "system", content: "system prompt" }; + + const messages: ModelMessage[] = [userMsg, systemMsg]; + const result = normalizeToolCallIds(messages); + + expect(result[0]).toBe(userMsg); 
+ expect(result[1]).toBe(systemMsg); + }); + + it("handles assistant messages with string content (no tool calls)", () => { + const messages: ModelMessage[] = [{ role: "assistant", content: "just text" }]; + + const result = normalizeToolCallIds(messages); + expect(result).toBe(messages); + }); +}); diff --git a/src/node/utils/messages/normalizeToolCallIds.ts b/src/node/utils/messages/normalizeToolCallIds.ts new file mode 100644 index 0000000000..2319fbb26f --- /dev/null +++ b/src/node/utils/messages/normalizeToolCallIds.ts @@ -0,0 +1,109 @@ +/** + * Normalize oversized tool call IDs in ModelMessages. + * + * OpenAI's Responses API rejects `call_id` values longer than 64 characters. + * Tool call IDs generated by other providers (or by internal systems) can exceed + * this limit. This module rewrites them deterministically so that paired + * tool-call and tool-result parts still match after normalization. + * + * Applied request-only — does not mutate persisted history. + */ + +import { createHash } from "node:crypto"; +import type { ModelMessage } from "ai"; + +/** OpenAI's maximum allowed length for tool call IDs. */ +const MAX_TOOL_CALL_ID_LENGTH = 64; + +/** + * Prefix for normalized IDs so they're recognizable in debug logs. + * "call_" (5 chars) + 59 chars of hash = 64 chars total. + */ +const NORMALIZED_PREFIX = "call_"; +const HASH_LENGTH = MAX_TOOL_CALL_ID_LENGTH - NORMALIZED_PREFIX.length; + +/** + * Deterministically shorten a tool call ID to fit within the 64-char limit. + * Uses SHA-256 (hex) truncated to fill the available space after the prefix. + * The same input always produces the same output, so paired tool-call and + * tool-result parts will agree on the normalized value. + */ +export function shortenToolCallId(id: string): string { + const hash = createHash("sha256").update(id).digest("hex"); + return `${NORMALIZED_PREFIX}${hash.slice(0, HASH_LENGTH)}`; +} + +/** + * Build a mapping from oversized tool call IDs to their normalized forms. 
/**
 * Collect every oversized tool call ID referenced by the conversation and map
 * it to its shortened replacement.
 *
 * Every content part of an assistant or tool message that carries a string
 * `toolCallId` is inspected, not only `tool-call` / `tool-result` parts in
 * their "usual" role. An assistant message may also hold a `tool-result` part
 * (provider-executed tools); if that ID were skipped, the rewritten `tool-call`
 * and its result would no longer agree.
 *
 * NOTE(review): assumes any part exposing a string `toolCallId` refers to the
 * same ID namespace as `tool-call` parts — confirm against the AI SDK part types.
 *
 * @param messages - Conversation to scan. Never mutated.
 * @returns Map from each original oversized ID to its normalized form; empty
 *   when no ID exceeds the limit.
 */
function buildIdMapping(messages: ModelMessage[]): Map<string, string> {
  const mapping = new Map<string, string>();

  for (const msg of messages) {
    const carriesToolParts = msg.role === "assistant" || msg.role === "tool";
    // Assistant content may be a plain string; only arrays hold tool parts.
    if (!carriesToolParts || !Array.isArray(msg.content)) {
      continue;
    }
    for (const part of msg.content) {
      const id = readToolCallId(part);
      if (id !== undefined && id.length > MAX_TOOL_CALL_ID_LENGTH && !mapping.has(id)) {
        mapping.set(id, shortenToolCallId(id));
      }
    }
  }

  return mapping;
}

/** Return the part's `toolCallId` when it is a string, otherwise `undefined`. */
function readToolCallId(part: unknown): string | undefined {
  if (typeof part !== "object" || part === null) {
    return undefined;
  }
  const id = (part as { toolCallId?: unknown }).toolCallId;
  return typeof id === "string" ? id : undefined;
}

/**
 * Rewrite mapped tool call IDs in a list of content parts.
 *
 * Returns the input array itself when no part needed rewriting, so callers can
 * detect "unchanged" by reference. Rewritten parts are shallow clones; the
 * originals are left untouched.
 */
function rewriteParts<T extends object>(parts: T[], mapping: ReadonlyMap<string, string>): T[] {
  let changed = false;
  const rewritten = parts.map((part) => {
    const id = readToolCallId(part);
    const replacement = id === undefined ? undefined : mapping.get(id);
    if (replacement === undefined) {
      return part;
    }
    changed = true;
    return { ...part, toolCallId: replacement };
  });
  return changed ? rewritten : parts;
}

/**
 * Rewrite oversized tool call IDs in the given ModelMessages.
 *
 * IDs longer than the limit are replaced by their deterministic shortened
 * form, so every part referring to the same call (tool-call, tool-result, in
 * either assistant or tool messages) ends up with the same normalized ID.
 *
 * This is a request-only transform: the input array, its messages and their
 * parts are never mutated.
 *
 * @param messages - Conversation about to be sent to the provider.
 * @returns The same array reference when no ID exceeds the limit. Otherwise a
 *   new array in which only affected messages and parts are shallow-cloned;
 *   unaffected messages are passed through by reference.
 */
export function normalizeToolCallIds(messages: ModelMessage[]): ModelMessage[] {
  const mapping = buildIdMapping(messages);
  if (mapping.size === 0) {
    return messages;
  }

  return messages.map((msg): ModelMessage => {
    // The two roles are handled in separate branches so each spread keeps its
    // own precise message type.
    if (msg.role === "assistant" && Array.isArray(msg.content)) {
      const content = rewriteParts(msg.content, mapping);
      return content === msg.content ? msg : { ...msg, content };
    }

    if (msg.role === "tool") {
      const content = rewriteParts(msg.content, mapping);
      return content === msg.content ? msg : { ...msg, content };
    }

    return msg;
  });
}