diff --git a/extensions/cli/src/stream/streamChatResponse.helpers.ts b/extensions/cli/src/stream/streamChatResponse.helpers.ts index 6f8f3fa18de..da3f343971d 100644 --- a/extensions/cli/src/stream/streamChatResponse.helpers.ts +++ b/extensions/cli/src/stream/streamChatResponse.helpers.ts @@ -383,31 +383,18 @@ export function recordStreamTelemetry(options: { cacheWriteTokens, }); - // Emit prompt_cache_metrics for the Prompt Cache Performance dashboard - if (actualInputTokens > 0) { - posthogService.capture("prompt_cache_metrics", { - model: model.model, - cache_read_tokens: cacheReadTokens, - cache_write_tokens: cacheWriteTokens, - total_prompt_tokens: actualInputTokens, - cache_hit_rate: cacheReadTokens / actualInputTokens, - tool_count: tools?.length ?? 0, - }); - } - } catch {} - - // Report prompt cache metrics to PostHog - if (fullUsage?.prompt_tokens_details) { - const cacheReadTokens = - fullUsage.prompt_tokens_details.cache_read_tokens ?? 0; - const cacheWriteTokens = - fullUsage.prompt_tokens_details.cache_write_tokens ?? 0; - const totalPromptTokens = fullUsage.prompt_tokens ?? 0; - const cacheHitRate = - totalPromptTokens > 0 ? cacheReadTokens / totalPromptTokens : 0; + // Emit prompt_cache_metrics for the Prompt Cache Performance dashboard. + // total_prompt_tokens must include ALL input token types: non-cached input + // tokens (prompt_tokens) + cache reads + cache writes. Anthropic's + // `input_tokens` field only counts non-cached tokens, so using it alone as + // the denominator produces ratios >> 1 when caching works well. + if (fullUsage?.prompt_tokens_details) { + const totalPromptTokens = + (fullUsage.prompt_tokens ?? 0) + cacheReadTokens + cacheWriteTokens; + const cacheHitRate = + totalPromptTokens > 0 ? cacheReadTokens / totalPromptTokens : 0; - try { - void posthogService.capture("prompt_cache_metrics", { + posthogService.capture("prompt_cache_metrics", { model: model.model, cache_read_tokens: cacheReadTokens, cache_write_tokens: cacheWriteTokens, @@ -415,8 +402,8 @@ export function recordStreamTelemetry(options: { cache_hit_rate: cacheHitRate, tool_count: tools?.length ?? 0, }); - } catch {} - } + } + } catch {} return cost; } diff --git a/packages/openai-adapters/src/test/vercelStreamConverter.test.ts b/packages/openai-adapters/src/test/vercelStreamConverter.test.ts index 8bbb9e31c8d..839e3a8c781 100644 --- a/packages/openai-adapters/src/test/vercelStreamConverter.test.ts +++ b/packages/openai-adapters/src/test/vercelStreamConverter.test.ts @@ -37,7 +37,7 @@ describe("convertVercelStreamPart", () => { expect(result?.choices[0].delta.content).toBe("Let me think..."); }); - test("converts tool-call to chat chunk", () => { + test("returns null for tool-call (handled by tool-input-start/delta)", () => { const part: VercelStreamPart = { type: "tool-call", toolCallId: "call_abc123", @@ -47,17 +47,7 @@ describe("convertVercelStreamPart", () => { const result = convertVercelStreamPart(part, options); - expect(result).not.toBeNull(); - expect(result?.choices[0].delta.tool_calls).toHaveLength(1); - expect(result?.choices[0].delta.tool_calls?.[0]).toEqual({ - index: 0, - id: "call_abc123", - type: "function", - function: { - name: "readFile", - arguments: JSON.stringify({ filepath: "/path/to/file" }), - }, - }); + expect(result).toBeNull(); }); test("converts tool-input-delta to chat chunk", () => { @@ -170,7 +160,7 @@ describe("convertVercelStreamPart", () => { expect(result).toBeNull(); }); - test("returns null for tool-input-start", () => { + test("converts tool-input-start to initial tool call chunk with id and name", () => { const part: VercelStreamPart = { type: "tool-input-start", id: "call_abc123", @@ -179,7 +169,17 @@ describe("convertVercelStreamPart", () => { const result = convertVercelStreamPart(part, options); - expect(result).toBeNull(); + expect(result).not.toBeNull(); + expect(result?.choices[0].delta.tool_calls).toHaveLength(1); + expect(result?.choices[0].delta.tool_calls?.[0]).toEqual({ + index: 0, + id: "call_abc123", + type: "function", + function: { + name: "readFile", + arguments: "", + }, + }); }); }); @@ -191,6 +191,9 @@ describe("convertVercelStream", () => { { type: "start-step" }, { type: "text-delta", id: "text-1", text: "Hello " }, { type: "text-delta", id: "text-1", text: "world" }, + { type: "tool-input-start", id: "call_1", toolName: "test" }, + { type: "tool-input-delta", id: "call_1", delta: '{"arg":"value"}' }, + { type: "tool-input-end", id: "call_1" }, { type: "tool-call", toolCallId: "call_1", @@ -221,16 +224,20 @@ describe("convertVercelStream", () => { chunks.push(chunk); } - // Should only get chunks for: text-delta (2), tool-call (1), finish (1) = 4 chunks - // start-step and finish-step are filtered out - expect(chunks).toHaveLength(4); + // Should get chunks for: text-delta (2), tool-input-start (1), tool-input-delta (1), finish (1) = 5 + // start-step, tool-input-end, tool-call, and finish-step are filtered out + expect(chunks).toHaveLength(5); expect(chunks[0].choices[0].delta.content).toBe("Hello "); expect(chunks[1].choices[0].delta.content).toBe("world"); + expect(chunks[2].choices[0].delta.tool_calls?.[0].id).toBe("call_1"); expect(chunks[2].choices[0].delta.tool_calls?.[0].function?.name).toBe( "test", ); - expect(chunks[3].usage).toBeDefined(); + expect(chunks[3].choices[0].delta.tool_calls?.[0].function?.arguments).toBe( + '{"arg":"value"}', + ); + expect(chunks[4].usage).toBeDefined(); }); test("throws error when stream contains error event", async () => { @@ -262,11 +269,6 @@ describe("convertVercelStream", () => { { type: "start-step" }, { type: "source", source: {} }, { type: "file", file: { name: "test.txt", content: "content" } }, - { - type: "tool-input-start", - id: "call_1", - toolName: "test", - }, { type: "tool-result", toolCallId: "call_1", result: {} }, ]; diff --git a/packages/openai-adapters/src/vercelStreamConverter.ts b/packages/openai-adapters/src/vercelStreamConverter.ts index 990053e663c..53f4cc75a8f 100644 --- a/packages/openai-adapters/src/vercelStreamConverter.ts +++ b/packages/openai-adapters/src/vercelStreamConverter.ts @@ -91,17 +91,19 @@ export function convertVercelStreamPart( model, }); - case "tool-call": + case "tool-input-start": + // Emit the initial chunk with id and function name, matching OpenAI's + // streaming format where the first tool call chunk carries the id/name. return chatChunkFromDelta({ delta: { tool_calls: [ { index: 0, - id: part.toolCallId, + id: part.id, type: "function" as const, function: { name: part.toolName, - arguments: JSON.stringify(part.input), + arguments: "", }, }, ], @@ -124,6 +126,12 @@ export function convertVercelStreamPart( model, }); + case "tool-call": + // tool-call is emitted after tool-input-start/delta/end have already + // streamed the complete tool call. Emitting it again would duplicate + // the arguments. Skip it since streaming events already handled it. + return null; + case "finish": if (part.totalUsage) { const inputTokens = @@ -178,7 +186,6 @@ export function convertVercelStreamPart( case "reasoning-end": case "source": case "file": - case "tool-input-start": case "tool-input-end": case "tool-result": case "start-step":