continuedev · sestinj · Mar 3, 2026 · Mar 3, 2026
@@ -383,40 +383,27 @@ export function recordStreamTelemetry(options: {
       cacheWriteTokens,
     });
 
-    // Emit prompt_cache_metrics for the Prompt Cache Performance dashboard
-    if (actualInputTokens > 0) {
-      posthogService.capture("prompt_cache_metrics", {
-        model: model.model,
-        cache_read_tokens: cacheReadTokens,
-        cache_write_tokens: cacheWriteTokens,
-        total_prompt_tokens: actualInputTokens,
-        cache_hit_rate: cacheReadTokens / actualInputTokens,
-        tool_count: tools?.length ?? 0,
-      });
-    }
-  } catch {}
-
-  // Report prompt cache metrics to PostHog
-  if (fullUsage?.prompt_tokens_details) {
-    const cacheReadTokens =
-      fullUsage.prompt_tokens_details.cache_read_tokens ?? 0;
-    const cacheWriteTokens =
-      fullUsage.prompt_tokens_details.cache_write_tokens ?? 0;
-    const totalPromptTokens = fullUsage.prompt_tokens ?? 0;
-    const cacheHitRate =
-      totalPromptTokens > 0 ? cacheReadTokens / totalPromptTokens : 0;
+    // Emit prompt_cache_metrics for the Prompt Cache Performance dashboard.
+    // total_prompt_tokens must include ALL input token types: non-cached input
+    // tokens (prompt_tokens) + cache reads + cache writes. Anthropic's
+    // `input_tokens` field only counts non-cached tokens, so using it alone as
+    // the denominator produces ratios >> 1 when caching works well.
+    if (fullUsage?.prompt_tokens_details) {
+      const totalPromptTokens =
+        (fullUsage.prompt_tokens ?? 0) + cacheReadTokens + cacheWriteTokens;
+      const cacheHitRate =
+        totalPromptTokens > 0 ? cacheReadTokens / totalPromptTokens : 0;
 
-    try {
-      void posthogService.capture("prompt_cache_metrics", {
+      posthogService.capture("prompt_cache_metrics", {
         model: model.model,
         cache_read_tokens: cacheReadTokens,
         cache_write_tokens: cacheWriteTokens,
         total_prompt_tokens: totalPromptTokens,
         cache_hit_rate: cacheHitRate,
         tool_count: tools?.length ?? 0,
       });
-    } catch {}
-  }
+    }
+  } catch {}
 
   return cost;
 }

@@ -37,7 +37,7 @@ describe("convertVercelStreamPart", () => {
     expect(result?.choices[0].delta.content).toBe("Let me think...");
   });
 
-  test("converts tool-call to chat chunk", () => {
+  test("returns null for tool-call (handled by tool-input-start/delta)", () => {
     const part: VercelStreamPart = {
       type: "tool-call",
       toolCallId: "call_abc123",
@@ -47,17 +47,7 @@ describe("convertVercelStreamPart", () => {
 
     const result = convertVercelStreamPart(part, options);
 
-    expect(result).not.toBeNull();
-    expect(result?.choices[0].delta.tool_calls).toHaveLength(1);
-    expect(result?.choices[0].delta.tool_calls?.[0]).toEqual({
-      index: 0,
-      id: "call_abc123",
-      type: "function",
-      function: {
-        name: "readFile",
-        arguments: JSON.stringify({ filepath: "/path/to/file" }),
-      },
-    });
+    expect(result).toBeNull();
   });
 
   test("converts tool-input-delta to chat chunk", () => {
@@ -170,7 +160,7 @@ describe("convertVercelStreamPart", () => {
     expect(result).toBeNull();
   });
 
-  test("returns null for tool-input-start", () => {
+  test("converts tool-input-start to initial tool call chunk with id and name", () => {
     const part: VercelStreamPart = {
       type: "tool-input-start",
       id: "call_abc123",
@@ -179,7 +169,17 @@ describe("convertVercelStreamPart", () => {
 
     const result = convertVercelStreamPart(part, options);
 
-    expect(result).toBeNull();
+    expect(result).not.toBeNull();
+    expect(result?.choices[0].delta.tool_calls).toHaveLength(1);
+    expect(result?.choices[0].delta.tool_calls?.[0]).toEqual({
+      index: 0,
+      id: "call_abc123",
+      type: "function",
+      function: {
+        name: "readFile",
+        arguments: "",
+      },
+    });
   });
 });
 
@@ -191,6 +191,9 @@ describe("convertVercelStream", () => {
       { type: "start-step" },
       { type: "text-delta", id: "text-1", text: "Hello " },
       { type: "text-delta", id: "text-1", text: "world" },
+      { type: "tool-input-start", id: "call_1", toolName: "test" },
+      { type: "tool-input-delta", id: "call_1", delta: '{"arg":"value"}' },
+      { type: "tool-input-end", id: "call_1" },
       {
         type: "tool-call",
         toolCallId: "call_1",
@@ -221,16 +224,20 @@ describe("convertVercelStream", () => {
       chunks.push(chunk);
     }
 
-    // Should only get chunks for: text-delta (2), tool-call (1), finish (1) = 4 chunks
-    // start-step and finish-step are filtered out
-    expect(chunks).toHaveLength(4);
+    // Should get chunks for: text-delta (2), tool-input-start (1), tool-input-delta (1), finish (1) = 5
+    // start-step, tool-input-end, tool-call, and finish-step are filtered out
+    expect(chunks).toHaveLength(5);
 
     expect(chunks[0].choices[0].delta.content).toBe("Hello ");
     expect(chunks[1].choices[0].delta.content).toBe("world");
+    expect(chunks[2].choices[0].delta.tool_calls?.[0].id).toBe("call_1");
     expect(chunks[2].choices[0].delta.tool_calls?.[0].function?.name).toBe(
       "test",
     );
-    expect(chunks[3].usage).toBeDefined();
+    expect(chunks[3].choices[0].delta.tool_calls?.[0].function?.arguments).toBe(
+      '{"arg":"value"}',
+    );
+    expect(chunks[4].usage).toBeDefined();
   });
 
   test("throws error when stream contains error event", async () => {
@@ -262,11 +269,6 @@ describe("convertVercelStream", () => {
       { type: "start-step" },
       { type: "source", source: {} },
       { type: "file", file: { name: "test.txt", content: "content" } },
-      {
-        type: "tool-input-start",
-        id: "call_1",
-        toolName: "test",
-      },
       { type: "tool-result", toolCallId: "call_1", result: {} },
     ];
 

@@ -91,17 +91,19 @@ export function convertVercelStreamPart(
         model,
       });
 
-    case "tool-call":
+    case "tool-input-start":
+      // Emit the initial chunk with id and function name, matching OpenAI's
+      // streaming format where the first tool call chunk carries the id/name.
       return chatChunkFromDelta({
         delta: {
           tool_calls: [
             {
               index: 0,
-              id: part.toolCallId,
+              id: part.id,
               type: "function" as const,
               function: {
                 name: part.toolName,
-                arguments: JSON.stringify(part.input),
+                arguments: "",
               },
             },
           ],
@@ -124,6 +126,12 @@ export function convertVercelStreamPart(
         model,
       });
 
+    case "tool-call":
+      // tool-call is emitted after tool-input-start/delta/end have already
+      // streamed the complete tool call. Emitting it again would duplicate
+      // the arguments. Skip it since streaming events already handled it.
+      return null;
+
     case "finish":
       if (part.totalUsage) {
         const inputTokens =
@@ -178,7 +186,6 @@ export function convertVercelStreamPart(
     case "reasoning-end":
     case "source":
     case "file":
-    case "tool-input-start":
     case "tool-input-end":
     case "tool-result":
     case "start-step":