Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 13 additions & 26 deletions extensions/cli/src/stream/streamChatResponse.helpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -383,40 +383,27 @@ export function recordStreamTelemetry(options: {
cacheWriteTokens,
});

// Emit prompt_cache_metrics for the Prompt Cache Performance dashboard
if (actualInputTokens > 0) {
posthogService.capture("prompt_cache_metrics", {
model: model.model,
cache_read_tokens: cacheReadTokens,
cache_write_tokens: cacheWriteTokens,
total_prompt_tokens: actualInputTokens,
cache_hit_rate: cacheReadTokens / actualInputTokens,
tool_count: tools?.length ?? 0,
});
}
} catch {}

// Report prompt cache metrics to PostHog
if (fullUsage?.prompt_tokens_details) {
const cacheReadTokens =
fullUsage.prompt_tokens_details.cache_read_tokens ?? 0;
const cacheWriteTokens =
fullUsage.prompt_tokens_details.cache_write_tokens ?? 0;
const totalPromptTokens = fullUsage.prompt_tokens ?? 0;
const cacheHitRate =
totalPromptTokens > 0 ? cacheReadTokens / totalPromptTokens : 0;
// Emit prompt_cache_metrics for the Prompt Cache Performance dashboard.
// total_prompt_tokens must include ALL input token types: non-cached input
// tokens (prompt_tokens) + cache reads + cache writes. Anthropic's
// `input_tokens` field counts only non-cached tokens, so using it alone as
// the denominator yields a cache_hit_rate far greater than 1 when caching
// works well.
if (fullUsage?.prompt_tokens_details) {
const totalPromptTokens =
(fullUsage.prompt_tokens ?? 0) + cacheReadTokens + cacheWriteTokens;
const cacheHitRate =
totalPromptTokens > 0 ? cacheReadTokens / totalPromptTokens : 0;

try {
void posthogService.capture("prompt_cache_metrics", {
posthogService.capture("prompt_cache_metrics", {
model: model.model,
cache_read_tokens: cacheReadTokens,
cache_write_tokens: cacheWriteTokens,
total_prompt_tokens: totalPromptTokens,
cache_hit_rate: cacheHitRate,
tool_count: tools?.length ?? 0,
});
} catch {}
}
}
} catch {}

return cost;
}
Expand Down
48 changes: 25 additions & 23 deletions packages/openai-adapters/src/test/vercelStreamConverter.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ describe("convertVercelStreamPart", () => {
expect(result?.choices[0].delta.content).toBe("Let me think...");
});

test("converts tool-call to chat chunk", () => {
test("returns null for tool-call (handled by tool-input-start/delta)", () => {
const part: VercelStreamPart = {
type: "tool-call",
toolCallId: "call_abc123",
Expand All @@ -47,17 +47,7 @@ describe("convertVercelStreamPart", () => {

const result = convertVercelStreamPart(part, options);

expect(result).not.toBeNull();
expect(result?.choices[0].delta.tool_calls).toHaveLength(1);
expect(result?.choices[0].delta.tool_calls?.[0]).toEqual({
index: 0,
id: "call_abc123",
type: "function",
function: {
name: "readFile",
arguments: JSON.stringify({ filepath: "/path/to/file" }),
},
});
expect(result).toBeNull();
});

test("converts tool-input-delta to chat chunk", () => {
Expand Down Expand Up @@ -170,7 +160,7 @@ describe("convertVercelStreamPart", () => {
expect(result).toBeNull();
});

test("returns null for tool-input-start", () => {
test("converts tool-input-start to initial tool call chunk with id and name", () => {
const part: VercelStreamPart = {
type: "tool-input-start",
id: "call_abc123",
Expand All @@ -179,7 +169,17 @@ describe("convertVercelStreamPart", () => {

const result = convertVercelStreamPart(part, options);

expect(result).toBeNull();
expect(result).not.toBeNull();
expect(result?.choices[0].delta.tool_calls).toHaveLength(1);
expect(result?.choices[0].delta.tool_calls?.[0]).toEqual({
index: 0,
id: "call_abc123",
type: "function",
function: {
name: "readFile",
arguments: "",
},
});
});
});

Expand All @@ -191,6 +191,9 @@ describe("convertVercelStream", () => {
{ type: "start-step" },
{ type: "text-delta", id: "text-1", text: "Hello " },
{ type: "text-delta", id: "text-1", text: "world" },
{ type: "tool-input-start", id: "call_1", toolName: "test" },
{ type: "tool-input-delta", id: "call_1", delta: '{"arg":"value"}' },
{ type: "tool-input-end", id: "call_1" },
{
type: "tool-call",
toolCallId: "call_1",
Expand Down Expand Up @@ -221,16 +224,20 @@ describe("convertVercelStream", () => {
chunks.push(chunk);
}

// Should only get chunks for: text-delta (2), tool-call (1), finish (1) = 4 chunks
// start-step and finish-step are filtered out
expect(chunks).toHaveLength(4);
// Should get chunks for: text-delta (2), tool-input-start (1), tool-input-delta (1), finish (1) = 5
// start-step, tool-input-end, tool-call, and finish-step are filtered out
expect(chunks).toHaveLength(5);

expect(chunks[0].choices[0].delta.content).toBe("Hello ");
expect(chunks[1].choices[0].delta.content).toBe("world");
expect(chunks[2].choices[0].delta.tool_calls?.[0].id).toBe("call_1");
expect(chunks[2].choices[0].delta.tool_calls?.[0].function?.name).toBe(
"test",
);
expect(chunks[3].usage).toBeDefined();
expect(chunks[3].choices[0].delta.tool_calls?.[0].function?.arguments).toBe(
'{"arg":"value"}',
);
expect(chunks[4].usage).toBeDefined();
});

test("throws error when stream contains error event", async () => {
Expand Down Expand Up @@ -262,11 +269,6 @@ describe("convertVercelStream", () => {
{ type: "start-step" },
{ type: "source", source: {} },
{ type: "file", file: { name: "test.txt", content: "content" } },
{
type: "tool-input-start",
id: "call_1",
toolName: "test",
},
{ type: "tool-result", toolCallId: "call_1", result: {} },
];

Expand Down
15 changes: 11 additions & 4 deletions packages/openai-adapters/src/vercelStreamConverter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -91,17 +91,19 @@ export function convertVercelStreamPart(
model,
});

case "tool-call":
case "tool-input-start":
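// Emit the initial chunk with id and function name, matching OpenAI's
// streaming format where the first tool call chunk carries the id/name.
// `arguments` starts as an empty string here; the argument text is
// delivered by the tool-input-delta chunks that follow.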
return chatChunkFromDelta({
delta: {
tool_calls: [
{
index: 0,
id: part.toolCallId,
id: part.id,
type: "function" as const,
function: {
name: part.toolName,
arguments: JSON.stringify(part.input),
arguments: "",
},
},
],
Expand All @@ -124,6 +126,12 @@ export function convertVercelStreamPart(
model,
});

case "tool-call":
// tool-call is emitted after tool-input-start/delta/end have already
// streamed the complete tool call. Emitting it again would duplicate
// the arguments. Skip it since streaming events already handled it.
return null;

case "finish":
if (part.totalUsage) {
const inputTokens =
Expand Down Expand Up @@ -178,7 +186,6 @@ export function convertVercelStreamPart(
case "reasoning-end":
case "source":
case "file":
case "tool-input-start":
case "tool-input-end":
case "tool-result":
case "start-step":
Expand Down
Loading