From c907dc61f9c429dc113f59ca9380a5428035433a Mon Sep 17 00:00:00 2001 From: "lialia.sakhno" Date: Fri, 22 May 2026 20:45:05 +0300 Subject: [PATCH] fix(deepseek): extract prompt_cache_hit_tokens and reasoning_tokens from usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The DeepSeek Text and Stream handlers hardcode `Usage` to only `prompt_tokens` and `completion_tokens`, silently dropping two DeepSeek-specific usage fields: - `usage.prompt_cache_hit_tokens` — cached input portion of the prompt. DeepSeek offers a 98% discount on cache hits (their headline feature) and reports the hit/miss split as separate counters. - `usage.completion_tokens_details.reasoning_tokens` — internal thinking tokens emitted by reasoning models (deepseek-reasoner, deepseek-v4-flash thinking mode). Without these fields, cost trackers that subscribe to `cacheReadInputTokens` see zero and charge the full `prompt_tokens` at the fresh (uncached) rate — overstating real spend ~3-5x once the prompt cache warms up — and reasoning-mode token usage is invisible to observability tooling. Both handlers now subtract `prompt_cache_hit_tokens` from `prompt_tokens` to derive the fresh-prompt count, and populate `Usage` with `cacheReadInputTokens` and `thoughtTokens`. This mirrors what the Gemini and OpenAI handlers already do for their analogous fields. The multi-step tools test asserts the new semantics: the aggregated promptTokens reflects fresh-only counts, and the previously invisible cacheReadInputTokens is now exposed. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/Providers/DeepSeek/Handlers/Stream.php | 10 ++++++++-- src/Providers/DeepSeek/Handlers/Text.php | 10 ++++++++-- tests/Providers/DeepSeek/TextTest.php | 9 +++++++-- 3 files changed, 23 insertions(+), 6 deletions(-) diff --git a/src/Providers/DeepSeek/Handlers/Stream.php b/src/Providers/DeepSeek/Handlers/Stream.php index 8ae26e57b..815fd2f20 100644 --- a/src/Providers/DeepSeek/Handlers/Stream.php +++ b/src/Providers/DeepSeek/Handlers/Stream.php @@ -357,9 +357,15 @@ protected function extractUsage(array $data): ?Usage return null; } + $totalPrompt = (int) data_get($usage, 'prompt_tokens', 0); + $cacheHit = (int) data_get($usage, 'prompt_cache_hit_tokens', 0); + $reasoning = (int) data_get($usage, 'completion_tokens_details.reasoning_tokens', 0); + return new Usage( - promptTokens: (int) data_get($usage, 'prompt_tokens', 0), - completionTokens: (int) data_get($usage, 'completion_tokens', 0) + promptTokens: max(0, $totalPrompt - $cacheHit), + completionTokens: (int) data_get($usage, 'completion_tokens', 0), + cacheReadInputTokens: $cacheHit > 0 ? $cacheHit : null, + thoughtTokens: $reasoning > 0 ? $reasoning : null, ); } diff --git a/src/Providers/DeepSeek/Handlers/Text.php b/src/Providers/DeepSeek/Handlers/Text.php index c8a15d253..5c67ce5e7 100644 --- a/src/Providers/DeepSeek/Handlers/Text.php +++ b/src/Providers/DeepSeek/Handlers/Text.php @@ -122,6 +122,10 @@ protected function sendRequest(Request $request): array */ protected function addStep(array $data, Request $request, array $toolResults = []): void { + $totalPrompt = (int) (data_get($data, 'usage.prompt_tokens') ?? 0); + $cacheHit = (int) (data_get($data, 'usage.prompt_cache_hit_tokens') ?? 0); + $reasoning = (int) (data_get($data, 'usage.completion_tokens_details.reasoning_tokens') ?? 0); + $this->responseBuilder->addStep(new Step( text: data_get($data, 'choices.0.message.content') ?? 
'', finishReason: $this->mapFinishReason($data), @@ -129,8 +133,10 @@ protected function addStep(array $data, Request $request, array $toolResults = [ toolResults: $toolResults, providerToolCalls: [], usage: new Usage( - data_get($data, 'usage.prompt_tokens'), - data_get($data, 'usage.completion_tokens'), + promptTokens: max(0, $totalPrompt - $cacheHit), + completionTokens: (int) (data_get($data, 'usage.completion_tokens') ?? 0), + cacheReadInputTokens: $cacheHit > 0 ? $cacheHit : null, + thoughtTokens: $reasoning > 0 ? $reasoning : null, ), meta: new Meta( id: data_get($data, 'id'), diff --git a/tests/Providers/DeepSeek/TextTest.php b/tests/Providers/DeepSeek/TextTest.php index 94c47c4d4..055537d2a 100644 --- a/tests/Providers/DeepSeek/TextTest.php +++ b/tests/Providers/DeepSeek/TextTest.php @@ -129,8 +129,13 @@ expect($secondStep->messages[1]->toolCalls[1]->name)->toBe('weather'); expect($secondStep->messages[2])->toBeInstanceOf(ToolResultMessage::class); - // Assert usage - expect($response->usage->promptTokens)->toBe(507); + // Assert usage. promptTokens is now the FRESH portion (prompt_tokens minus + // prompt_cache_hit_tokens) so cost trackers can apply the cached rate to the + // hit portion separately. Aggregated across both steps: + // step 1 fixture: prompt_tokens=220, prompt_cache_hit_tokens=192 → fresh 28, cached 192 + // step 2 fixture: prompt_tokens=287, prompt_cache_hit_tokens=256 → fresh 31, cached 256 + expect($response->usage->promptTokens)->toBe(59); + expect($response->usage->cacheReadInputTokens)->toBe(448); expect($response->usage->completionTokens)->toBe(76); // Assert response