From c907dc61f9c429dc113f59ca9380a5428035433a Mon Sep 17 00:00:00 2001
From: "lialia.sakhno" <lintume@gmail.com>
Date: Fri, 22 May 2026 20:45:05 +0300
Subject: [PATCH] fix(deepseek): extract prompt_cache_hit_tokens and
 reasoning_tokens from usage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The DeepSeek Text and Stream handlers hardcode `Usage` to only `prompt_tokens`
and `completion_tokens`, silently dropping two DeepSeek-specific usage fields:

- `usage.prompt_cache_hit_tokens` — cached input portion of the prompt.
  DeepSeek offers a 98% discount on cache hits (their headline feature)
  and reports the hit/miss split as separate counters.
- `usage.completion_tokens_details.reasoning_tokens` — internal thinking
  tokens emitted by reasoning models (deepseek-reasoner, deepseek-v4-flash
  thinking mode).

Without these, cost trackers that read `cacheReadInputTokens` see no cache
hits and bill the full `prompt_tokens` at the cache-miss rate — overstating
real spend ~3-5x once the prompt cache warms up. Reasoning-mode token usage
is likewise invisible to observability tooling.

Both handlers now subtract `prompt_cache_hit_tokens` from `prompt_tokens`
(clamped at zero) to derive the fresh-prompt count, and populate `Usage` with
`cacheReadInputTokens` and `thoughtTokens`; each of those is left null when
the reported count is not positive. This mirrors what the Gemini and OpenAI
handlers already do for their analogous fields.

The multi-step tools test asserts the new semantics: aggregated
promptTokens reflects fresh-only counts and the previously-invisible
cacheReadInputTokens is now exposed.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/Providers/DeepSeek/Handlers/Stream.php | 10 ++++++++--
 src/Providers/DeepSeek/Handlers/Text.php   | 10 ++++++++--
 tests/Providers/DeepSeek/TextTest.php      |  9 +++++++--
 3 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/src/Providers/DeepSeek/Handlers/Stream.php b/src/Providers/DeepSeek/Handlers/Stream.php
index 8ae26e57b..815fd2f20 100644
--- a/src/Providers/DeepSeek/Handlers/Stream.php
+++ b/src/Providers/DeepSeek/Handlers/Stream.php
@@ -357,9 +357,15 @@ protected function extractUsage(array $data): ?Usage
             return null;
         }
 
+        $totalPrompt = (int) data_get($usage, 'prompt_tokens', 0);
+        $cacheHit = (int) data_get($usage, 'prompt_cache_hit_tokens', 0);
+        $reasoning = (int) data_get($usage, 'completion_tokens_details.reasoning_tokens', 0);
+
         return new Usage(
-            promptTokens: (int) data_get($usage, 'prompt_tokens', 0),
-            completionTokens: (int) data_get($usage, 'completion_tokens', 0)
+            promptTokens: max(0, $totalPrompt - $cacheHit),
+            completionTokens: (int) data_get($usage, 'completion_tokens', 0),
+            cacheReadInputTokens: $cacheHit > 0 ? $cacheHit : null,
+            thoughtTokens: $reasoning > 0 ? $reasoning : null,
         );
     }
 
diff --git a/src/Providers/DeepSeek/Handlers/Text.php b/src/Providers/DeepSeek/Handlers/Text.php
index c8a15d253..5c67ce5e7 100644
--- a/src/Providers/DeepSeek/Handlers/Text.php
+++ b/src/Providers/DeepSeek/Handlers/Text.php
@@ -122,6 +122,10 @@ protected function sendRequest(Request $request): array
      */
     protected function addStep(array $data, Request $request, array $toolResults = []): void
     {
+        $totalPrompt = (int) (data_get($data, 'usage.prompt_tokens') ?? 0);
+        $cacheHit = (int) (data_get($data, 'usage.prompt_cache_hit_tokens') ?? 0);
+        $reasoning = (int) (data_get($data, 'usage.completion_tokens_details.reasoning_tokens') ?? 0);
+
         $this->responseBuilder->addStep(new Step(
             text: data_get($data, 'choices.0.message.content') ?? '',
             finishReason: $this->mapFinishReason($data),
@@ -129,8 +133,10 @@ protected function addStep(array $data, Request $request, array $toolResults = [
             toolResults: $toolResults,
             providerToolCalls: [],
             usage: new Usage(
-                data_get($data, 'usage.prompt_tokens'),
-                data_get($data, 'usage.completion_tokens'),
+                promptTokens: max(0, $totalPrompt - $cacheHit),
+                completionTokens: (int) (data_get($data, 'usage.completion_tokens') ?? 0),
+                cacheReadInputTokens: $cacheHit > 0 ? $cacheHit : null,
+                thoughtTokens: $reasoning > 0 ? $reasoning : null,
             ),
             meta: new Meta(
                 id: data_get($data, 'id'),
diff --git a/tests/Providers/DeepSeek/TextTest.php b/tests/Providers/DeepSeek/TextTest.php
index 94c47c4d4..055537d2a 100644
--- a/tests/Providers/DeepSeek/TextTest.php
+++ b/tests/Providers/DeepSeek/TextTest.php
@@ -129,8 +129,13 @@
     expect($secondStep->messages[1]->toolCalls[1]->name)->toBe('weather');
     expect($secondStep->messages[2])->toBeInstanceOf(ToolResultMessage::class);
 
-    // Assert usage
-    expect($response->usage->promptTokens)->toBe(507);
+    // Assert usage. promptTokens is the fresh (cache-miss) portion: prompt_tokens
+    // minus prompt_cache_hit_tokens, so cost trackers can price the cached portion
+    // separately via cacheReadInputTokens. Aggregated across both steps:
+    //   step 1 fixture: prompt_tokens=220, prompt_cache_hit_tokens=192 → fresh 28, cached 192
+    //   step 2 fixture: prompt_tokens=287, prompt_cache_hit_tokens=256 → fresh 31, cached 256
+    expect($response->usage->promptTokens)->toBe(59);
+    expect($response->usage->cacheReadInputTokens)->toBe(448);
     expect($response->usage->completionTokens)->toBe(76);
 
     // Assert response