Skip to content

Commit a35b586

Browse files
theblazehen and claude committed
fix(anthropic): cache thinking signatures server-side for multi-turn
Anthropic now requires a `signature` field on all thinking blocks. OpenAI-format clients (JS SDK etc.) can't preserve signatures across turns, causing 400 errors on multi-turn conversations with thinking. Cache thinking block signatures server-side (keyed by SHA-256 of the thinking content) and re-attach them when converting messages back to Anthropic format. Falls back to dropping thinking blocks gracefully when no cached signature is available. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 7aa20d4 commit a35b586

1 file changed

Lines changed: 83 additions & 8 deletions

File tree

src/rotator_library/providers/anthropic_provider.py

Lines changed: 83 additions & 8 deletions
Original file line number · Diff line number · Diff line change
@@ -4,6 +4,7 @@
44
# src/rotator_library/providers/anthropic_provider.py
55

66
import copy
7+
import hashlib
78
import json
89
import os
910
import time
@@ -18,6 +19,7 @@
1819

1920
from .provider_interface import ProviderInterface
2021
from .anthropic_auth_base import AnthropicAuthBase
22+
from .provider_cache import create_provider_cache
2123
from ..timeout_config import TimeoutConfig
2224
from ..transaction_logger import ProviderLogger
2325

@@ -62,6 +64,22 @@
6264
"pause_turn": "stop",
6365
}
6466

67+
# Server-side cache for thinking block signatures, created on first use.
# OpenAI-format clients cannot round-trip Anthropic's thinking `signature`
# field, so we stash signatures here and re-attach them on later turns.
_thinking_sig_cache = None


def _get_thinking_cache():
    """Return the shared signature cache, building it on first access."""
    global _thinking_sig_cache
    if _thinking_sig_cache is not None:
        return _thinking_sig_cache
    _thinking_sig_cache = create_provider_cache(
        "anthropic_thinking_signatures",
        memory_ttl_seconds=7200,  # hot entries live 2 hours in memory
        disk_ttl_seconds=172800,  # persisted entries live 48 hours on disk
    )
    return _thinking_sig_cache
82+
6583

6684
class AnthropicProvider(AnthropicAuthBase, ProviderInterface):
6785
"""
@@ -155,15 +173,23 @@ def _openai_messages_to_anthropic(self, messages: List[Dict[str, Any]]) -> tuple
155173
blocks = []
156174

157175
reasoning = msg.get("reasoning_content")
158-
thinking_sig = msg.get("thinking_signature")
159176
if reasoning:
160-
thinking_block = {
161-
"type": "thinking",
162-
"thinking": reasoning,
163-
}
164-
if thinking_sig and len(thinking_sig) >= 100:
165-
thinking_block["signature"] = thinking_sig
166-
blocks.append(thinking_block)
177+
# Try server-side cache first (signature preserved from
178+
# the original Anthropic response)
179+
cached = self._retrieve_thinking_blocks(reasoning)
180+
if cached:
181+
blocks.extend(cached)
182+
else:
183+
# Fallback: inline signature from client (custom clients)
184+
thinking_sig = msg.get("thinking_signature")
185+
if thinking_sig and len(thinking_sig) >= 100:
186+
blocks.append({
187+
"type": "thinking",
188+
"thinking": reasoning,
189+
"signature": thinking_sig,
190+
})
191+
# else: no signature → drop thinking block,
192+
# model generates fresh thinking (cache miss on prefix)
167193

168194
if isinstance(content, str) and content.strip():
169195
blocks.append({"type": "text", "text": content})
@@ -251,6 +277,29 @@ def _openai_messages_to_anthropic(self, messages: List[Dict[str, Any]]) -> tuple
251277

252278
return system_blocks, anthropic_messages
253279

280+
def _retrieve_thinking_blocks(
    self, reasoning_content: str
) -> Optional[List[Dict[str, Any]]]:
    """Return cached, signed thinking blocks for *reasoning_content*.

    The cache is keyed by the SHA-256 hex digest of the full thinking text
    (the writer side concatenates per-block thinking before hashing, so the
    lookup key here must be built from the same concatenated string).

    Returns:
        A list of Anthropic ``thinking`` content blocks, each carrying its
        original ``signature``, or ``None`` when nothing usable is cached
        (cache miss, corrupt payload, or no block had a signature).
    """
    cache_key = hashlib.sha256(reasoning_content.encode()).hexdigest()
    cached = _get_thinking_cache().retrieve(cache_key)
    if not cached:
        return None
    try:
        blocks_data = json.loads(cached)
        result = [
            {
                "type": "thinking",
                "thinking": b["thinking"],
                "signature": b["signature"],
            }
            for b in blocks_data
            # Skip entries without a signature: Anthropic rejects
            # thinking blocks that lack one.
            if b.get("signature")
        ]
        return result if result else None
    except (json.JSONDecodeError, KeyError, TypeError):
        # Corrupt or unexpectedly-shaped cache payload — treat as a miss
        # so the caller falls back to dropping the thinking block.
        return None
302+
254303
def _openai_tools_to_anthropic(
255304
self, tools: Optional[List[Dict[str, Any]]]
256305
) -> Optional[List[Dict[str, Any]]]:
@@ -348,6 +397,10 @@ def _anthropic_event_to_openai_chunks(
348397
stream_state["current_block_type"] = block_type
349398
stream_state["current_block_index"] = index
350399

400+
if block_type == "thinking":
401+
stream_state["_block_thinking"] = ""
402+
stream_state["_block_signature"] = ""
403+
351404
if block_type == "tool_use":
352405
tool_id = block.get("id", f"toolu_{uuid.uuid4().hex[:12]}")
353406
raw_name = block.get("name", "")
@@ -423,6 +476,9 @@ def _anthropic_event_to_openai_chunks(
423476
stream_state["accumulated_thinking"] = (
424477
stream_state.get("accumulated_thinking", "") + thinking
425478
)
479+
stream_state["_block_thinking"] = (
480+
stream_state.get("_block_thinking", "") + thinking
481+
)
426482
yield {
427483
"choices": [
428484
{
@@ -473,10 +529,21 @@ def _anthropic_event_to_openai_chunks(
473529
stream_state["thinking_signature"] = (
474530
stream_state.get("thinking_signature", "") + sig
475531
)
532+
stream_state["_block_signature"] = (
533+
stream_state.get("_block_signature", "") + sig
534+
)
476535

477536
return
478537

479538
if event_type == "content_block_stop":
539+
if stream_state.get("current_block_type") == "thinking":
540+
block_thinking = stream_state.pop("_block_thinking", "")
541+
block_sig = stream_state.pop("_block_signature", "")
542+
if block_thinking and block_sig:
543+
stream_state.setdefault("_thinking_blocks", []).append({
544+
"thinking": block_thinking,
545+
"signature": block_sig,
546+
})
480547
return
481548

482549
if event_type == "message_delta":
@@ -509,6 +576,14 @@ def _anthropic_event_to_openai_chunks(
509576
"total_tokens": input_tokens + output_tokens,
510577
},
511578
}
579+
580+
# Cache thinking blocks with signatures for multi-turn preservation
581+
thinking_blocks = stream_state.get("_thinking_blocks")
582+
if thinking_blocks:
583+
full_thinking = "".join(b["thinking"] for b in thinking_blocks)
584+
cache_key = hashlib.sha256(full_thinking.encode()).hexdigest()
585+
_get_thinking_cache().store(cache_key, json.dumps(thinking_blocks))
586+
512587
return
513588

514589
# =========================================================================

0 commit comments

Comments
 (0)