code · pull · May 21, 2026 · May 21, 2026 · May 21, 2026 · May 21, 2026
diff --git a/.github/workflows/backport_release.yaml b/.github/workflows/backport_release.yaml
diff --git a/comfy/model_patcher.py b/comfy/model_patcher.py
@@ -1613,6 +1613,16 @@ def memory_required(self, input_shape):
         #use all ModelPatcherDynamic this is ignored and its all done dynamically.
         return super().memory_required(input_shape=input_shape) * 1.3 + (1024 ** 3)
 
+    def restore_loaded_backups(self):
+        """Put every backed-up weight and buffer back onto the model.
+
+        Drains ``self.backup`` and ``self.backup_buffers``, re-attaching each
+        saved entry to ``self.model`` under its key, then resets
+        ``self.model.model_loaded_weight_memory`` to 0.
+
+        Returns:
+            The value ``model_loaded_weight_memory`` held before the reset.
+        """
+        model = self.model
+        previously_loaded = model.model_loaded_weight_memory
+
+        # Snapshot the keys first: entries are popped while we walk them.
+        for name in tuple(self.backup):
+            saved = self.backup.pop(name)
+            comfy.utils.set_attr_param(model, name, saved.weight)
+
+        for name in tuple(self.backup_buffers):
+            comfy.utils.set_attr_buffer(model, name, self.backup_buffers.pop(name))
+
+        model.model_loaded_weight_memory = 0
+        return previously_loaded
+
 
     def load(self, device_to=None, lowvram_model_memory=0, force_patch_weights=False, full_load=False, dirty=False):
 
@@ -1629,7 +1639,7 @@ def load(self, device_to=None, lowvram_model_memory=0, force_patch_weights=False
 
         num_patches = 0
         allocated_size = 0
-        self.model.model_loaded_weight_memory = 0
+        self.restore_loaded_backups()
 
         with self.use_ejected():
             self.unpatch_hooks()
@@ -1716,6 +1726,9 @@ def force_load_param(self, param_key, device_to):
                         force_load=True
 
                     if force_load:
+                        if hasattr(m, "_v"):
+                            comfy_aimdo.model_vbar.vbar_unpin(m._v)
+                            delattr(m, "_v")
                         force_load_param(self, "weight", device_to)
                         force_load_param(self, "bias", device_to)
                     else:
@@ -1773,13 +1786,7 @@ def partially_unload(self, device_to, memory_to_free=0, force_patch_weights=Fals
         freed = 0 if vbar is None else vbar.free_memory(memory_to_free)
 
         if freed < memory_to_free:
-            for key in list(self.backup.keys()):
-                bk = self.backup.pop(key)
-                comfy.utils.set_attr_param(self.model, key, bk.weight)
-            for key in list(self.backup_buffers.keys()):
-                comfy.utils.set_attr_buffer(self.model, key, self.backup_buffers.pop(key))
-            freed += self.model.model_loaded_weight_memory
-            self.model.model_loaded_weight_memory = 0
+            freed += self.restore_loaded_backups()
 
         return freed
 

diff --git a/comfy_api_nodes/apis/anthropic.py b/comfy_api_nodes/apis/anthropic.py
@@ -35,6 +35,19 @@ class AnthropicMessage(BaseModel):
     content: list[AnthropicTextContent | AnthropicImageContent] = Field(...)
 
 
+class AnthropicThinkingConfig(BaseModel):
+    # Value of the request's `thinking` field: a required mode plus an optional token budget.
+    type: Literal["enabled", "disabled", "adaptive"]
+    # Validated as >= 1024 whenever a value is supplied; None leaves the budget unset.
+    budget_tokens: int | None = Field(
+        default=None,
+        ge=1024,
+        description="Reasoning budget in tokens. Used when type is 'enabled'. Must be less than max_tokens.",
+    )
+
+
+class AnthropicOutputConfig(BaseModel):
+    """Used with `thinking.type='adaptive'` on models like Opus 4.7."""
+    # Optional effort hint; stays None when the caller does not supply one.
+    effort: Literal["low", "medium", "high"] | None = None
+
+
 class AnthropicMessagesRequest(BaseModel):
     model: str = Field(...)
     messages: list[AnthropicMessage] = Field(...)
@@ -44,13 +57,23 @@ class AnthropicMessagesRequest(BaseModel):
     top_p: float | None = Field(None, ge=0.0, le=1.0)
     top_k: int | None = Field(None, ge=0)
     stop_sequences: list[str] | None = Field(None)
+    thinking: AnthropicThinkingConfig | None = Field(None)
+    output_config: AnthropicOutputConfig | None = Field(None)
 
 
 class AnthropicResponseTextBlock(BaseModel):
     type: Literal["text"] = "text"
     text: str = Field(...)
 
 
+class AnthropicResponseThinkingBlock(BaseModel):
+    # Response content block carrying the model's reasoning text.
+    type: Literal["thinking"] = "thinking"
+    thinking: str = Field(...)
+
+
+class AnthropicResponseRedactedThinkingBlock(BaseModel):
+    # Response content block the API may return in place of a plain thinking block: the reasoning
+    # is delivered encrypted in `data`. Modelled only so that such responses still validate.
+    type: Literal["redacted_thinking"] = "redacted_thinking"
+    data: str | None = Field(None)
+
+
+# Every block kind accepted in a response `content` list. Without the redacted variant, a
+# `redacted_thinking` block would match no member of the union and validation of the whole
+# response would fail.
+AnthropicResponseBlock = (
+    AnthropicResponseTextBlock
+    | AnthropicResponseThinkingBlock
+    | AnthropicResponseRedactedThinkingBlock
+)
+
+
 class AnthropicCacheCreationUsage(BaseModel):
     ephemeral_5m_input_tokens: int | None = Field(None)
     ephemeral_1h_input_tokens: int | None = Field(None)
@@ -69,7 +92,7 @@ class AnthropicMessagesResponse(BaseModel):
     type: str | None = Field(None)
     role: str | None = Field(None)
     model: str | None = Field(None)
-    content: list[AnthropicResponseTextBlock] | None = Field(None)
+    content: list[AnthropicResponseBlock] | None = Field(None)
     stop_reason: str | None = Field(None)
     stop_sequence: str | None = Field(None)
     usage: AnthropicMessagesUsage | None = Field(None)
diff --git a/comfy_api_nodes/apis/openrouter.py b/comfy_api_nodes/apis/openrouter.py
@@ -0,0 +1,93 @@
+"""Pydantic models for the OpenRouter chat completions API.
+
+See: https://openrouter.ai/docs/api/api-reference/chat/send-chat-completion-request
+"""
+
+from typing import Literal
+
+from pydantic import BaseModel, Field
+
+
+class OpenRouterTextContent(BaseModel):
+    # Text member of OpenRouterContentBlock; `type` is pinned to "text".
+    type: Literal["text"] = "text"
+    text: str
+
+
+class OpenRouterImageUrl(BaseModel):
+    # Holder for an image location; nested under OpenRouterImageContent.image_url.
+    url: str
+
+
+class OpenRouterImageContent(BaseModel):
+    # Image member of OpenRouterContentBlock; `type` is pinned to "image_url".
+    type: Literal["image_url"] = "image_url"
+    image_url: OpenRouterImageUrl
+
+
+class OpenRouterVideoUrl(BaseModel):
+    # Holder for a video location; nested under OpenRouterVideoContent.video_url.
+    url: str
+
+
+class OpenRouterVideoContent(BaseModel):
+    # Video member of OpenRouterContentBlock; `type` is pinned to "video_url".
+    type: Literal["video_url"] = "video_url"
+    video_url: OpenRouterVideoUrl
+
+
+# Any single part allowed inside a list-valued message `content`.
+OpenRouterContentBlock = (
+    OpenRouterTextContent
+    | OpenRouterImageContent
+    | OpenRouterVideoContent
+)
+
+
+class OpenRouterMessage(BaseModel):
+    # One request message: a role plus either a plain string or a list of content parts.
+    role: Literal["system", "user", "assistant"]
+    content: str | list[OpenRouterContentBlock]
+
+
+class OpenRouterReasoningConfig(BaseModel):
+    # Value of the request's `reasoning` field; both settings are optional.
+    effort: str | None = None
+    exclude: bool | None = Field(
+        default=None,
+        description="If true, model reasons but reasoning is excluded from response.",
+    )
+
+
+class OpenRouterWebSearchOptions(BaseModel):
+    # Value of the request's `web_search_options` field.
+    search_context_size: str | None = None
+
+
+class OpenRouterChatRequest(BaseModel):
+    # Chat completion request body: `model` and `messages` are required, the rest is optional.
+    model: str
+    messages: list[OpenRouterMessage]
+    seed: int | None = None
+    reasoning: OpenRouterReasoningConfig | None = None
+    web_search_options: OpenRouterWebSearchOptions | None = None
+    # Streaming is off unless the caller sets it.
+    stream: bool = False
+
+
+class OpenRouterUsage(BaseModel):
+    # Token counters and cost reported with a response; every field may be absent.
+    prompt_tokens: int | None = None
+    completion_tokens: int | None = None
+    total_tokens: int | None = None
+    cost: float | None = Field(
+        default=None,
+        description="Server-side authoritative USD cost of the call.",
+    )
+
+
+class OpenRouterResponseMessage(BaseModel):
+    # Message carried by a response choice; every field may be absent.
+    role: str | None = None
+    content: str | None = None
+    reasoning: str | None = None
+    refusal: str | None = None
+
+
+class OpenRouterChoice(BaseModel):
+    # One entry of the response `choices` list.
+    index: int | None = None
+    message: OpenRouterResponseMessage | None = None
+    finish_reason: str | None = None
+
+
+class OpenRouterError(BaseModel):
+    # Error payload of a response; `code` accepts either an integer or a string.
+    code: int | str | None = None
+    message: str | None = None
+    metadata: dict | None = None
+
+
+class OpenRouterChatResponse(BaseModel):
+    # Chat completion response body. Every field is optional, so a payload that carries
+    # only `error` (or only `choices`) still validates.
+    id: str | None = None
+    model: str | None = None
+    object: str | None = None
+    provider: str | None = None
+    choices: list[OpenRouterChoice] | None = None
+    usage: OpenRouterUsage | None = None
+    error: OpenRouterError | None = None
diff --git a/comfy_api_nodes/nodes_anthropic.py b/comfy_api_nodes/nodes_anthropic.py
@@ -9,8 +9,11 @@
     AnthropicMessage,
     AnthropicMessagesRequest,
     AnthropicMessagesResponse,
+    AnthropicOutputConfig,
+    AnthropicResponseTextBlock,
     AnthropicRole,
     AnthropicTextContent,
+    AnthropicThinkingConfig,
 )
 from comfy_api_nodes.util import (
     ApiEndpoint,
@@ -32,15 +35,29 @@
     "Haiku 4.5": "claude-haiku-4-5-20251001",
 }
 
+# Labels of models that use the newer "adaptive" thinking mode, where Anthropic decides the actual
+# budget from the `output_config.effort` hint (Opus 4.7 requires it). Thinking-capable models that
+# are not listed here keep the explicit budget API.
+_ADAPTIVE_THINKING_MODELS = frozenset({"Opus 4.7", "Opus 4.6", "Sonnet 4.6"})
+# Labels of models for which no reasoning_effort input is offered.
+_THINKING_UNSUPPORTED = frozenset({"Haiku 4.5"})
 
-def _claude_model_inputs():
-    return [
+# Budget mode (Sonnet 4.5): reasoning budget in tokens for each effort level. A budget must be
+# < max_tokens; the largest one here is half of the default max_tokens=32768.
+_REASONING_BUDGET: dict[str, int] = {
+    "low": 2 * 1024,
+    "medium": 8 * 1024,
+    "high": 16 * 1024,
+}
+# Combo choices: "off" followed by every effort level that has a budget above, in the same order.
+_REASONING_EFFORTS = ["off", *_REASONING_BUDGET]
+
+
+def _claude_model_inputs(model_label: str):
+    inputs: list = [
         IO.Int.Input(
             "max_tokens",
-            default=16000,
-            min=32,
-            max=32000,
-            tooltip="Maximum number of tokens to generate before stopping.",
+            default=32768,
+            min=4096,
+            max=64000,
+            tooltip="Maximum number of tokens to generate (includes reasoning tokens when enabled).",
             advanced=True,
         ),
         IO.Float.Input(
@@ -49,10 +66,24 @@ def _claude_model_inputs():
             min=0.0,
             max=1.0,
             step=0.01,
-            tooltip="Controls randomness. 0.0 is deterministic, 1.0 is most random. Ignored for Opus 4.7.",
+            tooltip=(
+                "Controls randomness. 0.0 is deterministic, 1.0 is most random. "
+                "Ignored for Opus 4.7 and any model when reasoning_effort is set."
+            ),
             advanced=True,
         ),
     ]
+    if model_label not in _THINKING_UNSUPPORTED:
+        inputs.append(
+            IO.Combo.Input(
+                "reasoning_effort",
+                options=_REASONING_EFFORTS,
+                default="off",
+                tooltip="Extended thinking effort. 'off' disables reasoning.",
+                advanced=True,
+            )
+        )
+    return inputs
 
 
 def _model_price_per_million(model: str) -> tuple[float, float] | None:
@@ -95,7 +126,11 @@ def calculate_tokens_price(response: AnthropicMessagesResponse) -> float | None:
 def _get_text_from_response(response: AnthropicMessagesResponse) -> str:
     if not response.content:
         return ""
-    return "\n".join(block.text for block in response.content if block.text)
+    # Only non-empty text blocks reach the output; thinking blocks are skipped on purpose so
+    # reasoning never appears in the returned string.
+    text_parts = [
+        block.text
+        for block in response.content
+        if isinstance(block, AnthropicResponseTextBlock) and block.text
+    ]
+    return "\n".join(text_parts)
 
 
 async def _build_image_content_blocks(
@@ -133,7 +168,10 @@ def define_schema(cls):
                 ),
                 IO.DynamicCombo.Input(
                     "model",
-                    options=[IO.DynamicCombo.Option(label, _claude_model_inputs()) for label in CLAUDE_MODELS],
+                    options=[
+                        IO.DynamicCombo.Option(label, _claude_model_inputs(label))
+                        for label in CLAUDE_MODELS
+                    ],
                     tooltip="The Claude model used to generate the response.",
                 ),
                 IO.Int.Input(
@@ -207,8 +245,29 @@ async def execute(
     ) -> IO.NodeOutput:
         validate_string(prompt, strip_whitespace=True, min_length=1)
         model_label = model["model"]
-        max_tokens = model["max_tokens"]
-        temperature = None if model_label == "Opus 4.7" else model["temperature"]
+        max_tokens = model.get("max_tokens", 32768)
+        reasoning_effort = model.get("reasoning_effort", "off")
+        thinking_enabled = reasoning_effort not in ("off", None) and model_label not in _THINKING_UNSUPPORTED
+
+        # Anthropic requires temperature to be unset (defaults to 1.0) when thinking is enabled.
+        # Opus 4.7 also rejects user-supplied temperature.
+        if thinking_enabled or model_label == "Opus 4.7":
+            temperature = None
+        else:
+            temperature = model.get("temperature", 1.0)
+
+        thinking_cfg: AnthropicThinkingConfig | None = None
+        output_cfg: AnthropicOutputConfig | None = None
+        if thinking_enabled:
+            if model_label in _ADAPTIVE_THINKING_MODELS:
+                # Adaptive mode - Anthropic chooses the budget based on effort hint
+                thinking_cfg = AnthropicThinkingConfig(type="adaptive")
+                output_cfg = AnthropicOutputConfig(effort=reasoning_effort)
+            else:
+                # Budget mode (Sonnet 4.5). Leave at least 1024 tokens for the actual response
+                budget = _REASONING_BUDGET[reasoning_effort]
+                budget = min(budget, max(1024, max_tokens - 1024))
+                thinking_cfg = AnthropicThinkingConfig(type="enabled", budget_tokens=budget)
 
         image_tensors: list[Input.Image] = [t for t in (images or {}).values() if t is not None]
         if sum(get_number_of_images(t) for t in image_tensors) > CLAUDE_MAX_IMAGES:
@@ -229,6 +288,8 @@ async def execute(
                 messages=[AnthropicMessage(role=AnthropicRole.user, content=content)],
                 system=system_prompt or None,
                 temperature=temperature,
+                thinking=thinking_cfg,
+                output_config=output_cfg,
             ),
             price_extractor=calculate_tokens_price,
         )