Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
401 changes: 401 additions & 0 deletions .github/workflows/backport_release.yaml

Large diffs are not rendered by default.

23 changes: 15 additions & 8 deletions comfy/model_patcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -1613,6 +1613,16 @@ def memory_required(self, input_shape):
#use all ModelPatcherDynamic this is ignored and its all done dynamically.
return super().memory_required(input_shape=input_shape) * 1.3 + (1024 ** 3)

def restore_loaded_backups(self):
restored = self.model.model_loaded_weight_memory
for key in list(self.backup.keys()):
bk = self.backup.pop(key)
comfy.utils.set_attr_param(self.model, key, bk.weight)
for key in list(self.backup_buffers.keys()):
comfy.utils.set_attr_buffer(self.model, key, self.backup_buffers.pop(key))
self.model.model_loaded_weight_memory = 0
return restored


def load(self, device_to=None, lowvram_model_memory=0, force_patch_weights=False, full_load=False, dirty=False):

Expand All @@ -1629,7 +1639,7 @@ def load(self, device_to=None, lowvram_model_memory=0, force_patch_weights=False

num_patches = 0
allocated_size = 0
self.model.model_loaded_weight_memory = 0
self.restore_loaded_backups()

with self.use_ejected():
self.unpatch_hooks()
Expand Down Expand Up @@ -1716,6 +1726,9 @@ def force_load_param(self, param_key, device_to):
force_load=True

if force_load:
if hasattr(m, "_v"):
comfy_aimdo.model_vbar.vbar_unpin(m._v)
delattr(m, "_v")
force_load_param(self, "weight", device_to)
force_load_param(self, "bias", device_to)
else:
Expand Down Expand Up @@ -1773,13 +1786,7 @@ def partially_unload(self, device_to, memory_to_free=0, force_patch_weights=Fals
freed = 0 if vbar is None else vbar.free_memory(memory_to_free)

if freed < memory_to_free:
for key in list(self.backup.keys()):
bk = self.backup.pop(key)
comfy.utils.set_attr_param(self.model, key, bk.weight)
for key in list(self.backup_buffers.keys()):
comfy.utils.set_attr_buffer(self.model, key, self.backup_buffers.pop(key))
freed += self.model.model_loaded_weight_memory
self.model.model_loaded_weight_memory = 0
freed += self.restore_loaded_backups()

return freed

Expand Down
25 changes: 24 additions & 1 deletion comfy_api_nodes/apis/anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,19 @@ class AnthropicMessage(BaseModel):
content: list[AnthropicTextContent | AnthropicImageContent] = Field(...)


class AnthropicThinkingConfig(BaseModel):
type: Literal["enabled", "disabled", "adaptive"] = Field(...)
budget_tokens: int | None = Field(
None, ge=1024,
description="Reasoning budget in tokens. Used when type is 'enabled'. Must be less than max_tokens.",
)


class AnthropicOutputConfig(BaseModel):
"""Used with `thinking.type='adaptive'` on models like Opus 4.7."""
effort: Literal["low", "medium", "high"] | None = Field(None)


class AnthropicMessagesRequest(BaseModel):
model: str = Field(...)
messages: list[AnthropicMessage] = Field(...)
Expand All @@ -44,13 +57,23 @@ class AnthropicMessagesRequest(BaseModel):
top_p: float | None = Field(None, ge=0.0, le=1.0)
top_k: int | None = Field(None, ge=0)
stop_sequences: list[str] | None = Field(None)
thinking: AnthropicThinkingConfig | None = Field(None)
output_config: AnthropicOutputConfig | None = Field(None)


class AnthropicResponseTextBlock(BaseModel):
type: Literal["text"] = "text"
text: str = Field(...)


class AnthropicResponseThinkingBlock(BaseModel):
type: Literal["thinking"] = "thinking"
thinking: str = Field(...)


AnthropicResponseBlock = AnthropicResponseTextBlock | AnthropicResponseThinkingBlock


class AnthropicCacheCreationUsage(BaseModel):
ephemeral_5m_input_tokens: int | None = Field(None)
ephemeral_1h_input_tokens: int | None = Field(None)
Expand All @@ -69,7 +92,7 @@ class AnthropicMessagesResponse(BaseModel):
type: str | None = Field(None)
role: str | None = Field(None)
model: str | None = Field(None)
content: list[AnthropicResponseTextBlock] | None = Field(None)
content: list[AnthropicResponseBlock] | None = Field(None)
stop_reason: str | None = Field(None)
stop_sequence: str | None = Field(None)
usage: AnthropicMessagesUsage | None = Field(None)
93 changes: 93 additions & 0 deletions comfy_api_nodes/apis/openrouter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
"""Pydantic models for the OpenRouter chat completions API.

See: https://openrouter.ai/docs/api/api-reference/chat/send-chat-completion-request
"""

from typing import Literal

from pydantic import BaseModel, Field


class OpenRouterTextContent(BaseModel):
type: Literal["text"] = "text"
text: str = Field(...)


class OpenRouterImageUrl(BaseModel):
url: str = Field(...)


class OpenRouterImageContent(BaseModel):
type: Literal["image_url"] = "image_url"
image_url: OpenRouterImageUrl = Field(...)


class OpenRouterVideoUrl(BaseModel):
url: str = Field(...)


class OpenRouterVideoContent(BaseModel):
type: Literal["video_url"] = "video_url"
video_url: OpenRouterVideoUrl = Field(...)


OpenRouterContentBlock = OpenRouterTextContent | OpenRouterImageContent | OpenRouterVideoContent


class OpenRouterMessage(BaseModel):
role: Literal["system", "user", "assistant"] = Field(...)
content: str | list[OpenRouterContentBlock] = Field(...)


class OpenRouterReasoningConfig(BaseModel):
effort: str | None = Field(None)
exclude: bool | None = Field(None, description="If true, model reasons but reasoning is excluded from response.")


class OpenRouterWebSearchOptions(BaseModel):
search_context_size: str | None = Field(None)


class OpenRouterChatRequest(BaseModel):
model: str = Field(...)
messages: list[OpenRouterMessage] = Field(...)
seed: int | None = Field(None)
reasoning: OpenRouterReasoningConfig | None = Field(None)
web_search_options: OpenRouterWebSearchOptions | None = Field(None)
stream: bool = Field(False)


class OpenRouterUsage(BaseModel):
prompt_tokens: int | None = Field(None)
completion_tokens: int | None = Field(None)
total_tokens: int | None = Field(None)
cost: float | None = Field(None, description="Server-side authoritative USD cost of the call.")


class OpenRouterResponseMessage(BaseModel):
role: str | None = Field(None)
content: str | None = Field(None)
reasoning: str | None = Field(None)
refusal: str | None = Field(None)


class OpenRouterChoice(BaseModel):
index: int | None = Field(None)
message: OpenRouterResponseMessage | None = Field(None)
finish_reason: str | None = Field(None)


class OpenRouterError(BaseModel):
code: int | str | None = Field(None)
message: str | None = Field(None)
metadata: dict | None = Field(None)


class OpenRouterChatResponse(BaseModel):
id: str | None = Field(None)
model: str | None = Field(None)
object: str | None = Field(None)
provider: str | None = Field(None)
choices: list[OpenRouterChoice] | None = Field(None)
usage: OpenRouterUsage | None = Field(None)
error: OpenRouterError | None = Field(None)
83 changes: 72 additions & 11 deletions comfy_api_nodes/nodes_anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,11 @@
AnthropicMessage,
AnthropicMessagesRequest,
AnthropicMessagesResponse,
AnthropicOutputConfig,
AnthropicResponseTextBlock,
AnthropicRole,
AnthropicTextContent,
AnthropicThinkingConfig,
)
from comfy_api_nodes.util import (
ApiEndpoint,
Expand All @@ -32,15 +35,29 @@
"Haiku 4.5": "claude-haiku-4-5-20251001",
}

_THINKING_UNSUPPORTED = {"Haiku 4.5"}
# Models that use the newer "adaptive" thinking mode (Opus 4.7 requires it; older models keep the explicit budget API).
# Anthropic decides the actual budget when adaptive is used, based on the `output_config.effort` hint.
_ADAPTIVE_THINKING_MODELS = {"Opus 4.7", "Opus 4.6", "Sonnet 4.6"}

def _claude_model_inputs():
return [
# Budget mode (Sonnet 4.5): effort -> reasoning budget in tokens. Must be < max_tokens.
# Sized so even the "high" budget fits comfortably under the default max_tokens=32768.
_REASONING_BUDGET: dict[str, int] = {
"low": 2048,
"medium": 8192,
"high": 16384,
}
_REASONING_EFFORTS = ["off", "low", "medium", "high"]


def _claude_model_inputs(model_label: str):
inputs: list = [
IO.Int.Input(
"max_tokens",
default=16000,
min=32,
max=32000,
tooltip="Maximum number of tokens to generate before stopping.",
default=32768,
min=4096,
max=64000,
tooltip="Maximum number of tokens to generate (includes reasoning tokens when enabled).",
advanced=True,
),
IO.Float.Input(
Expand All @@ -49,10 +66,24 @@ def _claude_model_inputs():
min=0.0,
max=1.0,
step=0.01,
tooltip="Controls randomness. 0.0 is deterministic, 1.0 is most random. Ignored for Opus 4.7.",
tooltip=(
"Controls randomness. 0.0 is deterministic, 1.0 is most random. "
"Ignored for Opus 4.7 and any model when reasoning_effort is set."
),
advanced=True,
),
]
if model_label not in _THINKING_UNSUPPORTED:
inputs.append(
IO.Combo.Input(
"reasoning_effort",
options=_REASONING_EFFORTS,
default="off",
tooltip="Extended thinking effort. 'off' disables reasoning.",
advanced=True,
)
)
return inputs


def _model_price_per_million(model: str) -> tuple[float, float] | None:
Expand Down Expand Up @@ -95,7 +126,11 @@ def calculate_tokens_price(response: AnthropicMessagesResponse) -> float | None:
def _get_text_from_response(response: AnthropicMessagesResponse) -> str:
if not response.content:
return ""
return "\n".join(block.text for block in response.content if block.text)
# Thinking blocks are silently dropped — we never want reasoning in the output.
return "\n".join(
block.text for block in response.content
if isinstance(block, AnthropicResponseTextBlock) and block.text
)


async def _build_image_content_blocks(
Expand Down Expand Up @@ -133,7 +168,10 @@ def define_schema(cls):
),
IO.DynamicCombo.Input(
"model",
options=[IO.DynamicCombo.Option(label, _claude_model_inputs()) for label in CLAUDE_MODELS],
options=[
IO.DynamicCombo.Option(label, _claude_model_inputs(label))
for label in CLAUDE_MODELS
],
tooltip="The Claude model used to generate the response.",
),
IO.Int.Input(
Expand Down Expand Up @@ -207,8 +245,29 @@ async def execute(
) -> IO.NodeOutput:
validate_string(prompt, strip_whitespace=True, min_length=1)
model_label = model["model"]
max_tokens = model["max_tokens"]
temperature = None if model_label == "Opus 4.7" else model["temperature"]
max_tokens = model.get("max_tokens", 32768)
reasoning_effort = model.get("reasoning_effort", "off")
thinking_enabled = reasoning_effort not in ("off", None) and model_label not in _THINKING_UNSUPPORTED

# Anthropic requires temperature to be unset (defaults to 1.0) when thinking is enabled.
# Opus 4.7 also rejects user-supplied temperature.
if thinking_enabled or model_label == "Opus 4.7":
temperature = None
else:
temperature = model.get("temperature", 1.0)

thinking_cfg: AnthropicThinkingConfig | None = None
output_cfg: AnthropicOutputConfig | None = None
if thinking_enabled:
if model_label in _ADAPTIVE_THINKING_MODELS:
# Adaptive mode - Anthropic chooses the budget based on effort hint
thinking_cfg = AnthropicThinkingConfig(type="adaptive")
output_cfg = AnthropicOutputConfig(effort=reasoning_effort)
else:
# Budget mode (Sonnet 4.5). Leave at least 1024 tokens for the actual response
budget = _REASONING_BUDGET[reasoning_effort]
budget = min(budget, max(1024, max_tokens - 1024))
thinking_cfg = AnthropicThinkingConfig(type="enabled", budget_tokens=budget)

image_tensors: list[Input.Image] = [t for t in (images or {}).values() if t is not None]
if sum(get_number_of_images(t) for t in image_tensors) > CLAUDE_MAX_IMAGES:
Expand All @@ -229,6 +288,8 @@ async def execute(
messages=[AnthropicMessage(role=AnthropicRole.user, content=content)],
system=system_prompt or None,
temperature=temperature,
thinking=thinking_cfg,
output_config=output_cfg,
),
price_extractor=calculate_tokens_price,
)
Expand Down
Loading
Loading