diff --git a/invokeai/app/api/dependencies.py b/invokeai/app/api/dependencies.py index 339a0ceadb4..65522637a27 100644 --- a/invokeai/app/api/dependencies.py +++ b/invokeai/app/api/dependencies.py @@ -46,6 +46,7 @@ from invokeai.app.services.workflow_records.workflow_records_sqlite import SqliteWorkflowRecordsStorage from invokeai.app.services.workflow_thumbnails.workflow_thumbnails_disk import WorkflowThumbnailFileStorageDisk from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ( + AnimaConditioningInfo, BasicConditioningInfo, CogView4ConditioningInfo, ConditioningFieldData, @@ -140,6 +141,7 @@ def initialize( SD3ConditioningInfo, CogView4ConditioningInfo, ZImageConditioningInfo, + AnimaConditioningInfo, ], ephemeral=True, ), diff --git a/invokeai/app/invocations/anima_denoise.py b/invokeai/app/invocations/anima_denoise.py new file mode 100644 index 00000000000..a284b5dcd4c --- /dev/null +++ b/invokeai/app/invocations/anima_denoise.py @@ -0,0 +1,700 @@ +"""Anima denoising invocation. 
+ +Implements the rectified flow denoising loop for Anima models: +- Direct prediction: denoised = input - output * sigma +- Fixed shift=3.0 via loglinear_timestep_shift (Flux paper by Black Forest Labs) +- Timestep convention: timestep = sigma * 1.0 (raw sigma, NOT 1-sigma like Z-Image) +- NO v-prediction negation (unlike Z-Image) +- 3D latent space: [B, C, T, H, W] with T=1 for images +- 16 latent channels, 8x spatial compression + +Key differences from Z-Image denoise: +- Anima uses fixed shift=3.0, Z-Image uses dynamic shift based on resolution +- Anima: timestep = sigma (raw), Z-Image: model_t = 1.0 - sigma +- Anima: noise_pred = model_output (direct), Z-Image: noise_pred = -model_output (v-pred) +- Anima transformer takes (x, timesteps, context, t5xxl_ids, t5xxl_weights) +- Anima uses 3D latents directly, Z-Image converts 4D -> list of 5D +""" + +import inspect +import math +from contextlib import ExitStack +from typing import Callable, Iterator, Optional, Tuple + +import torch +import torchvision.transforms as tv_transforms +from diffusers.schedulers.scheduling_utils import SchedulerMixin +from torchvision.transforms.functional import resize as tv_resize +from tqdm import tqdm + +from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, invocation +from invokeai.app.invocations.fields import ( + AnimaConditioningField, + DenoiseMaskField, + FieldDescriptions, + Input, + InputField, + LatentsField, +) +from invokeai.app.invocations.model import TransformerField +from invokeai.app.invocations.primitives import LatentsOutput +from invokeai.app.services.shared.invocation_context import InvocationContext +from invokeai.backend.anima.anima_transformer_patch import patch_anima_for_regional_prompting +from invokeai.backend.anima.conditioning_data import AnimaRegionalTextConditioning, AnimaTextConditioning +from invokeai.backend.anima.regional_prompting import AnimaRegionalPromptingExtension +from invokeai.backend.flux.schedulers import 
ANIMA_SCHEDULER_LABELS, ANIMA_SCHEDULER_MAP, ANIMA_SCHEDULER_NAME_VALUES +from invokeai.backend.model_manager.taxonomy import BaseModelType +from invokeai.backend.patches.layer_patcher import LayerPatcher +from invokeai.backend.patches.lora_conversions.anima_lora_constants import ANIMA_LORA_TRANSFORMER_PREFIX +from invokeai.backend.patches.model_patch_raw import ModelPatchRaw +from invokeai.backend.rectified_flow.rectified_flow_inpaint_extension import ( + RectifiedFlowInpaintExtension, + assert_broadcastable, +) +from invokeai.backend.stable_diffusion.diffusion.conditioning_data import AnimaConditioningInfo, Range +from invokeai.backend.stable_diffusion.diffusers_pipeline import PipelineIntermediateState +from invokeai.backend.util.devices import TorchDevice + +# Anima uses 8x spatial compression (VAE downsamples by 2^3) +ANIMA_LATENT_SCALE_FACTOR = 8 +# Anima uses 16 latent channels +ANIMA_LATENT_CHANNELS = 16 +# Anima uses fixed shift=3.0 for the rectified flow schedule +ANIMA_SHIFT = 3.0 +# Anima uses raw sigma values as timesteps (no rescaling) +ANIMA_MULTIPLIER = 1.0 + + +def loglinear_timestep_shift(alpha: float, t: float) -> float: + """Apply log-linear timestep shift to a noise schedule value. + + This shift biases the noise schedule toward higher noise levels, as described + in the Flux model (Black Forest Labs, 2024). With alpha > 1, the model spends + proportionally more denoising steps at higher noise levels. + + Formula: sigma = alpha * t / (1 + (alpha - 1) * t) + + Args: + alpha: Shift factor (3.0 for Anima, resolution-dependent for Flux). + t: Timestep value in [0, 1]. + + Returns: + Shifted timestep value. + """ + if alpha == 1.0: + return t + return alpha * t / (1 + (alpha - 1) * t) + + +def inverse_loglinear_timestep_shift(alpha: float, sigma: float) -> float: + """Recover linear t from a shifted sigma value. 
+ + Inverse of loglinear_timestep_shift: given sigma = alpha * t / (1 + (alpha-1) * t), + solve for t = sigma / (alpha - (alpha-1) * sigma). + + This is needed for the inpainting extension, which expects linear t values + for gradient mask thresholding. With Anima's shift=3.0, the difference + between shifted sigma and linear t is large (e.g. at t=0.5, sigma=0.75), + causing overly aggressive mask thresholding if sigma is used directly. + + Args: + alpha: Shift factor (3.0 for Anima). + sigma: Shifted sigma value in [0, 1]. + + Returns: + Linear t value in [0, 1]. + """ + if alpha == 1.0: + return sigma + denominator = alpha - (alpha - 1) * sigma + if abs(denominator) < 1e-8: + return 1.0 + return sigma / denominator + + +class AnimaInpaintExtension(RectifiedFlowInpaintExtension): + """Inpaint extension for Anima that accounts for the time-SNR shift. + + Anima uses a fixed shift=3.0 which makes sigma values significantly larger + than the corresponding linear t values. The base RectifiedFlowInpaintExtension + uses t_prev for both gradient mask thresholding and noise mixing, which assumes + linear t values. + + This subclass: + - Uses the LINEAR t for gradient mask thresholding (correct progressive reveal) + - Uses the SHIFTED sigma for noise mixing (matches the denoiser's noise level) + """ + + def __init__( + self, + init_latents: torch.Tensor, + inpaint_mask: torch.Tensor, + noise: torch.Tensor, + shift: float = ANIMA_SHIFT, + ): + assert_broadcastable(init_latents.shape, inpaint_mask.shape, noise.shape) + self._init_latents = init_latents + self._inpaint_mask = inpaint_mask + self._noise = noise + self._shift = shift + + def merge_intermediate_latents_with_init_latents( + self, intermediate_latents: torch.Tensor, sigma_prev: float + ) -> torch.Tensor: + """Merge intermediate latents with init latents, correcting for Anima's shift. + + Args: + intermediate_latents: The denoised latents at the current step. + sigma_prev: The SHIFTED sigma value for the next step. 
+ """ + # Recover linear t from shifted sigma for gradient mask thresholding. + # This ensures the gradient mask is revealed at the correct pace. + t_prev = inverse_loglinear_timestep_shift(self._shift, sigma_prev) + mask = self._apply_mask_gradient_adjustment(t_prev) + + # Use shifted sigma for noise mixing to match the denoiser's noise level. + # The Euler step produces latents at noise level sigma_prev, so the + # preserved regions must also be at sigma_prev noise level. + noised_init_latents = self._noise * sigma_prev + (1.0 - sigma_prev) * self._init_latents + + return intermediate_latents * mask + noised_init_latents * (1.0 - mask) + + +@invocation( + "anima_denoise", + title="Denoise - Anima", + tags=["image", "anima"], + category="image", + version="1.2.0", + classification=Classification.Prototype, +) +class AnimaDenoiseInvocation(BaseInvocation): + """Run the denoising process with an Anima model. + + Uses rectified flow sampling with shift=3.0 and the Cosmos Predict2 DiT + backbone with integrated LLM Adapter for text conditioning. + + Supports txt2img, img2img (via latents input), and inpainting (via denoise_mask). + """ + + # If latents is provided, this means we are doing image-to-image. + latents: Optional[LatentsField] = InputField( + default=None, description=FieldDescriptions.latents, input=Input.Connection + ) + # denoise_mask is used for inpainting. Only the masked region is modified. 
+ denoise_mask: Optional[DenoiseMaskField] = InputField( + default=None, description=FieldDescriptions.denoise_mask, input=Input.Connection + ) + denoising_start: float = InputField(default=0.0, ge=0, le=1, description=FieldDescriptions.denoising_start) + denoising_end: float = InputField(default=1.0, ge=0, le=1, description=FieldDescriptions.denoising_end) + add_noise: bool = InputField(default=True, description="Add noise based on denoising start.") + transformer: TransformerField = InputField( + description="Anima transformer model.", input=Input.Connection, title="Transformer" + ) + positive_conditioning: AnimaConditioningField | list[AnimaConditioningField] = InputField( + description=FieldDescriptions.positive_cond, input=Input.Connection + ) + negative_conditioning: AnimaConditioningField | list[AnimaConditioningField] | None = InputField( + default=None, description=FieldDescriptions.negative_cond, input=Input.Connection + ) + guidance_scale: float = InputField( + default=4.5, + ge=1.0, + description="Guidance scale for classifier-free guidance. Recommended: 4.0-5.0 for Anima.", + title="Guidance Scale", + ) + width: int = InputField(default=1024, multiple_of=8, description="Width of the generated image.") + height: int = InputField(default=1024, multiple_of=8, description="Height of the generated image.") + steps: int = InputField(default=30, gt=0, description="Number of denoising steps. 
30 recommended for Anima.") + seed: int = InputField(default=0, description="Randomness seed for reproducibility.") + scheduler: ANIMA_SCHEDULER_NAME_VALUES = InputField( + default="euler", + description="Scheduler (sampler) for the denoising process.", + ui_choice_labels=ANIMA_SCHEDULER_LABELS, + ) + + @torch.no_grad() + def invoke(self, context: InvocationContext) -> LatentsOutput: + latents = self._run_diffusion(context) + latents = latents.detach().to("cpu") + name = context.tensors.save(tensor=latents) + return LatentsOutput.build(latents_name=name, latents=latents, seed=None) + + def _prep_inpaint_mask(self, context: InvocationContext, latents: torch.Tensor) -> torch.Tensor | None: + """Prepare the inpaint mask for Anima. + + Anima uses 3D latents [B, C, T, H, W] internally but the mask operates + on the spatial dimensions [B, C, H, W] which match the squeezed output. + """ + if self.denoise_mask is None: + return None + mask = context.tensors.load(self.denoise_mask.mask_name) + + # Invert mask: 0.0 = regions to denoise, 1.0 = regions to preserve + mask = 1.0 - mask + + _, _, latent_height, latent_width = latents.shape + mask = tv_resize( + img=mask, + size=[latent_height, latent_width], + interpolation=tv_transforms.InterpolationMode.BILINEAR, + antialias=False, + ) + + mask = mask.to(device=latents.device, dtype=latents.dtype) + return mask + + def _get_noise( + self, + height: int, + width: int, + dtype: torch.dtype, + device: torch.device, + seed: int, + ) -> torch.Tensor: + """Generate initial noise tensor in 3D latent space [B, C, T, H, W].""" + rand_device = "cpu" + return torch.randn( + 1, + ANIMA_LATENT_CHANNELS, + 1, # T=1 for single image + height // ANIMA_LATENT_SCALE_FACTOR, + width // ANIMA_LATENT_SCALE_FACTOR, + device=rand_device, + dtype=torch.float32, + generator=torch.Generator(device=rand_device).manual_seed(seed), + ).to(device=device, dtype=dtype) + + def _get_sigmas(self, num_steps: int) -> list[float]: + """Generate sigma schedule with 
fixed shift=3.0. + + Uses the log-linear timestep shift from the Flux model (Black Forest Labs) + with a fixed shift factor of 3.0 (no dynamic resolution-based shift). + + Returns: + List of num_steps + 1 sigma values from ~1.0 (noise) to 0.0 (clean). + """ + sigmas = [] + for i in range(num_steps + 1): + t = 1.0 - i / num_steps + sigma = loglinear_timestep_shift(ANIMA_SHIFT, t) + sigmas.append(sigma) + return sigmas + + def _load_conditioning( + self, + context: InvocationContext, + cond_field: AnimaConditioningField, + dtype: torch.dtype, + device: torch.device, + ) -> AnimaConditioningInfo: + """Load Anima conditioning data from storage.""" + cond_data = context.conditioning.load(cond_field.conditioning_name) + assert len(cond_data.conditionings) == 1 + cond_info = cond_data.conditionings[0] + assert isinstance(cond_info, AnimaConditioningInfo) + return cond_info.to(dtype=dtype, device=device) + + def _load_text_conditionings( + self, + context: InvocationContext, + cond_field: AnimaConditioningField | list[AnimaConditioningField], + img_token_height: int, + img_token_width: int, + dtype: torch.dtype, + device: torch.device, + ) -> list[AnimaTextConditioning]: + """Load Anima text conditioning with optional regional masks. + + Args: + context: The invocation context. + cond_field: Single conditioning field or list of fields. + img_token_height: Height of the image token grid (H // patch_size). + img_token_width: Width of the image token grid (W // patch_size). + dtype: Target dtype. + device: Target device. + + Returns: + List of AnimaTextConditioning objects with optional masks. 
+ """ + cond_list = cond_field if isinstance(cond_field, list) else [cond_field] + + text_conditionings: list[AnimaTextConditioning] = [] + for cond in cond_list: + cond_info = self._load_conditioning(context, cond, dtype, device) + + # Load the mask, if provided + mask: torch.Tensor | None = None + if cond.mask is not None: + mask = context.tensors.load(cond.mask.tensor_name) + mask = mask.to(device=device) + mask = AnimaRegionalPromptingExtension.preprocess_regional_prompt_mask( + mask, img_token_height, img_token_width, dtype, device + ) + + text_conditionings.append( + AnimaTextConditioning( + qwen3_embeds=cond_info.qwen3_embeds, + t5xxl_ids=cond_info.t5xxl_ids, + t5xxl_weights=cond_info.t5xxl_weights, + mask=mask, + ) + ) + + return text_conditionings + + def _run_llm_adapter_for_regions( + self, + transformer, + text_conditionings: list[AnimaTextConditioning], + dtype: torch.dtype, + ) -> AnimaRegionalTextConditioning: + """Run the LLM Adapter separately for each regional conditioning and concatenate. + + Args: + transformer: The AnimaTransformer instance (must be on device). + text_conditionings: List of per-region conditioning data. + dtype: Inference dtype. + + Returns: + AnimaRegionalTextConditioning with concatenated context and masks. 
+ """ + context_embeds_list: list[torch.Tensor] = [] + context_ranges: list[Range] = [] + image_masks: list[torch.Tensor | None] = [] + cur_len = 0 + + for tc in text_conditionings: + qwen3_embeds = tc.qwen3_embeds.unsqueeze(0) # (1, seq_len, 1024) + t5xxl_ids = tc.t5xxl_ids.unsqueeze(0) # (1, seq_len) + t5xxl_weights = None + if tc.t5xxl_weights is not None: + t5xxl_weights = tc.t5xxl_weights.unsqueeze(0).unsqueeze(-1) # (1, seq_len, 1) + + # Run the LLM Adapter to produce context for this region + context = transformer.preprocess_text_embeds( + qwen3_embeds.to(dtype=dtype), + t5xxl_ids, + t5xxl_weights=t5xxl_weights.to(dtype=dtype) if t5xxl_weights is not None else None, + ) + # context shape: (1, 512, 1024) — squeeze batch dim + context_2d = context.squeeze(0) # (512, 1024) + + context_embeds_list.append(context_2d) + context_ranges.append(Range(start=cur_len, end=cur_len + context_2d.shape[0])) + image_masks.append(tc.mask) + cur_len += context_2d.shape[0] + + concatenated_context = torch.cat(context_embeds_list, dim=0) + + return AnimaRegionalTextConditioning( + context_embeds=concatenated_context, + image_masks=image_masks, + context_ranges=context_ranges, + ) + + def _run_diffusion(self, context: InvocationContext) -> torch.Tensor: + device = TorchDevice.choose_torch_device() + inference_dtype = TorchDevice.choose_bfloat16_safe_dtype(device) + + transformer_info = context.models.load(self.transformer.transformer) + + # Compute image token grid dimensions for regional prompting + # Anima: 8x VAE compression, 2x patch size → 16x total + patch_size = 2 + latent_height = self.height // ANIMA_LATENT_SCALE_FACTOR + latent_width = self.width // ANIMA_LATENT_SCALE_FACTOR + img_token_height = latent_height // patch_size + img_token_width = latent_width // patch_size + img_seq_len = img_token_height * img_token_width + + # Load positive conditioning with optional regional masks + pos_text_conditionings = self._load_text_conditionings( + context=context, + 
cond_field=self.positive_conditioning, + img_token_height=img_token_height, + img_token_width=img_token_width, + dtype=inference_dtype, + device=device, + ) + has_regional = len(pos_text_conditionings) > 1 or any(tc.mask is not None for tc in pos_text_conditionings) + + # Load negative conditioning if CFG is enabled + do_cfg = not math.isclose(self.guidance_scale, 1.0) and self.negative_conditioning is not None + neg_text_conditionings: list[AnimaTextConditioning] | None = None + if do_cfg: + assert self.negative_conditioning is not None + neg_text_conditionings = self._load_text_conditionings( + context=context, + cond_field=self.negative_conditioning, + img_token_height=img_token_height, + img_token_width=img_token_width, + dtype=inference_dtype, + device=device, + ) + + # Generate sigma schedule + sigmas = self._get_sigmas(self.steps) + + # Apply denoising_start and denoising_end clipping (for img2img/inpaint) + if self.denoising_start > 0 or self.denoising_end < 1: + total_sigmas = len(sigmas) + start_idx = int(self.denoising_start * (total_sigmas - 1)) + end_idx = int(self.denoising_end * (total_sigmas - 1)) + 1 + sigmas = sigmas[start_idx:end_idx] + + total_steps = len(sigmas) - 1 + + # Load input latents if provided (image-to-image) + init_latents = context.tensors.load(self.latents.latents_name) if self.latents else None + if init_latents is not None: + init_latents = init_latents.to(device=device, dtype=inference_dtype) + # Anima denoiser works in 3D: add temporal dim if needed + if init_latents.ndim == 4: + init_latents = init_latents.unsqueeze(2) # [B, C, H, W] -> [B, C, 1, H, W] + + # Generate initial noise (3D latent: [B, C, T, H, W]) + noise = self._get_noise(self.height, self.width, inference_dtype, device, self.seed) + + # Prepare input latents + if init_latents is not None: + if self.add_noise: + s_0 = sigmas[0] + latents = s_0 * noise + (1.0 - s_0) * init_latents + else: + latents = init_latents + else: + if self.denoising_start > 1e-5: + raise 
ValueError("denoising_start should be 0 when initial latents are not provided.") + latents = noise + + if total_steps <= 0: + return latents.squeeze(2) + + # Prepare inpaint extension + inpaint_mask = self._prep_inpaint_mask(context, latents.squeeze(2)) + inpaint_extension: AnimaInpaintExtension | None = None + if inpaint_mask is not None: + if init_latents is None: + raise ValueError("Initial latents are required when using an inpaint mask (image-to-image inpainting)") + inpaint_extension = AnimaInpaintExtension( + init_latents=init_latents.squeeze(2), + inpaint_mask=inpaint_mask, + noise=noise.squeeze(2), + shift=ANIMA_SHIFT, + ) + + step_callback = self._build_step_callback(context) + + # Initialize diffusers scheduler if not using built-in Euler + scheduler: SchedulerMixin | None = None + use_scheduler = self.scheduler != "euler" + + if use_scheduler: + scheduler_class = ANIMA_SCHEDULER_MAP[self.scheduler] + scheduler = scheduler_class(num_train_timesteps=1000, shift=1.0) + is_lcm = self.scheduler == "lcm" + set_timesteps_sig = inspect.signature(scheduler.set_timesteps) + if not is_lcm and "sigmas" in set_timesteps_sig.parameters: + scheduler.set_timesteps(sigmas=sigmas, device=device) + else: + scheduler.set_timesteps(num_inference_steps=total_steps, device=device) + num_scheduler_steps = len(scheduler.timesteps) + else: + num_scheduler_steps = total_steps + + with ExitStack() as exit_stack: + (cached_weights, transformer) = exit_stack.enter_context(transformer_info.model_on_device()) + + # Apply LoRA models to the transformer. + # Note: We apply the LoRA after the transformer has been moved to its target device for faster patching. + exit_stack.enter_context( + LayerPatcher.apply_smart_model_patches( + model=transformer, + patches=self._lora_iterator(context), + prefix=ANIMA_LORA_TRANSFORMER_PREFIX, + dtype=inference_dtype, + cached_weights=cached_weights, + ) + ) + + # Run LLM Adapter for each regional conditioning to produce context vectors. 
+ # This must happen with the transformer on device since it uses the adapter weights. + if has_regional: + pos_regional = self._run_llm_adapter_for_regions(transformer, pos_text_conditionings, inference_dtype) + pos_context = pos_regional.context_embeds.unsqueeze(0) # (1, total_ctx_len, 1024) + + # Build regional prompting extension with cross-attention mask + regional_extension = AnimaRegionalPromptingExtension.from_regional_conditioning( + pos_regional, img_seq_len + ) + + # For negative, concatenate all regions without masking (matches Z-Image behavior) + neg_context = None + if do_cfg and neg_text_conditionings is not None: + neg_regional = self._run_llm_adapter_for_regions( + transformer, neg_text_conditionings, inference_dtype + ) + neg_context = neg_regional.context_embeds.unsqueeze(0) + else: + # Single conditioning — run LLM Adapter via normal forward path + tc = pos_text_conditionings[0] + pos_qwen3_embeds = tc.qwen3_embeds.unsqueeze(0) + pos_t5xxl_ids = tc.t5xxl_ids.unsqueeze(0) + pos_t5xxl_weights = None + if tc.t5xxl_weights is not None: + pos_t5xxl_weights = tc.t5xxl_weights.unsqueeze(0).unsqueeze(-1) + + # Pre-compute context via LLM Adapter + pos_context = transformer.preprocess_text_embeds( + pos_qwen3_embeds.to(dtype=inference_dtype), + pos_t5xxl_ids, + t5xxl_weights=pos_t5xxl_weights.to(dtype=inference_dtype) if pos_t5xxl_weights is not None else None, + ) + + neg_context = None + if do_cfg and neg_text_conditionings is not None: + ntc = neg_text_conditionings[0] + neg_qwen3 = ntc.qwen3_embeds.unsqueeze(0) + neg_ids = ntc.t5xxl_ids.unsqueeze(0) + neg_weights = None + if ntc.t5xxl_weights is not None: + neg_weights = ntc.t5xxl_weights.unsqueeze(0).unsqueeze(-1) + neg_context = transformer.preprocess_text_embeds( + neg_qwen3.to(dtype=inference_dtype), + neg_ids, + t5xxl_weights=neg_weights.to(dtype=inference_dtype) if neg_weights is not None else None, + ) + + regional_extension = None + + # Apply regional prompting patch if we have regional masks 
+ exit_stack.enter_context( + patch_anima_for_regional_prompting(transformer, regional_extension) + ) + + # Helper to run transformer with pre-computed context (bypasses LLM Adapter) + def _run_transformer(ctx: torch.Tensor) -> torch.Tensor: + return transformer( + x=latents.to(transformer.dtype if hasattr(transformer, 'dtype') else inference_dtype), + timesteps=timestep, + context=ctx, + # t5xxl_ids=None skips the LLM Adapter — context is already pre-computed + ) + + if use_scheduler and scheduler is not None: + # Scheduler-based denoising + user_step = 0 + pbar = tqdm(total=total_steps, desc="Denoising (Anima)") + for step_index in range(num_scheduler_steps): + sched_timestep = scheduler.timesteps[step_index] + sigma_curr = sched_timestep.item() / scheduler.config.num_train_timesteps + + is_heun = hasattr(scheduler, "state_in_first_order") + in_first_order = scheduler.state_in_first_order if is_heun else True + + timestep = torch.tensor( + [sigma_curr * ANIMA_MULTIPLIER], device=device, dtype=inference_dtype + ).expand(latents.shape[0]) + + noise_pred_cond = _run_transformer(pos_context).float() + + if do_cfg and neg_context is not None: + noise_pred_uncond = _run_transformer(neg_context).float() + noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_cond - noise_pred_uncond) + else: + noise_pred = noise_pred_cond + + step_output = scheduler.step(model_output=noise_pred, timestep=sched_timestep, sample=latents) + latents = step_output.prev_sample + + if step_index + 1 < len(scheduler.sigmas): + sigma_prev = scheduler.sigmas[step_index + 1].item() + else: + sigma_prev = 0.0 + + if inpaint_extension is not None: + latents_4d = latents.squeeze(2) + latents_4d = inpaint_extension.merge_intermediate_latents_with_init_latents( + latents_4d, sigma_prev + ) + latents = latents_4d.unsqueeze(2) + + if is_heun: + if not in_first_order: + user_step += 1 + if user_step <= total_steps: + pbar.update(1) + step_callback(PipelineIntermediateState( + step=user_step, 
order=2, total_steps=total_steps, + timestep=int(sigma_curr * 1000), latents=latents.squeeze(2), + )) + else: + user_step += 1 + if user_step <= total_steps: + pbar.update(1) + step_callback(PipelineIntermediateState( + step=user_step, order=1, total_steps=total_steps, + timestep=int(sigma_curr * 1000), latents=latents.squeeze(2), + )) + pbar.close() + else: + # Built-in Euler implementation (default for Anima) + for step_idx in tqdm(range(total_steps), desc="Denoising (Anima)"): + sigma_curr = sigmas[step_idx] + sigma_prev = sigmas[step_idx + 1] + + timestep = torch.tensor( + [sigma_curr * ANIMA_MULTIPLIER], device=device, dtype=inference_dtype + ).expand(latents.shape[0]) + + noise_pred_cond = _run_transformer(pos_context).float() + + if do_cfg and neg_context is not None: + noise_pred_uncond = _run_transformer(neg_context).float() + noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_cond - noise_pred_uncond) + else: + noise_pred = noise_pred_cond + + latents_dtype = latents.dtype + latents = latents.to(dtype=torch.float32) + latents = latents + (sigma_prev - sigma_curr) * noise_pred + latents = latents.to(dtype=latents_dtype) + + if inpaint_extension is not None: + latents_4d = latents.squeeze(2) + latents_4d = inpaint_extension.merge_intermediate_latents_with_init_latents( + latents_4d, sigma_prev + ) + latents = latents_4d.unsqueeze(2) + + step_callback( + PipelineIntermediateState( + step=step_idx + 1, + order=1, + total_steps=total_steps, + timestep=int(sigma_curr * 1000), + latents=latents.squeeze(2), + ), + ) + + # Remove temporal dimension for output: [B, C, 1, H, W] -> [B, C, H, W] + return latents.squeeze(2) + + def _build_step_callback(self, context: InvocationContext) -> Callable[[PipelineIntermediateState], None]: + def step_callback(state: PipelineIntermediateState) -> None: + context.util.sd_step_callback(state, BaseModelType.Anima) + + return step_callback + + def _lora_iterator(self, context: InvocationContext) -> 
Iterator[Tuple[ModelPatchRaw, float]]: + """Iterate over LoRA models to apply to the transformer.""" + for lora in self.transformer.loras: + lora_info = context.models.load(lora.lora) + if not isinstance(lora_info.model, ModelPatchRaw): + raise TypeError( + f"Expected ModelPatchRaw for LoRA '{lora.lora.key}', got {type(lora_info.model).__name__}. " + "The LoRA model may be corrupted or incompatible." + ) + yield (lora_info.model, lora.weight) + del lora_info diff --git a/invokeai/app/invocations/anima_image_to_latents.py b/invokeai/app/invocations/anima_image_to_latents.py new file mode 100644 index 00000000000..1bb260a9ae0 --- /dev/null +++ b/invokeai/app/invocations/anima_image_to_latents.py @@ -0,0 +1,121 @@ +"""Anima image-to-latents invocation. + +Encodes an image to latent space using the Anima VAE (AutoencoderKLWan or FLUX VAE). + +For Wan VAE (AutoencoderKLWan): +- Input image is converted to 5D tensor [B, C, T, H, W] with T=1 +- After encoding, latents are normalized: (latents - mean) / std + (inverse of the denormalization in anima_latents_to_image.py) + +For FLUX VAE (AutoEncoder): +- Encoding is handled internally by the FLUX VAE +""" + +from typing import Union + +import einops +import torch +from diffusers.models.autoencoders import AutoencoderKLWan + +from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, invocation +from invokeai.app.invocations.fields import ( + FieldDescriptions, + ImageField, + Input, + InputField, + WithBoard, + WithMetadata, +) +from invokeai.app.invocations.model import VAEField +from invokeai.app.invocations.primitives import LatentsOutput +from invokeai.app.services.shared.invocation_context import InvocationContext +from invokeai.backend.flux.modules.autoencoder import AutoEncoder as FluxAutoEncoder +from invokeai.backend.model_manager.load.load_base import LoadedModel +from invokeai.backend.stable_diffusion.diffusers_pipeline import image_resized_to_grid_as_tensor +from 
invokeai.backend.util.devices import TorchDevice +from invokeai.backend.util.vae_working_memory import estimate_vae_working_memory_flux + +AnimaVAE = Union[AutoencoderKLWan, FluxAutoEncoder] + + +@invocation( + "anima_i2l", + title="Image to Latents - Anima", + tags=["image", "latents", "vae", "i2l", "anima"], + category="image", + version="1.0.0", + classification=Classification.Prototype, +) +class AnimaImageToLatentsInvocation(BaseInvocation, WithMetadata, WithBoard): + """Generates latents from an image using the Anima VAE (supports Wan 2.1 and FLUX VAE).""" + + image: ImageField = InputField(description="The image to encode.") + vae: VAEField = InputField(description=FieldDescriptions.vae, input=Input.Connection) + + @staticmethod + def vae_encode(vae_info: LoadedModel, image_tensor: torch.Tensor) -> torch.Tensor: + if not isinstance(vae_info.model, (AutoencoderKLWan, FluxAutoEncoder)): + raise TypeError( + f"Expected AutoencoderKLWan or FluxAutoEncoder for Anima VAE, got {type(vae_info.model).__name__}." + ) + + estimated_working_memory = estimate_vae_working_memory_flux( + operation="encode", + image_tensor=image_tensor, + vae=vae_info.model, + ) + + with vae_info.model_on_device(working_mem_bytes=estimated_working_memory) as (_, vae): + if not isinstance(vae, (AutoencoderKLWan, FluxAutoEncoder)): + raise TypeError( + f"Expected AutoencoderKLWan or FluxAutoEncoder, got {type(vae).__name__}." 
+ ) + + vae_dtype = next(iter(vae.parameters())).dtype + image_tensor = image_tensor.to(device=TorchDevice.choose_torch_device(), dtype=vae_dtype) + + with torch.inference_mode(): + if isinstance(vae, FluxAutoEncoder): + # FLUX VAE handles scaling internally + generator = torch.Generator(device=TorchDevice.choose_torch_device()).manual_seed(0) + latents = vae.encode(image_tensor, sample=True, generator=generator) + else: + # AutoencoderKLWan expects 5D input [B, C, T, H, W] + if image_tensor.ndim == 4: + image_tensor = image_tensor.unsqueeze(2) # [B, C, H, W] -> [B, C, 1, H, W] + + encoded = vae.encode(image_tensor, return_dict=False)[0] + latents = encoded.sample().to(dtype=vae_dtype) + + # Normalize to denoiser space: (latents - mean) / std + # This is the inverse of the denormalization in anima_latents_to_image.py + latents_mean = torch.tensor(vae.config.latents_mean).view(1, -1, 1, 1, 1).to(latents) + latents_std = torch.tensor(vae.config.latents_std).view(1, -1, 1, 1, 1).to(latents) + latents = (latents - latents_mean) / latents_std + + # Remove temporal dimension: [B, C, 1, H, W] -> [B, C, H, W] + if latents.ndim == 5: + latents = latents.squeeze(2) + + return latents + + @torch.no_grad() + def invoke(self, context: InvocationContext) -> LatentsOutput: + image = context.images.get_pil(self.image.image_name) + + image_tensor = image_resized_to_grid_as_tensor(image.convert("RGB")) + if image_tensor.dim() == 3: + image_tensor = einops.rearrange(image_tensor, "c h w -> 1 c h w") + + vae_info = context.models.load(self.vae.vae) + if not isinstance(vae_info.model, (AutoencoderKLWan, FluxAutoEncoder)): + raise TypeError( + f"Expected AutoencoderKLWan or FluxAutoEncoder for Anima VAE, got {type(vae_info.model).__name__}." 
+ ) + + context.util.signal_progress("Running Anima VAE encode") + latents = self.vae_encode(vae_info=vae_info, image_tensor=image_tensor) + + latents = latents.to("cpu") + name = context.tensors.save(tensor=latents) + return LatentsOutput.build(latents_name=name, latents=latents, seed=None) diff --git a/invokeai/app/invocations/anima_latents_to_image.py b/invokeai/app/invocations/anima_latents_to_image.py new file mode 100644 index 00000000000..4ea3fac9319 --- /dev/null +++ b/invokeai/app/invocations/anima_latents_to_image.py @@ -0,0 +1,110 @@ +"""Anima latents-to-image invocation. + +Decodes Anima latents using the QwenImage VAE (AutoencoderKLWan) or +compatible FLUX VAE as fallback. + +Latents from the denoiser are in normalized space (zero-centered). Before +VAE decode, they must be denormalized using the Wan 2.1 per-channel +mean/std: latents = latents * std + mean (matching diffusers WanPipeline). + +The VAE expects 5D latents [B, C, T, H, W] — for single images, T=1. +""" + +import torch +from diffusers.models.autoencoders import AutoencoderKLWan +from einops import rearrange +from PIL import Image + +from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, invocation +from invokeai.app.invocations.fields import ( + FieldDescriptions, + Input, + InputField, + LatentsField, + WithBoard, + WithMetadata, +) +from invokeai.app.invocations.model import VAEField +from invokeai.app.invocations.primitives import ImageOutput +from invokeai.app.services.shared.invocation_context import InvocationContext +from invokeai.backend.flux.modules.autoencoder import AutoEncoder as FluxAutoEncoder +from invokeai.backend.util.devices import TorchDevice +from invokeai.backend.util.vae_working_memory import estimate_vae_working_memory_flux + + +@invocation( + "anima_l2i", + title="Latents to Image - Anima", + tags=["latents", "image", "vae", "l2i", "anima"], + category="latents", + version="1.0.2", + classification=Classification.Prototype, +) +class 
AnimaLatentsToImageInvocation(BaseInvocation, WithMetadata, WithBoard): + """Generates an image from latents using the Anima VAE. + + Supports the Wan 2.1 QwenImage VAE (AutoencoderKLWan) with explicit + latent denormalization, and FLUX VAE as fallback. + """ + + latents: LatentsField = InputField(description=FieldDescriptions.latents, input=Input.Connection) + vae: VAEField = InputField(description=FieldDescriptions.vae, input=Input.Connection) + + @torch.no_grad() + def invoke(self, context: InvocationContext) -> ImageOutput: + latents = context.tensors.load(self.latents.latents_name) + + vae_info = context.models.load(self.vae.vae) + if not isinstance(vae_info.model, (AutoencoderKLWan, FluxAutoEncoder)): + raise TypeError( + f"Expected AutoencoderKLWan or FluxAutoEncoder for Anima VAE, got {type(vae_info.model).__name__}." + ) + + is_flux_vae = isinstance(vae_info.model, FluxAutoEncoder) + + estimated_working_memory = estimate_vae_working_memory_flux( + operation="decode", + image_tensor=latents, + vae=vae_info.model, + ) + + with vae_info.model_on_device(working_mem_bytes=estimated_working_memory) as (_, vae): + context.util.signal_progress("Running Anima VAE decode") + if not isinstance(vae, (AutoencoderKLWan, FluxAutoEncoder)): + raise TypeError(f"Expected AutoencoderKLWan or FluxAutoEncoder, got {type(vae).__name__}.") + + vae_dtype = next(iter(vae.parameters())).dtype + latents = latents.to(device=TorchDevice.choose_torch_device(), dtype=vae_dtype) + + TorchDevice.empty_cache() + + with torch.inference_mode(): + if isinstance(vae, FluxAutoEncoder): + # FLUX VAE handles scaling internally, expects 4D [B, C, H, W] + img = vae.decode(latents) + else: + # Expects 5D latents [B, C, T, H, W] + if latents.ndim == 4: + latents = latents.unsqueeze(2) # [B, C, H, W] -> [B, C, 1, H, W] + + # Denormalize from denoiser space to raw VAE space + # (same as diffusers WanPipeline and ComfyUI Wan21.process_out) + latents_mean = torch.tensor(vae.config.latents_mean).view(1, 
-1, 1, 1, 1).to(latents) + latents_std = torch.tensor(vae.config.latents_std).view(1, -1, 1, 1, 1).to(latents) + latents = latents * latents_std + latents_mean + + decoded = vae.decode(latents, return_dict=False)[0] + + # Output is 5D [B, C, T, H, W] — squeeze temporal dim + if decoded.ndim == 5: + decoded = decoded.squeeze(2) + img = decoded + + img = img.clamp(-1, 1) + img = rearrange(img[0], "c h w -> h w c") + img_pil = Image.fromarray((127.5 * (img + 1.0)).byte().cpu().numpy()) + + TorchDevice.empty_cache() + + image_dto = context.images.save(image=img_pil) + return ImageOutput.build(image_dto) diff --git a/invokeai/app/invocations/anima_lora_loader.py b/invokeai/app/invocations/anima_lora_loader.py new file mode 100644 index 00000000000..0f09b795b88 --- /dev/null +++ b/invokeai/app/invocations/anima_lora_loader.py @@ -0,0 +1,151 @@ +from typing import Optional + +from invokeai.app.invocations.baseinvocation import ( + BaseInvocation, + BaseInvocationOutput, + invocation, + invocation_output, +) +from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, OutputField +from invokeai.app.invocations.model import LoRAField, ModelIdentifierField, Qwen3EncoderField, TransformerField +from invokeai.app.services.shared.invocation_context import InvocationContext +from invokeai.backend.model_manager.taxonomy import BaseModelType, ModelType + + +@invocation_output("anima_lora_loader_output") +class AnimaLoRALoaderOutput(BaseInvocationOutput): + """Anima LoRA Loader Output""" + + transformer: Optional[TransformerField] = OutputField( + default=None, description=FieldDescriptions.transformer, title="Anima Transformer" + ) + qwen3_encoder: Optional[Qwen3EncoderField] = OutputField( + default=None, description=FieldDescriptions.qwen3_encoder, title="Qwen3 Encoder" + ) + + +@invocation( + "anima_lora_loader", + title="Apply LoRA - Anima", + tags=["lora", "model", "anima"], + category="model", + version="1.0.0", +) +class 
AnimaLoRALoaderInvocation(BaseInvocation): + """Apply a LoRA model to an Anima transformer and/or Qwen3 text encoder.""" + + lora: ModelIdentifierField = InputField( + description=FieldDescriptions.lora_model, + title="LoRA", + ui_model_base=BaseModelType.Anima, + ui_model_type=ModelType.LoRA, + ) + weight: float = InputField(default=0.75, description=FieldDescriptions.lora_weight) + transformer: TransformerField | None = InputField( + default=None, + description=FieldDescriptions.transformer, + input=Input.Connection, + title="Anima Transformer", + ) + qwen3_encoder: Qwen3EncoderField | None = InputField( + default=None, + title="Qwen3 Encoder", + description=FieldDescriptions.qwen3_encoder, + input=Input.Connection, + ) + + def invoke(self, context: InvocationContext) -> AnimaLoRALoaderOutput: + lora_key = self.lora.key + + if not context.models.exists(lora_key): + raise ValueError(f"Unknown lora: {lora_key}!") + + if self.transformer and any(lora.lora.key == lora_key for lora in self.transformer.loras): + raise ValueError(f'LoRA "{lora_key}" already applied to transformer.') + if self.qwen3_encoder and any(lora.lora.key == lora_key for lora in self.qwen3_encoder.loras): + raise ValueError(f'LoRA "{lora_key}" already applied to Qwen3 encoder.') + + output = AnimaLoRALoaderOutput() + + if self.transformer is not None: + output.transformer = self.transformer.model_copy(deep=True) + output.transformer.loras.append( + LoRAField( + lora=self.lora, + weight=self.weight, + ) + ) + if self.qwen3_encoder is not None: + output.qwen3_encoder = self.qwen3_encoder.model_copy(deep=True) + output.qwen3_encoder.loras.append( + LoRAField( + lora=self.lora, + weight=self.weight, + ) + ) + + return output + + +@invocation( + "anima_lora_collection_loader", + title="Apply LoRA Collection - Anima", + tags=["lora", "model", "anima"], + category="model", + version="1.0.0", +) +class AnimaLoRACollectionLoader(BaseInvocation): + """Applies a collection of LoRAs to an Anima 
transformer.""" + + loras: Optional[LoRAField | list[LoRAField]] = InputField( + default=None, description="LoRA models and weights. May be a single LoRA or collection.", title="LoRAs" + ) + + transformer: Optional[TransformerField] = InputField( + default=None, + description=FieldDescriptions.transformer, + input=Input.Connection, + title="Transformer", + ) + qwen3_encoder: Qwen3EncoderField | None = InputField( + default=None, + title="Qwen3 Encoder", + description=FieldDescriptions.qwen3_encoder, + input=Input.Connection, + ) + + def invoke(self, context: InvocationContext) -> AnimaLoRALoaderOutput: + output = AnimaLoRALoaderOutput() + loras = self.loras if isinstance(self.loras, list) else [self.loras] + added_loras: list[str] = [] + + if self.transformer is not None: + output.transformer = self.transformer.model_copy(deep=True) + + if self.qwen3_encoder is not None: + output.qwen3_encoder = self.qwen3_encoder.model_copy(deep=True) + + for lora in loras: + if lora is None: + continue + if lora.lora.key in added_loras: + continue + + if not context.models.exists(lora.lora.key): + raise Exception(f"Unknown lora: {lora.lora.key}!") + + if lora.lora.base is not BaseModelType.Anima: + raise ValueError( + f"LoRA '{lora.lora.key}' is for {lora.lora.base.value if lora.lora.base else 'unknown'} models, " + "not Anima models. Ensure you are using an Anima compatible LoRA." 
+ ) + + added_loras.append(lora.lora.key) + + if self.transformer is not None and output.transformer is not None: + output.transformer.loras.append(lora) + + if self.qwen3_encoder is not None and output.qwen3_encoder is not None: + output.qwen3_encoder.loras.append(lora) + + return output diff --git a/invokeai/app/invocations/anima_model_loader.py b/invokeai/app/invocations/anima_model_loader.py new file mode 100644 index 00000000000..01eb588624e --- /dev/null +++ b/invokeai/app/invocations/anima_model_loader.py @@ -0,0 +1,123 @@ +from typing import Optional + +from invokeai.app.invocations.baseinvocation import ( + BaseInvocation, + BaseInvocationOutput, + Classification, + invocation, + invocation_output, +) +from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, OutputField +from invokeai.app.invocations.model import ( + ModelIdentifierField, + Qwen3EncoderField, + T5EncoderField, + TransformerField, + VAEField, +) +from invokeai.app.services.shared.invocation_context import InvocationContext +from invokeai.app.util.t5_model_identifier import ( + preprocess_t5_encoder_model_identifier, + preprocess_t5_tokenizer_model_identifier, +) +from invokeai.backend.model_manager.taxonomy import BaseModelType, ModelType, SubModelType + + +@invocation_output("anima_model_loader_output") +class AnimaModelLoaderOutput(BaseInvocationOutput): + """Anima model loader output.""" + + transformer: TransformerField = OutputField(description=FieldDescriptions.transformer, title="Transformer") + qwen3_encoder: Qwen3EncoderField = OutputField(description=FieldDescriptions.qwen3_encoder, title="Qwen3 Encoder") + vae: VAEField = OutputField(description=FieldDescriptions.vae, title="VAE") + t5_encoder: T5EncoderField = OutputField(description=FieldDescriptions.t5_encoder, title="T5 Encoder") + + +@invocation( + "anima_model_loader", + title="Main Model - Anima", + tags=["model", "anima"], + category="model", + version="1.2.0", + 
classification=Classification.Prototype, +) +class AnimaModelLoaderInvocation(BaseInvocation): + """Loads an Anima model, outputting its submodels. + + Anima uses: + - Transformer: Cosmos Predict2 DiT + LLM Adapter (from single-file checkpoint) + - Qwen3 Encoder: Qwen3 0.6B (standalone single-file) + - VAE: AutoencoderKLQwenImage / Wan 2.1 VAE (standalone single-file or FLUX VAE) + - T5 Encoder: T5-XXL model (only the tokenizer submodel is used, for LLM Adapter token IDs) + """ + + model: ModelIdentifierField = InputField( + description="Anima main model (transformer + LLM adapter).", + input=Input.Direct, + ui_model_base=BaseModelType.Anima, + ui_model_type=ModelType.Main, + title="Transformer", + ) + + vae_model: Optional[ModelIdentifierField] = InputField( + default=None, + description="Standalone VAE model. Anima uses a Wan 2.1 / QwenImage VAE (16-channel). " + "If not provided, a FLUX VAE can be used as a fallback.", + input=Input.Direct, + ui_model_type=ModelType.VAE, + title="VAE", + ) + + qwen3_encoder_model: Optional[ModelIdentifierField] = InputField( + default=None, + description="Standalone Qwen3 0.6B Encoder model.", + input=Input.Direct, + ui_model_type=ModelType.Qwen3Encoder, + title="Qwen3 Encoder", + ) + + t5_encoder_model: Optional[ModelIdentifierField] = InputField( + default=None, + description="T5-XXL encoder model. The tokenizer submodel is used for Anima text encoding.", + input=Input.Direct, + ui_model_type=ModelType.T5Encoder, + title="T5 Encoder", + ) + + def invoke(self, context: InvocationContext) -> AnimaModelLoaderOutput: + # Transformer always comes from the main model + transformer = self.model.model_copy(update={"submodel_type": SubModelType.Transformer}) + + # VAE + if self.vae_model is not None: + vae = self.vae_model.model_copy(update={"submodel_type": SubModelType.VAE}) + else: + raise ValueError( + "No VAE source provided. Set 'VAE' to a compatible VAE model " + "(Wan 2.1 QwenImage VAE or FLUX VAE)." 
+ ) + + # Qwen3 Encoder + if self.qwen3_encoder_model is not None: + qwen3_tokenizer = self.qwen3_encoder_model.model_copy(update={"submodel_type": SubModelType.Tokenizer}) + qwen3_encoder = self.qwen3_encoder_model.model_copy(update={"submodel_type": SubModelType.TextEncoder}) + else: + raise ValueError( + "No Qwen3 Encoder source provided. Set 'Qwen3 Encoder' to a Qwen3 0.6B model." + ) + + # T5 Encoder (only tokenizer submodel is used by Anima) + if self.t5_encoder_model is not None: + t5_tokenizer = preprocess_t5_tokenizer_model_identifier(self.t5_encoder_model) + t5_encoder = preprocess_t5_encoder_model_identifier(self.t5_encoder_model) + else: + raise ValueError( + "No T5 Encoder source provided. Set 'T5 Encoder' to a T5-XXL encoder model." + ) + + return AnimaModelLoaderOutput( + transformer=TransformerField(transformer=transformer, loras=[]), + qwen3_encoder=Qwen3EncoderField(tokenizer=qwen3_tokenizer, text_encoder=qwen3_encoder), + vae=VAEField(vae=vae), + t5_encoder=T5EncoderField(tokenizer=t5_tokenizer, text_encoder=t5_encoder, loras=[]), + ) diff --git a/invokeai/app/invocations/anima_text_encoder.py b/invokeai/app/invocations/anima_text_encoder.py new file mode 100644 index 00000000000..b8743ae6b37 --- /dev/null +++ b/invokeai/app/invocations/anima_text_encoder.py @@ -0,0 +1,212 @@ +"""Anima text encoder invocation. + +Encodes text using the dual-conditioning pipeline: +1. Qwen3 0.6B: Produces hidden states (last layer) +2. T5-XXL Tokenizer: Produces token IDs only (no T5 model needed) + +Both outputs are stored together in AnimaConditioningInfo and used by +the LLM Adapter inside the transformer during denoising. 
+ +Key differences from Z-Image text encoder: +- Anima uses Qwen3 0.6B (base model, NOT instruct) — no chat template +- Anima additionally tokenizes with T5-XXL tokenizer to get token IDs +- Qwen3 output uses all positions (including padding) for full context +""" + +from contextlib import ExitStack +from typing import Iterator, Tuple + +import torch +from transformers import PreTrainedModel, PreTrainedTokenizerBase + +from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, invocation +from invokeai.app.invocations.fields import ( + AnimaConditioningField, + FieldDescriptions, + Input, + InputField, + TensorField, + UIComponent, +) +from invokeai.app.invocations.model import Qwen3EncoderField, T5EncoderField +from invokeai.app.invocations.primitives import AnimaConditioningOutput +from invokeai.app.services.shared.invocation_context import InvocationContext +from invokeai.backend.patches.layer_patcher import LayerPatcher +from invokeai.backend.patches.lora_conversions.anima_lora_constants import ANIMA_LORA_QWEN3_PREFIX +from invokeai.backend.patches.model_patch_raw import ModelPatchRaw +from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ( + AnimaConditioningInfo, + ConditioningFieldData, +) +from invokeai.backend.util.devices import TorchDevice + +# T5-XXL max sequence length for token IDs +T5_MAX_SEQ_LEN = 512 + + +@invocation( + "anima_text_encoder", + title="Prompt - Anima", + tags=["prompt", "conditioning", "anima"], + category="conditioning", + version="1.3.0", + classification=Classification.Prototype, +) +class AnimaTextEncoderInvocation(BaseInvocation): + """Encodes and preps a prompt for an Anima image. + + Uses Qwen3 0.6B for hidden state extraction and T5-XXL tokenizer for + token IDs (no T5 model weights needed). Both are combined by the + LLM Adapter inside the Anima transformer during denoising. 
+ """ + + prompt: str = InputField(description="Text prompt to encode.", ui_component=UIComponent.Textarea) + qwen3_encoder: Qwen3EncoderField = InputField( + title="Qwen3 Encoder", + description=FieldDescriptions.qwen3_encoder, + input=Input.Connection, + ) + t5_encoder: T5EncoderField = InputField( + title="T5 Encoder", + description=FieldDescriptions.t5_encoder, + input=Input.Connection, + ) + mask: TensorField | None = InputField( + default=None, + description="A mask defining the region that this conditioning prompt applies to.", + ) + + @torch.no_grad() + def invoke(self, context: InvocationContext) -> AnimaConditioningOutput: + qwen3_embeds, t5xxl_ids, t5xxl_weights = self._encode_prompt(context) + + # Move to CPU for storage + qwen3_embeds = qwen3_embeds.detach().to("cpu") + t5xxl_ids = t5xxl_ids.detach().to("cpu") + t5xxl_weights = t5xxl_weights.detach().to("cpu") if t5xxl_weights is not None else None + + conditioning_data = ConditioningFieldData( + conditionings=[ + AnimaConditioningInfo( + qwen3_embeds=qwen3_embeds, + t5xxl_ids=t5xxl_ids, + t5xxl_weights=t5xxl_weights, + ) + ] + ) + conditioning_name = context.conditioning.save(conditioning_data) + return AnimaConditioningOutput( + conditioning=AnimaConditioningField(conditioning_name=conditioning_name, mask=self.mask) + ) + + def _encode_prompt( + self, + context: InvocationContext, + ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor | None]: + """Encode prompt using Qwen3 0.6B and T5-XXL tokenizer. + + Returns: + Tuple of (qwen3_embeds, t5xxl_ids, t5xxl_weights). + - qwen3_embeds: Shape (max_seq_len, 1024) — includes all positions (including padding) + to preserve full sequence context for the LLM Adapter. + - t5xxl_ids: Shape (seq_len,) — T5-XXL token IDs (unpadded). + - t5xxl_weights: None (uniform weights for now). 
+ """ + prompt = self.prompt + + # --- Step 1: Encode with Qwen3 0.6B --- + text_encoder_info = context.models.load(self.qwen3_encoder.text_encoder) + tokenizer_info = context.models.load(self.qwen3_encoder.tokenizer) + + with ExitStack() as exit_stack: + (_, text_encoder) = exit_stack.enter_context(text_encoder_info.model_on_device()) + (_, tokenizer) = exit_stack.enter_context(tokenizer_info.model_on_device()) + + device = text_encoder.device + + # Apply LoRA models to the text encoder + lora_dtype = TorchDevice.choose_bfloat16_safe_dtype(device) + exit_stack.enter_context( + LayerPatcher.apply_smart_model_patches( + model=text_encoder, + patches=self._lora_iterator(context), + prefix=ANIMA_LORA_QWEN3_PREFIX, + dtype=lora_dtype, + ) + ) + + if not isinstance(text_encoder, PreTrainedModel): + raise TypeError( + f"Expected PreTrainedModel for text encoder, got {type(text_encoder).__name__}." + ) + if not isinstance(tokenizer, PreTrainedTokenizerBase): + raise TypeError( + f"Expected PreTrainedTokenizerBase for tokenizer, got {type(tokenizer).__name__}." + ) + + context.util.signal_progress("Running Qwen3 0.6B text encoder") + + # Anima uses base Qwen3 (not instruct) — tokenize directly, no chat template. + # No padding or truncation: the LLM Adapter uses rotary position embeddings + # with no fixed positional limit, so the Qwen3 source sequence can be any length. 
+ text_inputs = tokenizer( + prompt, + padding=False, + truncation=False, + return_attention_mask=True, + return_tensors="pt", + ) + + text_input_ids = text_inputs.input_ids + attention_mask = text_inputs.attention_mask + if not isinstance(text_input_ids, torch.Tensor) or not isinstance(attention_mask, torch.Tensor): + raise TypeError("Tokenizer returned unexpected types.") + + # Ensure at least 1 token (empty prompts produce 0 tokens with padding=False) + if text_input_ids.shape[-1] == 0: + pad_id = tokenizer.pad_token_id if tokenizer.pad_token_id is not None else tokenizer.eos_token_id + text_input_ids = torch.tensor([[pad_id]]) + attention_mask = torch.tensor([[1]]) + + # Get last hidden state from Qwen3 (final layer output) + prompt_mask = attention_mask.to(device).bool() + outputs = text_encoder( + text_input_ids.to(device), + attention_mask=prompt_mask, + output_hidden_states=True, + ) + + if not hasattr(outputs, "hidden_states") or outputs.hidden_states is None: + raise RuntimeError("Text encoder did not return hidden_states.") + if len(outputs.hidden_states) < 1: + raise RuntimeError(f"Expected at least 1 hidden state, got {len(outputs.hidden_states)}.") + + # Use last hidden state — only real tokens, no padding + qwen3_embeds = outputs.hidden_states[-1][0] # Shape: (seq_len, 1024) + + # --- Step 2: Tokenize with T5-XXL tokenizer (IDs only, no model) --- + context.util.signal_progress("Tokenizing with T5-XXL") + t5_tokenizer_info = context.models.load(self.t5_encoder.tokenizer) + with t5_tokenizer_info.model_on_device() as (_, t5_tokenizer): + t5_tokens = t5_tokenizer( + prompt, + padding=False, + truncation=True, + max_length=T5_MAX_SEQ_LEN, + return_tensors="pt", + ) + t5xxl_ids = t5_tokens.input_ids[0] # Shape: (seq_len,) + + return qwen3_embeds, t5xxl_ids, None + + def _lora_iterator(self, context: InvocationContext) -> Iterator[Tuple[ModelPatchRaw, float]]: + """Iterate over LoRA models to apply to the Qwen3 text encoder.""" + for lora in 
self.qwen3_encoder.loras: + lora_info = context.models.load(lora.lora) + if not isinstance(lora_info.model, ModelPatchRaw): + raise TypeError( + f"Expected ModelPatchRaw for LoRA '{lora.lora.key}', got {type(lora_info.model).__name__}. " + "The LoRA model may be corrupted or incompatible." + ) + yield (lora_info.model, lora.weight) + del lora_info diff --git a/invokeai/app/invocations/fields.py b/invokeai/app/invocations/fields.py index cca09a059d5..71b99d6687a 100644 --- a/invokeai/app/invocations/fields.py +++ b/invokeai/app/invocations/fields.py @@ -340,6 +340,21 @@ class ZImageConditioningField(BaseModel): ) +class AnimaConditioningField(BaseModel): + """An Anima conditioning tensor primitive value. + + Anima conditioning contains Qwen3 0.6B hidden states and T5-XXL token IDs, + which are combined by the LLM Adapter inside the transformer. + """ + + conditioning_name: str = Field(description="The name of conditioning tensor") + mask: Optional[TensorField] = Field( + default=None, + description="The mask associated with this conditioning tensor for regional prompting. 
" + "Excluded regions should be set to False, included regions should be set to True.", + ) + + class ConditioningField(BaseModel): """A conditioning tensor primitive value""" diff --git a/invokeai/app/invocations/metadata.py b/invokeai/app/invocations/metadata.py index bc13b72c7bb..29e8b3d69b2 100644 --- a/invokeai/app/invocations/metadata.py +++ b/invokeai/app/invocations/metadata.py @@ -166,6 +166,10 @@ def invoke(self, context: InvocationContext) -> MetadataOutput: "z_image_img2img", "z_image_inpaint", "z_image_outpaint", + "anima_txt2img", + "anima_img2img", + "anima_inpaint", + "anima_outpaint", ] diff --git a/invokeai/app/invocations/primitives.py b/invokeai/app/invocations/primitives.py index dcb1fc6a45f..2f404d16ba8 100644 --- a/invokeai/app/invocations/primitives.py +++ b/invokeai/app/invocations/primitives.py @@ -12,6 +12,7 @@ ) from invokeai.app.invocations.constants import LATENT_SCALE_FACTOR from invokeai.app.invocations.fields import ( + AnimaConditioningField, BoundingBoxField, CogView4ConditioningField, ColorField, @@ -473,6 +474,17 @@ def build(cls, conditioning_name: str) -> "ZImageConditioningOutput": return cls(conditioning=ZImageConditioningField(conditioning_name=conditioning_name)) +@invocation_output("anima_conditioning_output") +class AnimaConditioningOutput(BaseInvocationOutput): + """Base class for nodes that output an Anima text conditioning tensor.""" + + conditioning: AnimaConditioningField = OutputField(description=FieldDescriptions.cond) + + @classmethod + def build(cls, conditioning_name: str) -> "AnimaConditioningOutput": + return cls(conditioning=AnimaConditioningField(conditioning_name=conditioning_name)) + + @invocation_output("conditioning_output") class ConditioningOutput(BaseInvocationOutput): """Base class for nodes that output a single conditioning tensor""" diff --git a/invokeai/app/util/step_callback.py b/invokeai/app/util/step_callback.py index 990fdd51d8b..0e2faeca391 100644 --- a/invokeai/app/util/step_callback.py +++ 
b/invokeai/app/util/step_callback.py @@ -133,6 +133,29 @@ FLUX2_LATENT_RGB_BIAS = [-0.0329, -0.0718, -0.0851] +# Anima uses Wan 2.1 VAE with 16 latent channels. +# Factors from ComfyUI: https://github.com/Comfy-Org/ComfyUI/blob/main/comfy/latent_formats.py +ANIMA_LATENT_RGB_FACTORS = [ + [-0.1299, -0.1692, 0.2932], + [0.0671, 0.0406, 0.0442], + [0.3568, 0.2548, 0.1747], + [0.0372, 0.2344, 0.1420], + [0.0313, 0.0189, -0.0328], + [0.0296, -0.0956, -0.0665], + [-0.3477, -0.4059, -0.2925], + [0.0166, 0.1902, 0.1975], + [-0.0412, 0.0267, -0.1364], + [-0.1293, 0.0740, 0.1636], + [0.0680, 0.3019, 0.1128], + [0.0032, 0.0581, 0.0639], + [-0.1251, 0.0927, 0.1699], + [0.0060, -0.0633, 0.0005], + [0.3477, 0.2275, 0.2950], + [0.1984, 0.0913, 0.1861], +] + +ANIMA_LATENT_RGB_BIAS = [-0.1835, -0.0868, -0.3360] + def sample_to_lowres_estimated_image( samples: torch.Tensor, @@ -217,6 +240,10 @@ def diffusion_step_callback( elif base_model == BaseModelType.ZImage: # Z-Image uses FLUX-compatible VAE with 16 latent channels latent_rgb_factors = FLUX_LATENT_RGB_FACTORS + elif base_model == BaseModelType.Anima: + # Anima uses Wan 2.1 VAE with 16 latent channels + latent_rgb_factors = ANIMA_LATENT_RGB_FACTORS + latent_rgb_bias = ANIMA_LATENT_RGB_BIAS else: raise ValueError(f"Unsupported base model: {base_model}") diff --git a/invokeai/backend/anima/__init__.py b/invokeai/backend/anima/__init__.py new file mode 100644 index 00000000000..01a1a952e96 --- /dev/null +++ b/invokeai/backend/anima/__init__.py @@ -0,0 +1,6 @@ +"""Anima model backend module. + +Anima is a 2B-parameter anime-focused text-to-image model built on NVIDIA's +Cosmos Predict2 DiT architecture with a custom LLM Adapter that bridges Qwen3 +0.6B text encoder outputs to the DiT backbone. 
+""" diff --git a/invokeai/backend/anima/anima_transformer.py b/invokeai/backend/anima/anima_transformer.py new file mode 100644 index 00000000000..6c852cd4fba --- /dev/null +++ b/invokeai/backend/anima/anima_transformer.py @@ -0,0 +1,1032 @@ +"""Anima transformer model: Cosmos Predict2 MiniTrainDIT + LLM Adapter. + +The Anima architecture combines: +1. MiniTrainDIT: A Cosmos Predict2 DiT backbone with 28 blocks, 2048-dim hidden state, + and 3D RoPE positional embeddings. +2. LLMAdapter: A 6-layer cross-attention transformer that fuses Qwen3 0.6B hidden states + with learned T5-XXL token embeddings to produce conditioning for the DiT. + +Original source code: +- MiniTrainDIT backbone and positional embeddings: https://github.com/nvidia-cosmos/cosmos-predict2 + SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + SPDX-License-Identifier: Apache-2.0 +- LLMAdapter and Anima wrapper: Clean-room implementation based on + https://github.com/hdae/diffusers-anima (Apache-2.0) +""" + +import logging +import math +from typing import Optional, Tuple + +import torch +import torch.nn.functional as F +from einops import rearrange, repeat +from einops.layers.torch import Rearrange +from torch import nn + +logger = logging.getLogger(__name__) + + +# ============================================================================ +# Positional Embeddings +# Original source: https://github.com/nvidia-cosmos/cosmos-predict2 +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. Apache-2.0 +# ============================================================================ + + +class VideoRopePosition3DEmb(nn.Module): + """3D Rotary Position Embedding for video/image transformers. + + Generates rotary embeddings with separate frequency components for + height, width, and temporal dimensions. 
+ """ + + def __init__( + self, + *, + head_dim: int, + len_h: int, + len_w: int, + len_t: int, + base_fps: int = 24, + h_extrapolation_ratio: float = 1.0, + w_extrapolation_ratio: float = 1.0, + t_extrapolation_ratio: float = 1.0, + enable_fps_modulation: bool = True, + device: Optional[torch.device] = None, + **kwargs, + ): + super().__init__() + self.base_fps = base_fps + self.max_h = len_h + self.max_w = len_w + self.enable_fps_modulation = enable_fps_modulation + + dim = head_dim + dim_h = dim // 6 * 2 + dim_w = dim_h + dim_t = dim - 2 * dim_h + assert dim == dim_h + dim_w + dim_t, f"bad dim: {dim} != {dim_h} + {dim_w} + {dim_t}" + + self.register_buffer( + "dim_spatial_range", + torch.arange(0, dim_h, 2, device=device)[: (dim_h // 2)].float() / dim_h, + persistent=False, + ) + self.register_buffer( + "dim_temporal_range", + torch.arange(0, dim_t, 2, device=device)[: (dim_t // 2)].float() / dim_t, + persistent=False, + ) + + self.h_ntk_factor = h_extrapolation_ratio ** (dim_h / (dim_h - 2)) + self.w_ntk_factor = w_extrapolation_ratio ** (dim_w / (dim_w - 2)) + self.t_ntk_factor = t_extrapolation_ratio ** (dim_t / (dim_t - 2)) + + def forward( + self, + x_B_T_H_W_C: torch.Tensor, + fps: Optional[torch.Tensor] = None, + device: Optional[torch.device] = None, + ) -> torch.Tensor: + return self.generate_embeddings(x_B_T_H_W_C.shape, fps=fps, device=device) + + def generate_embeddings( + self, + B_T_H_W_C: torch.Size, + fps: Optional[torch.Tensor] = None, + device: Optional[torch.device] = None, + dtype: Optional[torch.dtype] = None, + ) -> torch.Tensor: + h_theta = 10000.0 * self.h_ntk_factor + w_theta = 10000.0 * self.w_ntk_factor + t_theta = 10000.0 * self.t_ntk_factor + + h_spatial_freqs = 1.0 / (h_theta ** self.dim_spatial_range.to(device=device)) + w_spatial_freqs = 1.0 / (w_theta ** self.dim_spatial_range.to(device=device)) + temporal_freqs = 1.0 / (t_theta ** self.dim_temporal_range.to(device=device)) + + B, T, H, W, _ = B_T_H_W_C + seq = 
torch.arange(max(H, W, T), dtype=torch.float, device=device) + + half_emb_h = torch.outer(seq[:H].to(device=device), h_spatial_freqs) + half_emb_w = torch.outer(seq[:W].to(device=device), w_spatial_freqs) + + if fps is None or self.enable_fps_modulation is False: + half_emb_t = torch.outer(seq[:T].to(device=device), temporal_freqs) + else: + half_emb_t = torch.outer(seq[:T].to(device=device) / fps * self.base_fps, temporal_freqs) + + half_emb_h = torch.stack( + [torch.cos(half_emb_h), -torch.sin(half_emb_h), torch.sin(half_emb_h), torch.cos(half_emb_h)], dim=-1 + ) + half_emb_w = torch.stack( + [torch.cos(half_emb_w), -torch.sin(half_emb_w), torch.sin(half_emb_w), torch.cos(half_emb_w)], dim=-1 + ) + half_emb_t = torch.stack( + [torch.cos(half_emb_t), -torch.sin(half_emb_t), torch.sin(half_emb_t), torch.cos(half_emb_t)], dim=-1 + ) + + em_T_H_W_D = torch.cat( + [ + repeat(half_emb_t, "t d x -> t h w d x", h=H, w=W), + repeat(half_emb_h, "h d x -> t h w d x", t=T, w=W), + repeat(half_emb_w, "w d x -> t h w d x", t=T, h=H), + ], + dim=-2, + ) + + return rearrange(em_T_H_W_D, "t h w d (i j) -> (t h w) d i j", i=2, j=2).float() + + +def _normalize(x: torch.Tensor, dim: Optional[list[int]] = None, eps: float = 0) -> torch.Tensor: + if dim is None: + dim = list(range(1, x.ndim)) + norm = torch.linalg.vector_norm(x, dim=dim, keepdim=True, dtype=torch.float32) + norm = torch.add(eps, norm, alpha=math.sqrt(norm.numel() / x.numel())) + return x / norm.to(x.dtype) + + +class LearnablePosEmbAxis(nn.Module): + """Learnable per-axis positional embeddings.""" + + def __init__( + self, + *, + model_channels: int, + len_h: int, + len_w: int, + len_t: int, + device: Optional[torch.device] = None, + dtype: Optional[torch.dtype] = None, + **kwargs, + ): + super().__init__() + self.pos_emb_h = nn.Parameter(torch.empty(len_h, model_channels, device=device, dtype=dtype)) + self.pos_emb_w = nn.Parameter(torch.empty(len_w, model_channels, device=device, dtype=dtype)) + self.pos_emb_t = 
nn.Parameter(torch.empty(len_t, model_channels, device=device, dtype=dtype)) + + def forward( + self, + x_B_T_H_W_C: torch.Tensor, + fps: Optional[torch.Tensor] = None, + device: Optional[torch.device] = None, + dtype: Optional[torch.dtype] = None, + ) -> torch.Tensor: + return self.generate_embeddings(x_B_T_H_W_C.shape, device=device, dtype=dtype) + + def generate_embeddings( + self, + B_T_H_W_C: torch.Size, + fps: Optional[torch.Tensor] = None, + device: Optional[torch.device] = None, + dtype: Optional[torch.dtype] = None, + ) -> torch.Tensor: + B, T, H, W, _ = B_T_H_W_C + emb_h_H = self.pos_emb_h[:H].to(device=device, dtype=dtype) + emb_w_W = self.pos_emb_w[:W].to(device=device, dtype=dtype) + emb_t_T = self.pos_emb_t[:T].to(device=device, dtype=dtype) + emb = ( + repeat(emb_t_T, "t d -> b t h w d", b=B, h=H, w=W) + + repeat(emb_h_H, "h d -> b t h w d", b=B, t=T, w=W) + + repeat(emb_w_W, "w d -> b t h w d", b=B, t=T, h=H) + ) + return _normalize(emb, dim=-1, eps=1e-6) + + +# ============================================================================ +# Cosmos Predict2 MiniTrainDIT +# Original source: https://github.com/nvidia-cosmos/cosmos-predict2 +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. 
def apply_rotary_pos_emb_cosmos(t: torch.Tensor, freqs: torch.Tensor) -> torch.Tensor:
    """Apply rotary position embeddings in Cosmos format (2x2 rotation matrices).

    Args:
        t: Input tensor whose last dimension holds the per-head channels.
        freqs: Precomputed rotation tensor, indexed as ``freqs[..., 0]`` and
            ``freqs[..., 1]`` to form the two terms of a 2x2 rotation.
            Presumably produced by the 3D RoPE position embedder -- confirm
            its exact shape against the pos_embedder output.

    Returns:
        Tensor with the same shape and dtype as ``t``.
    """
    # Split the channel dim into (2, d/2), move the pair axis last, and add a
    # broadcast axis so it lines up with the rotation matrices in `freqs`.
    # The rotation is computed in fp32 and cast back via type_as for stability.
    t_ = t.reshape(*t.shape[:-1], 2, -1).movedim(-2, -1).unsqueeze(-2).float()
    t_out = freqs[..., 0] * t_[..., 0] + freqs[..., 1] * t_[..., 1]
    # Undo the pairing layout and restore the original shape/dtype.
    t_out = t_out.movedim(-1, -2).reshape(*t.shape).type_as(t)
    return t_out


class GPT2FeedForward(nn.Module):
    """GPT-2 style MLP: Linear -> GELU -> Linear, both projections bias-free."""

    def __init__(self, d_model: int, d_ff: int) -> None:
        super().__init__()
        self.activation = nn.GELU()
        self.layer1 = nn.Linear(d_model, d_ff, bias=False)
        self.layer2 = nn.Linear(d_ff, d_model, bias=False)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # (..., d_model) -> (..., d_ff) -> (..., d_model)
        return self.layer2(self.activation(self.layer1(x)))


class CosmosAttention(nn.Module):
    """Multi-head attention for the Cosmos DiT backbone.

    Supports both self-attention and cross-attention with QK normalization
    and rotary position embeddings.
    """

    def __init__(
        self,
        query_dim: int,
        context_dim: Optional[int] = None,
        n_heads: int = 8,
        head_dim: int = 64,
        dropout: float = 0.0,
    ) -> None:
        super().__init__()
        # context_dim=None selects self-attention; RoPE is only applied in
        # that case (see forward).
        self.is_selfattn = context_dim is None
        context_dim = query_dim if context_dim is None else context_dim
        inner_dim = head_dim * n_heads

        self.n_heads = n_heads
        self.head_dim = head_dim

        self.q_proj = nn.Linear(query_dim, inner_dim, bias=False)
        self.q_norm = nn.RMSNorm(head_dim, eps=1e-6)

        self.k_proj = nn.Linear(context_dim, inner_dim, bias=False)
        self.k_norm = nn.RMSNorm(head_dim, eps=1e-6)

        self.v_proj = nn.Linear(context_dim, inner_dim, bias=False)
        # Values are not normalized; Identity keeps the module layout uniform.
        self.v_norm = nn.Identity()

        self.output_proj = nn.Linear(inner_dim, query_dim, bias=False)
        # Skip the Dropout module entirely when dropout is effectively zero.
        self.output_dropout = nn.Dropout(dropout) if dropout > 1e-4 else nn.Identity()

    def forward(
        self,
        x: torch.Tensor,
        context: Optional[torch.Tensor] = None,
        rope_emb: Optional[torch.Tensor] = None,
    ) -> torch.Tensor:
        """Attend `x` to `context` (or to itself when context is None).

        Args:
            x: Query tokens; last dim is projected to n_heads * head_dim.
            context: Key/value tokens; defaults to `x` (self-attention).
            rope_emb: Rotary embedding; applied to Q and K only when this
                module was constructed as self-attention.
        """
        q = self.q_proj(x)
        context = x if context is None else context
        k = self.k_proj(context)
        v = self.v_proj(context)
        # Split the projection into per-head channels: (..., h*d) -> (..., h, d).
        q, k, v = (rearrange(t, "b ... (h d) -> b ... h d", h=self.n_heads, d=self.head_dim) for t in (q, k, v))

        # Per-head RMS normalization of Q and K; V passes through unchanged.
        q = self.q_norm(q)
        k = self.k_norm(k)
        v = self.v_norm(v)

        if self.is_selfattn and rope_emb is not None:
            q = apply_rotary_pos_emb_cosmos(q, rope_emb)
            k = apply_rotary_pos_emb_cosmos(k, rope_emb)

        # Reshape for scaled_dot_product_attention: (B, heads, seq, dim)
        in_q_shape = q.shape
        in_k_shape = k.shape
        q = rearrange(q, "b ... h d -> b h ... d").reshape(in_q_shape[0], in_q_shape[-2], -1, in_q_shape[-1])
        k = rearrange(k, "b ... h d -> b h ... d").reshape(in_k_shape[0], in_k_shape[-2], -1, in_k_shape[-1])
        v = rearrange(v, "b ... h d -> b h ... d").reshape(in_k_shape[0], in_k_shape[-2], -1, in_k_shape[-1])

        result = F.scaled_dot_product_attention(q, k, v)
        # Merge heads back: (B, h, s, d) -> (B, s, h*d), then project out.
        result = rearrange(result, "b h s d -> b s (h d)")
        return self.output_dropout(self.output_proj(result))
class Timesteps(nn.Module):
    """Sinusoidal timestep embeddings.

    Maps a (B, T) tensor of timestep values to (B, T, num_channels) features
    using the classic transformer sinusoid formulation (cos half, sin half).
    """

    def __init__(self, num_channels: int):
        super().__init__()
        self.num_channels = num_channels

    def forward(self, timesteps_B_T: torch.Tensor) -> torch.Tensor:
        assert timesteps_B_T.ndim == 2
        batch, frames = timesteps_B_T.shape
        flat_BT = timesteps_B_T.flatten().float()
        half_dim = self.num_channels // 2
        # Log-spaced frequencies over half the channel width.
        exponent = -math.log(10000) * torch.arange(half_dim, dtype=torch.float32, device=flat_BT.device) / half_dim
        angles = flat_BT[:, None].float() * torch.exp(exponent)[None, :]
        features = torch.cat([torch.cos(angles), torch.sin(angles)], dim=-1)
        # Restore the (B, T) layout that was flattened above.
        return rearrange(features, "(b t) d -> b t d", b=batch, t=frames)


class TimestepEmbedding(nn.Module):
    """Projects sinusoidal timestep embeddings to model dimension.

    With `use_adaln_lora`, the second linear produces 3x the output width
    (shift/scale/gate deltas) and the raw input is returned alongside it as
    the base embedding.
    """

    def __init__(self, in_features: int, out_features: int, use_adaln_lora: bool = False):
        super().__init__()
        self.use_adaln_lora = use_adaln_lora
        # The first projection drops its bias in the AdaLN-LoRA variant.
        self.linear_1 = nn.Linear(in_features, out_features, bias=not use_adaln_lora)
        self.activation = nn.SiLU()
        second_out = 3 * out_features if use_adaln_lora else out_features
        self.linear_2 = nn.Linear(out_features, second_out, bias=False)

    def forward(self, sample: torch.Tensor) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
        hidden = self.linear_1(sample)
        hidden = self.activation(hidden)
        projected = self.linear_2(hidden)
        if self.use_adaln_lora:
            # AdaLN-LoRA: raw sinusoidal features plus the 3x-wide deltas.
            return sample, projected
        return projected, None
class PatchEmbed(nn.Module):
    """Patchify input tensor via rearrange + linear projection."""

    def __init__(
        self,
        spatial_patch_size: int,
        temporal_patch_size: int,
        in_channels: int = 3,
        out_channels: int = 768,
    ):
        super().__init__()
        self.spatial_patch_size = spatial_patch_size
        self.temporal_patch_size = temporal_patch_size
        # Rearrange folds each (r x m x n) patch into the channel axis, then a
        # single bias-free Linear projects the flattened patch to out_channels.
        self.proj = nn.Sequential(
            Rearrange(
                "b c (t r) (h m) (w n) -> b t h w (c r m n)",
                r=temporal_patch_size,
                m=spatial_patch_size,
                n=spatial_patch_size,
            ),
            nn.Linear(
                in_channels * spatial_patch_size * spatial_patch_size * temporal_patch_size,
                out_channels,
                bias=False,
            ),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Expects a 5D (B, C, T, H, W) tensor whose T/H/W are already divisible
        # by the patch sizes (see MiniTrainDIT._pad_to_patch_size).
        assert x.dim() == 5
        return self.proj(x)


class FinalLayer(nn.Module):
    """Final AdaLN-modulated output projection."""

    def __init__(
        self,
        hidden_size: int,
        spatial_patch_size: int,
        temporal_patch_size: int,
        out_channels: int,
        use_adaln_lora: bool = False,
        adaln_lora_dim: int = 256,
    ):
        super().__init__()
        # Affine-free LayerNorm: shift/scale come from the AdaLN modulation.
        self.layer_norm = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6)
        self.linear = nn.Linear(
            hidden_size, spatial_patch_size * spatial_patch_size * temporal_patch_size * out_channels, bias=False
        )
        self.hidden_size = hidden_size
        self.use_adaln_lora = use_adaln_lora

        # Two outputs (shift, scale); the LoRA variant bottlenecks through
        # adaln_lora_dim first.
        if use_adaln_lora:
            self.adaln_modulation = nn.Sequential(
                nn.SiLU(),
                nn.Linear(hidden_size, adaln_lora_dim, bias=False),
                nn.Linear(adaln_lora_dim, 2 * hidden_size, bias=False),
            )
        else:
            self.adaln_modulation = nn.Sequential(
                nn.SiLU(),
                nn.Linear(hidden_size, 2 * hidden_size, bias=False),
            )

    def forward(
        self,
        x_B_T_H_W_D: torch.Tensor,
        emb_B_T_D: torch.Tensor,
        adaln_lora_B_T_3D: Optional[torch.Tensor] = None,
    ) -> torch.Tensor:
        if self.use_adaln_lora:
            assert adaln_lora_B_T_3D is not None
            # Only the first 2*hidden_size channels of the LoRA deltas feed the
            # final layer (the block-level modulations use all 3*hidden_size).
            shift, scale = (
                self.adaln_modulation(emb_B_T_D) + adaln_lora_B_T_3D[:, :, : 2 * self.hidden_size]
            ).chunk(2, dim=-1)
        else:
            shift, scale = self.adaln_modulation(emb_B_T_D).chunk(2, dim=-1)

        # Broadcast per-(batch, frame) modulation over the spatial axes.
        shift = rearrange(shift, "b t d -> b t 1 1 d")
        scale = rearrange(scale, "b t d -> b t 1 1 d")

        x_B_T_H_W_D = self.layer_norm(x_B_T_H_W_D) * (1 + scale) + shift
        return self.linear(x_B_T_H_W_D)
class DiTBlock(nn.Module):
    """Cosmos DiT transformer block with self-attention, cross-attention, and MLP.

    Each component uses AdaLN (Adaptive Layer Normalization) modulation from
    the timestep embedding.
    """

    def __init__(
        self,
        x_dim: int,
        context_dim: int,
        num_heads: int,
        mlp_ratio: float = 4.0,
        use_adaln_lora: bool = False,
        adaln_lora_dim: int = 256,
    ):
        super().__init__()
        self.x_dim = x_dim
        self.use_adaln_lora = use_adaln_lora

        # Affine-free norms: shift/scale/gate come from the AdaLN modulation.
        self.layer_norm_self_attn = nn.LayerNorm(x_dim, elementwise_affine=False, eps=1e-6)
        self.self_attn = CosmosAttention(x_dim, None, num_heads, x_dim // num_heads)

        self.layer_norm_cross_attn = nn.LayerNorm(x_dim, elementwise_affine=False, eps=1e-6)
        self.cross_attn = CosmosAttention(x_dim, context_dim, num_heads, x_dim // num_heads)

        self.layer_norm_mlp = nn.LayerNorm(x_dim, elementwise_affine=False, eps=1e-6)
        self.mlp = GPT2FeedForward(x_dim, int(x_dim * mlp_ratio))

        # AdaLN modulation layers (shift, scale, gate for each of 3 components)
        if use_adaln_lora:
            self.adaln_modulation_self_attn = nn.Sequential(
                nn.SiLU(), nn.Linear(x_dim, adaln_lora_dim, bias=False), nn.Linear(adaln_lora_dim, 3 * x_dim, bias=False)
            )
            self.adaln_modulation_cross_attn = nn.Sequential(
                nn.SiLU(), nn.Linear(x_dim, adaln_lora_dim, bias=False), nn.Linear(adaln_lora_dim, 3 * x_dim, bias=False)
            )
            self.adaln_modulation_mlp = nn.Sequential(
                nn.SiLU(), nn.Linear(x_dim, adaln_lora_dim, bias=False), nn.Linear(adaln_lora_dim, 3 * x_dim, bias=False)
            )
        else:
            self.adaln_modulation_self_attn = nn.Sequential(nn.SiLU(), nn.Linear(x_dim, 3 * x_dim, bias=False))
            self.adaln_modulation_cross_attn = nn.Sequential(nn.SiLU(), nn.Linear(x_dim, 3 * x_dim, bias=False))
            self.adaln_modulation_mlp = nn.Sequential(nn.SiLU(), nn.Linear(x_dim, 3 * x_dim, bias=False))

    def forward(
        self,
        x_B_T_H_W_D: torch.Tensor,
        emb_B_T_D: torch.Tensor,
        crossattn_emb: torch.Tensor,
        rope_emb_L_1_1_D: Optional[torch.Tensor] = None,
        adaln_lora_B_T_3D: Optional[torch.Tensor] = None,
        extra_per_block_pos_emb: Optional[torch.Tensor] = None,
    ) -> torch.Tensor:
        """Run self-attn -> cross-attn -> MLP, each gated by AdaLN modulation.

        The residual stream keeps the dtype of `x_B_T_H_W_D` while attention
        and MLP compute in the dtype of `emb_B_T_D` (see MiniTrainDIT.forward,
        which promotes the residual stream to fp32 under fp16 compute).
        """
        residual_dtype = x_B_T_H_W_D.dtype
        compute_dtype = emb_B_T_D.dtype

        if extra_per_block_pos_emb is not None:
            x_B_T_H_W_D = x_B_T_H_W_D + extra_per_block_pos_emb

        # Compute AdaLN modulations. In the LoRA variant the shared per-block
        # deltas (adaln_lora_B_T_3D) are added before chunking.
        if self.use_adaln_lora:
            assert adaln_lora_B_T_3D is not None
            shift_sa, scale_sa, gate_sa = (
                self.adaln_modulation_self_attn(emb_B_T_D) + adaln_lora_B_T_3D
            ).chunk(3, dim=-1)
            shift_ca, scale_ca, gate_ca = (
                self.adaln_modulation_cross_attn(emb_B_T_D) + adaln_lora_B_T_3D
            ).chunk(3, dim=-1)
            shift_mlp, scale_mlp, gate_mlp = (
                self.adaln_modulation_mlp(emb_B_T_D) + adaln_lora_B_T_3D
            ).chunk(3, dim=-1)
        else:
            shift_sa, scale_sa, gate_sa = self.adaln_modulation_self_attn(emb_B_T_D).chunk(3, dim=-1)
            shift_ca, scale_ca, gate_ca = self.adaln_modulation_cross_attn(emb_B_T_D).chunk(3, dim=-1)
            shift_mlp, scale_mlp, gate_mlp = self.adaln_modulation_mlp(emb_B_T_D).chunk(3, dim=-1)

        # Reshape for broadcasting: (B, T, D) -> (B, T, 1, 1, D)
        shift_sa, scale_sa, gate_sa = (rearrange(t, "b t d -> b t 1 1 d") for t in (shift_sa, scale_sa, gate_sa))
        shift_ca, scale_ca, gate_ca = (rearrange(t, "b t d -> b t 1 1 d") for t in (shift_ca, scale_ca, gate_ca))
        shift_mlp, scale_mlp, gate_mlp = (rearrange(t, "b t d -> b t 1 1 d") for t in (shift_mlp, scale_mlp, gate_mlp))

        # Only T/H/W are used below (for un-flattening the token sequence).
        B, T, H, W, D = x_B_T_H_W_D.shape

        def _adaln(x: torch.Tensor, norm: nn.Module, scale: torch.Tensor, shift: torch.Tensor) -> torch.Tensor:
            return norm(x) * (1 + scale) + shift

        # Self-attention: flatten (T, H, W) into one token axis for attention,
        # then restore the 5D layout and apply the gated residual.
        normed = _adaln(x_B_T_H_W_D, self.layer_norm_self_attn, scale_sa, shift_sa)
        result = rearrange(
            self.self_attn(rearrange(normed.to(compute_dtype), "b t h w d -> b (t h w) d"), None, rope_emb=rope_emb_L_1_1_D),
            "b (t h w) d -> b t h w d", t=T, h=H, w=W,
        )
        x_B_T_H_W_D = x_B_T_H_W_D + gate_sa.to(residual_dtype) * result.to(residual_dtype)

        # Cross-attention into the text context.
        normed = _adaln(x_B_T_H_W_D, self.layer_norm_cross_attn, scale_ca, shift_ca)
        result = rearrange(
            self.cross_attn(rearrange(normed.to(compute_dtype), "b t h w d -> b (t h w) d"), crossattn_emb, rope_emb=rope_emb_L_1_1_D),
            "b (t h w) d -> b t h w d", t=T, h=H, w=W,
        )
        x_B_T_H_W_D = result.to(residual_dtype) * gate_ca.to(residual_dtype) + x_B_T_H_W_D

        # MLP (operates directly on the 5D layout; Linear acts on the last dim).
        normed = _adaln(x_B_T_H_W_D, self.layer_norm_mlp, scale_mlp, shift_mlp)
        result = self.mlp(normed.to(compute_dtype))
        x_B_T_H_W_D = x_B_T_H_W_D + gate_mlp.to(residual_dtype) * result.to(residual_dtype)

        return x_B_T_H_W_D
class MiniTrainDIT(nn.Module):
    """Cosmos Predict2 DiT backbone for video/image generation.

    This is the core transformer architecture that Anima extends. It processes
    3D latent tensors (B, C, T, H, W) with patch embedding, positional encoding,
    and adaptive layer normalization.

    Args:
        max_img_h: Maximum image height in pixels.
        max_img_w: Maximum image width in pixels.
        max_frames: Maximum number of video frames.
        in_channels: Number of input latent channels.
        out_channels: Number of output channels.
        patch_spatial: Spatial patch size.
        patch_temporal: Temporal patch size.
        concat_padding_mask: Whether to concatenate a padding mask channel.
        model_channels: Hidden dimension of the transformer.
        num_blocks: Number of DiT blocks.
        num_heads: Number of attention heads.
        mlp_ratio: MLP expansion ratio.
        crossattn_emb_channels: Cross-attention context dimension.
        use_adaln_lora: Whether to use AdaLN-LoRA.
        adaln_lora_dim: AdaLN-LoRA bottleneck dimension.
        extra_per_block_abs_pos_emb: Whether to use extra learnable positional embeddings.

    Note:
        `pos_emb_learnable`, `pos_emb_interpolation`, the `extra_*_extrapolation_ratio`
        arguments and `image_model` are accepted (presumably for config
        compatibility with the upstream Cosmos checkpoints) but are not used
        anywhere in this class body -- confirm against upstream before removing.
    """

    def __init__(
        self,
        max_img_h: int = 240,
        max_img_w: int = 240,
        max_frames: int = 1,
        in_channels: int = 16,
        out_channels: int = 16,
        patch_spatial: int = 2,
        patch_temporal: int = 1,
        concat_padding_mask: bool = True,
        model_channels: int = 2048,
        num_blocks: int = 28,
        num_heads: int = 16,
        mlp_ratio: float = 4.0,
        crossattn_emb_channels: int = 1024,
        pos_emb_cls: str = "rope3d",
        pos_emb_learnable: bool = False,
        pos_emb_interpolation: str = "crop",
        min_fps: int = 1,
        max_fps: int = 30,
        use_adaln_lora: bool = False,
        adaln_lora_dim: int = 256,
        rope_h_extrapolation_ratio: float = 1.0,
        rope_w_extrapolation_ratio: float = 1.0,
        rope_t_extrapolation_ratio: float = 1.0,
        extra_per_block_abs_pos_emb: bool = False,
        extra_h_extrapolation_ratio: float = 1.0,
        extra_w_extrapolation_ratio: float = 1.0,
        extra_t_extrapolation_ratio: float = 1.0,
        rope_enable_fps_modulation: bool = True,
        image_model: Optional[str] = None,
    ) -> None:
        super().__init__()
        self.max_img_h = max_img_h
        self.max_img_w = max_img_w
        self.max_frames = max_frames
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.patch_spatial = patch_spatial
        self.patch_temporal = patch_temporal
        self.num_heads = num_heads
        self.num_blocks = num_blocks
        self.model_channels = model_channels
        self.concat_padding_mask = concat_padding_mask
        self.pos_emb_cls = pos_emb_cls
        self.extra_per_block_abs_pos_emb = extra_per_block_abs_pos_emb

        # Positional embeddings: RoPE grid sized in patches, not pixels.
        rope_kwargs = dict(
            head_dim=model_channels // num_heads,
            len_h=max_img_h // patch_spatial,
            len_w=max_img_w // patch_spatial,
            len_t=max_frames // patch_temporal,
            max_fps=max_fps,
            min_fps=min_fps,
            h_extrapolation_ratio=rope_h_extrapolation_ratio,
            w_extrapolation_ratio=rope_w_extrapolation_ratio,
            t_extrapolation_ratio=rope_t_extrapolation_ratio,
            enable_fps_modulation=rope_enable_fps_modulation,
        )
        self.pos_embedder = VideoRopePosition3DEmb(**rope_kwargs)

        # Optional learnable absolute positions added inside every block.
        if extra_per_block_abs_pos_emb:
            self.extra_pos_embedder = LearnablePosEmbAxis(
                model_channels=model_channels,
                len_h=max_img_h // patch_spatial,
                len_w=max_img_w // patch_spatial,
                len_t=max_frames // patch_temporal,
            )

        self.use_adaln_lora = use_adaln_lora
        self.adaln_lora_dim = adaln_lora_dim

        # Timestep embedding
        self.t_embedder = nn.Sequential(
            Timesteps(model_channels),
            TimestepEmbedding(model_channels, model_channels, use_adaln_lora=use_adaln_lora),
        )
        self.t_embedding_norm = nn.RMSNorm(model_channels, eps=1e-6)

        # Patch embedding (+1 input channel when the padding mask is concatenated)
        embed_in_channels = in_channels + 1 if concat_padding_mask else in_channels
        self.x_embedder = PatchEmbed(
            spatial_patch_size=patch_spatial,
            temporal_patch_size=patch_temporal,
            in_channels=embed_in_channels,
            out_channels=model_channels,
        )

        # Transformer blocks
        self.blocks = nn.ModuleList([
            DiTBlock(
                x_dim=model_channels,
                context_dim=crossattn_emb_channels,
                num_heads=num_heads,
                mlp_ratio=mlp_ratio,
                use_adaln_lora=use_adaln_lora,
                adaln_lora_dim=adaln_lora_dim,
            )
            for _ in range(num_blocks)
        ])

        # Final output layer
        self.final_layer = FinalLayer(
            hidden_size=model_channels,
            spatial_patch_size=patch_spatial,
            temporal_patch_size=patch_temporal,
            out_channels=out_channels,
            use_adaln_lora=use_adaln_lora,
            adaln_lora_dim=adaln_lora_dim,
        )

    def _pad_to_patch_size(self, x: torch.Tensor) -> torch.Tensor:
        """Pad input tensor so dimensions are divisible by patch sizes."""
        _, _, T, H, W = x.shape
        pad_t = (self.patch_temporal - T % self.patch_temporal) % self.patch_temporal
        pad_h = (self.patch_spatial - H % self.patch_spatial) % self.patch_spatial
        pad_w = (self.patch_spatial - W % self.patch_spatial) % self.patch_spatial
        if pad_t > 0 or pad_h > 0 or pad_w > 0:
            # F.pad pairs run from the last dim backwards: (W, H, T).
            x = F.pad(x, (0, pad_w, 0, pad_h, 0, pad_t))
        return x

    def prepare_embedded_sequence(
        self,
        x_B_C_T_H_W: torch.Tensor,
        fps: Optional[torch.Tensor] = None,
        padding_mask: Optional[torch.Tensor] = None,
    ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[torch.Tensor]]:
        """Patchify the latents and build positional embeddings.

        Returns:
            Tuple of (patch tokens (B, T, H, W, D), RoPE embedding or None,
            extra learnable positional embedding or None).
        """
        if self.concat_padding_mask:
            # Default to an all-zeros mask; it is broadcast over T and
            # concatenated as one extra input channel.
            if padding_mask is None:
                padding_mask = torch.zeros(
                    x_B_C_T_H_W.shape[0], 1, x_B_C_T_H_W.shape[3], x_B_C_T_H_W.shape[4],
                    dtype=x_B_C_T_H_W.dtype, device=x_B_C_T_H_W.device,
                )
            x_B_C_T_H_W = torch.cat(
                [x_B_C_T_H_W, padding_mask.unsqueeze(1).repeat(1, 1, x_B_C_T_H_W.shape[2], 1, 1)], dim=1
            )

        x_B_T_H_W_D = self.x_embedder(x_B_C_T_H_W)

        extra_pos_emb = None
        if self.extra_per_block_abs_pos_emb:
            extra_pos_emb = self.extra_pos_embedder(x_B_T_H_W_D, fps=fps, device=x_B_C_T_H_W.device, dtype=x_B_C_T_H_W.dtype)

        if "rope" in self.pos_emb_cls.lower():
            return x_B_T_H_W_D, self.pos_embedder(x_B_T_H_W_D, fps=fps, device=x_B_C_T_H_W.device), extra_pos_emb

        return x_B_T_H_W_D, None, extra_pos_emb

    def unpatchify(self, x_B_T_H_W_M: torch.Tensor) -> torch.Tensor:
        """Invert PatchEmbed: fold patch channels back into (C, T, H, W)."""
        return rearrange(
            x_B_T_H_W_M,
            "B T H W (p1 p2 t C) -> B C (T t) (H p1) (W p2)",
            p1=self.patch_spatial,
            p2=self.patch_spatial,
            t=self.patch_temporal,
        )

    def forward(
        self,
        x: torch.Tensor,
        timesteps: torch.Tensor,
        context: torch.Tensor,
        fps: Optional[torch.Tensor] = None,
        padding_mask: Optional[torch.Tensor] = None,
        **kwargs,
    ) -> torch.Tensor:
        """Denoise latents `x` at `timesteps` conditioned on `context`.

        Args:
            x: Latent tensor, shape (B, C, T, H, W).
            timesteps: (B,) or (B, T) timestep values.
            context: Cross-attention conditioning tokens.
            fps: Optional frames-per-second tensor for RoPE modulation.
            padding_mask: Optional (B, H, W)-style mask channel source.

        Returns:
            Output tensor cropped back to the input's (T, H, W).
        """
        orig_shape = list(x.shape)
        x = self._pad_to_patch_size(x)

        x_B_T_H_W_D, rope_emb_L_1_1_D, extra_pos_emb = self.prepare_embedded_sequence(x, fps=fps, padding_mask=padding_mask)

        if timesteps.ndim == 1:
            timesteps = timesteps.unsqueeze(1)
        # t_embedder = Sequential(Timesteps, TimestepEmbedding); called
        # stage-by-stage here so the dtype cast can sit in between.
        t_emb, adaln_lora = self.t_embedder[1](self.t_embedder[0](timesteps).to(x_B_T_H_W_D.dtype))
        t_emb = self.t_embedding_norm(t_emb)

        block_kwargs = {
            "rope_emb_L_1_1_D": rope_emb_L_1_1_D.unsqueeze(1).unsqueeze(0) if rope_emb_L_1_1_D is not None else None,
            "adaln_lora_B_T_3D": adaln_lora,
            "extra_per_block_pos_emb": extra_pos_emb,
        }

        # Keep residual stream in fp32 for numerical stability with fp16 compute
        if x_B_T_H_W_D.dtype == torch.float16:
            x_B_T_H_W_D = x_B_T_H_W_D.float()

        for block in self.blocks:
            x_B_T_H_W_D = block(x_B_T_H_W_D, t_emb, context, **block_kwargs)

        x_out = self.final_layer(x_B_T_H_W_D.to(context.dtype), t_emb, adaln_lora_B_T_3D=adaln_lora)
        # Crop away the padding added by _pad_to_patch_size.
        x_out = self.unpatchify(x_out)[:, :, : orig_shape[-3], : orig_shape[-2], : orig_shape[-1]]
        return x_out
x_B_T_H_W_D.dtype == torch.float16: + x_B_T_H_W_D = x_B_T_H_W_D.float() + + for block in self.blocks: + x_B_T_H_W_D = block(x_B_T_H_W_D, t_emb, context, **block_kwargs) + + x_out = self.final_layer(x_B_T_H_W_D.to(context.dtype), t_emb, adaln_lora_B_T_3D=adaln_lora) + x_out = self.unpatchify(x_out)[:, :, : orig_shape[-3], : orig_shape[-2], : orig_shape[-1]] + return x_out + + +# ============================================================================ +# LLM Adapter +# Reference implementation: https://github.com/hdae/diffusers-anima +# SPDX-License-Identifier: Apache-2.0 +# ============================================================================ + + +def _rotate_half(x: torch.Tensor) -> torch.Tensor: + """Split the last dimension in half and negate-swap: [-x2, x1].""" + half = x.shape[-1] // 2 + first, second = x[..., :half], x[..., half:] + return torch.cat((-second, first), dim=-1) + + +def _apply_rope(x: torch.Tensor, cos: torch.Tensor, sin: torch.Tensor) -> torch.Tensor: + """Apply rotary position embeddings to tensor x given precomputed cos/sin.""" + return (x * cos.unsqueeze(1)) + (_rotate_half(x) * sin.unsqueeze(1)) + + +class LLMAdapterRotaryEmbedding(nn.Module): + """Rotary position embedding for the LLM Adapter's attention layers.""" + + def __init__(self, head_dim: int, theta: float = 10000.0): + super().__init__() + half_dim = head_dim // 2 + index = torch.arange(half_dim, dtype=torch.float32) + exponent = (2.0 / float(head_dim)) * index + inv_freq = torch.reciprocal( + torch.pow(torch.tensor(theta, dtype=torch.float32), exponent) + ) + self.register_buffer("inv_freq", inv_freq, persistent=False) + + def forward( + self, x: torch.Tensor, position_ids: torch.Tensor + ) -> Tuple[torch.Tensor, torch.Tensor]: + pos = position_ids.to(device=x.device, dtype=torch.float32) + inv = self.inv_freq.to(device=x.device, dtype=torch.float32) + freqs = torch.einsum("bl,d->bld", pos, inv) + emb = freqs.repeat(1, 1, 2) + return emb.cos().to(dtype=x.dtype), 
emb.sin().to(dtype=x.dtype) + + +class LLMAdapterAttention(nn.Module): + """Attention for the LLM Adapter with QK normalization and rotary position embeddings.""" + + def __init__(self, query_dim: int, context_dim: int, n_heads: int, head_dim: int): + super().__init__() + inner_dim = head_dim * n_heads + self.n_heads = n_heads + self.head_dim = head_dim + + self.q_proj = nn.Linear(query_dim, inner_dim, bias=False) + self.q_norm = nn.RMSNorm(head_dim, eps=1e-6) + self.k_proj = nn.Linear(context_dim, inner_dim, bias=False) + self.k_norm = nn.RMSNorm(head_dim, eps=1e-6) + self.v_proj = nn.Linear(context_dim, inner_dim, bias=False) + self.o_proj = nn.Linear(inner_dim, query_dim, bias=False) + + def forward( + self, + x: torch.Tensor, + *, + context: Optional[torch.Tensor] = None, + attn_mask: Optional[torch.Tensor] = None, + pos_q: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, + pos_k: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, + ) -> torch.Tensor: + context = x if context is None else context + + q = ( + self.q_proj(x) + .view(x.shape[0], x.shape[1], self.n_heads, self.head_dim) + .transpose(1, 2) + ) + k = ( + self.k_proj(context) + .view(context.shape[0], context.shape[1], self.n_heads, self.head_dim) + .transpose(1, 2) + ) + v = ( + self.v_proj(context) + .view(context.shape[0], context.shape[1], self.n_heads, self.head_dim) + .transpose(1, 2) + ) + + q = self.q_norm(q) + k = self.k_norm(k) + + if pos_q is not None and pos_k is not None: + q = _apply_rope(q, *pos_q) + k = _apply_rope(k, *pos_k) + + y = F.scaled_dot_product_attention(q, k, v, attn_mask=attn_mask) + y = y.transpose(1, 2).reshape(x.shape[0], x.shape[1], -1).contiguous() + return self.o_proj(y) + + +class LLMAdapterTransformerBlock(nn.Module): + """Single transformer block in the LLM Adapter. + + Each block contains self-attention, cross-attention, and MLP with + RMSNorm pre-normalization. 
class LLMAdapterTransformerBlock(nn.Module):
    """Single transformer block in the LLM Adapter.

    Pre-norm residual layout: self-attention over the target sequence,
    cross-attention into the source sequence, then a 4x-expansion MLP.
    All normalizations are RMSNorm.
    """

    def __init__(
        self,
        source_dim: int,
        model_dim: int,
        num_heads: int = 16,
    ):
        super().__init__()
        per_head = model_dim // num_heads

        self.norm_self_attn = nn.RMSNorm(model_dim, eps=1e-6)
        self.self_attn = LLMAdapterAttention(model_dim, model_dim, num_heads, per_head)

        self.norm_cross_attn = nn.RMSNorm(model_dim, eps=1e-6)
        self.cross_attn = LLMAdapterAttention(model_dim, source_dim, num_heads, per_head)

        self.norm_mlp = nn.RMSNorm(model_dim, eps=1e-6)
        self.mlp = nn.Sequential(
            nn.Linear(model_dim, model_dim * 4),
            nn.GELU(),
            nn.Linear(model_dim * 4, model_dim),
        )

    def forward(
        self,
        x: torch.Tensor,
        *,
        context: torch.Tensor,
        target_mask: Optional[torch.Tensor] = None,
        source_mask: Optional[torch.Tensor] = None,
        pos_target: Tuple[torch.Tensor, torch.Tensor],
        pos_source: Tuple[torch.Tensor, torch.Tensor],
    ) -> torch.Tensor:
        # Self-attention: target positions rotate both queries and keys.
        self_out = self.self_attn(
            self.norm_self_attn(x),
            attn_mask=target_mask,
            pos_q=pos_target,
            pos_k=pos_target,
        )
        x = x + self_out

        # Cross-attention: queries use target positions, keys use source positions.
        cross_out = self.cross_attn(
            self.norm_cross_attn(x),
            context=context,
            attn_mask=source_mask,
            pos_q=pos_target,
            pos_k=pos_source,
        )
        x = x + cross_out

        return x + self.mlp(self.norm_mlp(x))
class LLMAdapter(nn.Module):
    """LLM Adapter: bridges Qwen3 hidden states and T5-XXL token embeddings.

    Embeds T5-XXL token IDs, refines them through a stack of transformer
    blocks that cross-attend into the Qwen3 hidden states, and finishes with
    an output projection followed by RMSNorm.

    Args:
        vocab_size: Size of the T5 token vocabulary.
        dim: Model dimension (used for embeddings, projections, and all layers).
        num_layers: Number of transformer layers.
        num_heads: Number of attention heads.
    """

    def __init__(
        self,
        vocab_size: int = 32128,
        dim: int = 1024,
        num_layers: int = 6,
        num_heads: int = 16,
    ):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, dim)
        layers = [
            LLMAdapterTransformerBlock(source_dim=dim, model_dim=dim, num_heads=num_heads)
            for _ in range(num_layers)
        ]
        self.blocks = nn.ModuleList(layers)
        self.out_proj = nn.Linear(dim, dim)
        self.norm = nn.RMSNorm(dim, eps=1e-6)
        self.rotary_emb = LLMAdapterRotaryEmbedding(dim // num_heads)

    def forward(
        self,
        source_hidden_states: torch.Tensor,
        target_input_ids: torch.Tensor,
        target_attention_mask: Optional[torch.Tensor] = None,
        source_attention_mask: Optional[torch.Tensor] = None,
    ) -> torch.Tensor:
        def _broadcastable(mask: Optional[torch.Tensor]) -> Optional[torch.Tensor]:
            # Expand 2D (B, L) boolean masks to (B, 1, 1, L) for multi-head SDPA.
            if mask is None:
                return None
            mask = mask.to(torch.bool)
            return mask[:, None, None, :] if mask.ndim == 2 else mask

        target_attention_mask = _broadcastable(target_attention_mask)
        source_attention_mask = _broadcastable(source_attention_mask)

        context = source_hidden_states
        x = self.embed(target_input_ids).to(dtype=context.dtype)

        # Sequential position IDs for both streams; one shared rotary module.
        target_pos_ids = torch.arange(x.shape[1], device=x.device, dtype=torch.long).unsqueeze(0)
        source_pos_ids = torch.arange(context.shape[1], device=x.device, dtype=torch.long).unsqueeze(0)
        pos_target = self.rotary_emb(x, target_pos_ids)
        pos_source = self.rotary_emb(x, source_pos_ids)

        for block in self.blocks:
            x = block(
                x,
                context=context,
                target_mask=target_attention_mask,
                source_mask=source_attention_mask,
                pos_target=pos_target,
                pos_source=pos_source,
            )
        return self.norm(self.out_proj(x))
class AnimaTransformer(MiniTrainDIT):
    """Anima transformer: Cosmos Predict2 DiT with integrated LLM Adapter.

    Extends MiniTrainDIT by adding the LLMAdapter component that preprocesses
    text embeddings before they are fed to the DiT cross-attention layers.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.llm_adapter = LLMAdapter()

    def preprocess_text_embeds(
        self,
        text_embeds: torch.Tensor,
        text_ids: Optional[torch.Tensor],
        t5xxl_weights: Optional[torch.Tensor] = None,
    ) -> torch.Tensor:
        """Run the LLM Adapter to produce conditioning for the DiT.

        Args:
            text_embeds: Qwen3 hidden states. Shape: (batch, seq_len, 1024).
            text_ids: T5-XXL token IDs. Shape: (batch, seq_len). If None, returns text_embeds directly.
            t5xxl_weights: Optional per-token weights. Shape: (batch, seq_len, 1).

        Returns:
            Conditioning tensor. Shape: (batch, 512, 1024), zero-padded if needed.
        """
        if text_ids is None:
            return text_embeds
        adapted = self.llm_adapter(text_embeds, text_ids)
        if t5xxl_weights is not None:
            adapted = adapted * t5xxl_weights
        missing = 512 - adapted.shape[1]
        if missing > 0:
            # Zero-pad the sequence axis up to the fixed context length of 512.
            adapted = F.pad(adapted, (0, 0, 0, missing))
        return adapted

    def forward(
        self,
        x: torch.Tensor,
        timesteps: torch.Tensor,
        context: torch.Tensor,
        t5xxl_ids: Optional[torch.Tensor] = None,
        t5xxl_weights: Optional[torch.Tensor] = None,
        **kwargs,
    ) -> torch.Tensor:
        """Forward pass with LLM Adapter preprocessing.

        Args:
            x: Input latent tensor. Shape: (B, C, T, H, W).
            timesteps: Timestep values. Shape: (B,) or (B, T).
            context: Qwen3 hidden states. Shape: (B, seq_len, 1024).
            t5xxl_ids: T5-XXL token IDs. Shape: (B, seq_len).
            t5xxl_weights: Per-token weights. Shape: (B, seq_len, 1).

        Returns:
            Denoised output. Shape: (B, C, T, H, W).
        """
        if t5xxl_ids is not None:
            context = self.preprocess_text_embeds(context, t5xxl_ids, t5xxl_weights=t5xxl_weights)
        return super().forward(x, timesteps, context, **kwargs)
def _patched_cross_attn_forward(
    original_forward,
    attn_mask: torch.Tensor,
):
    """Create a patched forward for CosmosAttention that injects a cross-attention mask.

    Args:
        original_forward: The original CosmosAttention.forward method (bound to self).
        attn_mask: Cross-attention mask of shape (img_seq_len, context_seq_len).

    Returns:
        A replacement forward callable with the same signature as
        CosmosAttention.forward.
    """
    def forward(x, context=None, rope_emb=None):
        # If the context sequence length doesn't match the mask (e.g. negative conditioning
        # has a different number of tokens than positive regional conditioning), skip masking
        # and use the original unmasked forward.
        actual_context = x if context is None else context
        if actual_context.shape[-2] != attn_mask.shape[1]:
            return original_forward(x, context, rope_emb=rope_emb)

        # Recover the CosmosAttention module from the bound method; the rest
        # of this closure mirrors CosmosAttention.forward with the mask added
        # to the SDPA call.
        self = original_forward.__self__

        q = self.q_proj(x)
        context = x if context is None else context
        k = self.k_proj(context)
        v = self.v_proj(context)
        q, k, v = (rearrange(t, "b ... (h d) -> b ... h d", h=self.n_heads, d=self.head_dim) for t in (q, k, v))

        q = self.q_norm(q)
        k = self.k_norm(k)
        v = self.v_norm(v)

        if self.is_selfattn and rope_emb is not None:
            # Imported lazily to avoid a circular import with the transformer module.
            from invokeai.backend.anima.anima_transformer import apply_rotary_pos_emb_cosmos
            q = apply_rotary_pos_emb_cosmos(q, rope_emb)
            k = apply_rotary_pos_emb_cosmos(k, rope_emb)

        # Reshape to (B, heads, seq, dim) for scaled_dot_product_attention.
        in_q_shape = q.shape
        in_k_shape = k.shape
        q = rearrange(q, "b ... h d -> b h ... d").reshape(in_q_shape[0], in_q_shape[-2], -1, in_q_shape[-1])
        k = rearrange(k, "b ... h d -> b h ... d").reshape(in_k_shape[0], in_k_shape[-2], -1, in_k_shape[-1])
        v = rearrange(v, "b ... h d -> b h ... d").reshape(in_k_shape[0], in_k_shape[-2], -1, in_k_shape[-1])

        # Convert boolean mask to float additive mask for SDPA
        # True (attend) -> 0.0, False (block) -> -inf
        # Shape: (img_seq_len, context_seq_len) -> (1, 1, img_seq_len, context_seq_len)
        float_mask = torch.zeros_like(attn_mask, dtype=q.dtype)
        float_mask[~attn_mask] = float("-inf")
        expanded_mask = float_mask.unsqueeze(0).unsqueeze(0)

        result = F.scaled_dot_product_attention(q, k, v, attn_mask=expanded_mask)
        result = rearrange(result, "b h s d -> b s (h d)")
        return self.output_dropout(self.output_proj(result))

    return forward
@contextmanager
def patch_anima_for_regional_prompting(
    transformer,
    regional_extension: Optional[AnimaRegionalPromptingExtension],
):
    """Context manager to temporarily patch the Anima transformer for regional prompting.

    Patches the cross-attention in each DiT block to use a regional attention mask.
    Uses alternating pattern: masked on even blocks, unmasked on odd blocks for
    global coherence.

    Fix: patching now happens inside the try/finally. Previously the patch loop
    ran before the try block, so an exception raised partway through patching
    left the already-swapped forwards permanently installed on the transformer.

    Args:
        transformer: The AnimaTransformer instance.
        regional_extension: The regional prompting extension. If None or no mask, no patching.

    Yields:
        The (possibly patched) transformer.
    """
    if regional_extension is None or regional_extension.cross_attn_mask is None:
        yield transformer
        return

    # Snapshot every original bound forward before touching anything.
    saved_forwards = [block.cross_attn.forward for block in transformer.blocks]
    try:
        for block_idx, block in enumerate(transformer.blocks):
            mask = regional_extension.get_cross_attn_mask(block_idx)
            if mask is not None:
                block.cross_attn.forward = _patched_cross_attn_forward(
                    block.cross_attn.forward, mask
                )
        yield transformer
    finally:
        # Restore unconditionally, even if patching or the managed body raised.
        for block, original in zip(transformer.blocks, saved_forwards):
            block.cross_attn.forward = original
+ """ + if regional_extension is None or regional_extension.cross_attn_mask is None: + yield transformer + return + + # Store original forwards + original_forwards = [] + for block_idx, block in enumerate(transformer.blocks): + original_forwards.append(block.cross_attn.forward) + + mask = regional_extension.get_cross_attn_mask(block_idx) + if mask is not None: + block.cross_attn.forward = _patched_cross_attn_forward( + block.cross_attn.forward, mask + ) + + try: + yield transformer + finally: + # Restore original forwards + for block_idx, block in enumerate(transformer.blocks): + block.cross_attn.forward = original_forwards[block_idx] diff --git a/invokeai/backend/anima/conditioning_data.py b/invokeai/backend/anima/conditioning_data.py new file mode 100644 index 00000000000..b96c807835d --- /dev/null +++ b/invokeai/backend/anima/conditioning_data.py @@ -0,0 +1,64 @@ +"""Anima text conditioning data structures. + +Anima uses a dual-conditioning scheme: +- Qwen3 0.6B hidden states (continuous embeddings) +- T5-XXL token IDs (discrete IDs, embedded by the LLM Adapter inside the transformer) + +Both are produced by the text encoder invocation and stored together. + +For regional prompting, multiple conditionings (each with an optional spatial mask) +are concatenated and processed together. The LLM Adapter runs on each region's +conditioning separately, producing per-region context vectors that are concatenated +for the DiT's cross-attention layers. An attention mask restricts which image tokens +attend to which regional context tokens. +""" + +from dataclasses import dataclass + +import torch + +from invokeai.backend.stable_diffusion.diffusion.conditioning_data import Range + + +@dataclass +class AnimaTextConditioning: + """Anima text conditioning with Qwen3 hidden states, T5-XXL token IDs, and optional mask. + + Attributes: + qwen3_embeds: Text embeddings from Qwen3 0.6B encoder. + Shape: (seq_len, hidden_size) where hidden_size=1024. 
@dataclass
class AnimaRegionalTextConditioning:
    """Container for multiple regional text conditionings processed by the LLM Adapter.

    After the LLM Adapter processes each region's conditioning, the outputs are concatenated.
    The DiT cross-attention then uses an attention mask to restrict which image tokens
    attend to which region's context tokens.

    Attributes:
        context_embeds: Concatenated LLM Adapter outputs from all regional prompts.
            Shape: (total_context_len, 1024).
        image_masks: List of binary masks for each regional prompt.
            If None, the prompt is global (applies to entire image).
            Shape: (1, 1, img_seq_len).
        context_ranges: List of ranges indicating which portion of context_embeds
            corresponds to each regional prompt.
    """

    context_embeds: torch.Tensor
    # Parallel lists: image_masks[i] and context_ranges[i] describe prompt i.
    image_masks: list[torch.Tensor | None]
    context_ranges: list[Range]
class AnimaRegionalPromptingExtension:
    """Manages regional prompting for Anima's cross-attention.

    Unlike Z-Image which uses a unified [img, txt] sequence, Anima has separate
    cross-attention where image tokens (query) attend to context tokens (key/value).
    The cross-attention mask shape is (img_seq_len, context_seq_len).
    """

    def __init__(
        self,
        regional_text_conditioning: AnimaRegionalTextConditioning,
        cross_attn_mask: torch.Tensor | None = None,
    ):
        # cross_attn_mask is boolean (img_seq_len, context_seq_len); None means
        # no regional restriction at all.
        self.regional_text_conditioning = regional_text_conditioning
        self.cross_attn_mask = cross_attn_mask

    def get_cross_attn_mask(self, block_index: int) -> torch.Tensor | None:
        """Get the cross-attention mask for a given block index.

        Uses alternating pattern: apply mask on even blocks, no mask on odd blocks.
        This helps balance regional control with global coherence.
        """
        if block_index % 2 == 0:
            return self.cross_attn_mask
        return None

    @classmethod
    def from_regional_conditioning(
        cls,
        regional_text_conditioning: AnimaRegionalTextConditioning,
        img_seq_len: int,
    ) -> "AnimaRegionalPromptingExtension":
        """Create extension from pre-processed regional conditioning.

        Args:
            regional_text_conditioning: Regional conditioning with concatenated context and masks.
            img_seq_len: Number of image tokens (H_patches * W_patches).
        """
        cross_attn_mask = cls._prepare_cross_attn_mask(regional_text_conditioning, img_seq_len)
        return cls(
            regional_text_conditioning=regional_text_conditioning,
            cross_attn_mask=cross_attn_mask,
        )

    @classmethod
    def _prepare_cross_attn_mask(
        cls,
        regional_text_conditioning: AnimaRegionalTextConditioning,
        img_seq_len: int,
    ) -> torch.Tensor | None:
        """Prepare a cross-attention mask for regional prompting.

        The mask shape is (img_seq_len, context_seq_len) where:
        - Each image token can attend to context tokens from its assigned region
        - Global prompts (mask=None) attend to background regions

        Args:
            regional_text_conditioning: The regional text conditioning data.
            img_seq_len: Number of image tokens.

        Returns:
            Cross-attention mask of shape (img_seq_len, context_seq_len), or None
            if no regional masks are present.
        """
        # All-global prompts need no mask at all.
        has_regional_masks = any(mask is not None for mask in regional_text_conditioning.image_masks)
        if not has_regional_masks:
            return None

        # Identify background region (area not covered by any mask).
        # Built multiplicatively: background = prod(1 - mask_i) over all regional masks.
        background_region_mask: torch.Tensor | None = None
        for image_mask in regional_text_conditioning.image_masks:
            if image_mask is not None:
                mask_flat = image_mask.view(-1)
                if background_region_mask is None:
                    background_region_mask = torch.ones_like(mask_flat)
                background_region_mask = background_region_mask * (1 - mask_flat)

        device = TorchDevice.choose_torch_device()
        context_seq_len = regional_text_conditioning.context_embeds.shape[0]

        # Cross-attention mask: (img_seq_len, context_seq_len)
        # img tokens are queries, context tokens are keys/values.
        # NOTE(review): assumes image_masks already live on `device` (true when they
        # come from preprocess_regional_prompt_mask, which moves them) — confirm for
        # externally supplied masks, otherwise the slice assignments below would fail.
        cross_attn_mask = torch.zeros((img_seq_len, context_seq_len), device=device, dtype=torch.float16)

        for image_mask, context_range in zip(
            regional_text_conditioning.image_masks,
            regional_text_conditioning.context_ranges,
            strict=True,
        ):
            ctx_start = context_range.start
            ctx_end = context_range.end

            if image_mask is not None:
                # Regional prompt: only masked image tokens attend to this region's context.
                mask_flat = image_mask.view(img_seq_len)
                cross_attn_mask[:, ctx_start:ctx_end] = mask_flat.view(img_seq_len, 1)
            else:
                # Global prompt: background image tokens attend to this context.
                if background_region_mask is not None:
                    cross_attn_mask[:, ctx_start:ctx_end] = background_region_mask.view(img_seq_len, 1)
                else:
                    cross_attn_mask[:, ctx_start:ctx_end] = 1.0

        # Convert to boolean (the float16 buffer was only a staging area).
        cross_attn_mask = cross_attn_mask > 0.5
        return cross_attn_mask

    @staticmethod
    def preprocess_regional_prompt_mask(
        mask: Optional[torch.Tensor],
        target_height: int,
        target_width: int,
        dtype: torch.dtype,
        device: torch.device,
    ) -> torch.Tensor:
        """Preprocess a regional prompt mask to match the target image token grid.

        Args:
            mask: Input mask tensor. If None, returns a mask of all ones.
            target_height: Height of the image token grid (H // patch_size).
            target_width: Width of the image token grid (W // patch_size).
            dtype: Target dtype for the mask.
            device: Target device for the mask.

        Returns:
            Processed mask of shape (1, 1, target_height * target_width).
        """
        img_seq_len = target_height * target_width

        if mask is None:
            return torch.ones((1, 1, img_seq_len), dtype=dtype, device=device)

        mask = to_standard_float_mask(mask, out_dtype=dtype)

        # NEAREST keeps the mask binary through resizing (no interpolated gray values).
        tf = torchvision.transforms.Resize(
            (target_height, target_width),
            interpolation=torchvision.transforms.InterpolationMode.NEAREST,
        )

        # Normalize rank to (1, 1, H, W): (H, W) -> (1, H, W) -> (1, 1, H, W).
        if mask.ndim == 2:
            mask = mask.unsqueeze(0)
        if mask.ndim == 3:
            mask = mask.unsqueeze(0)

        resized_mask = tf(mask)
        # Flatten the spatial dims into the token axis: (1, 1, H, W) -> (1, 1, H*W).
        return resized_mask.flatten(start_dim=2).to(device=device)
# Anima scheduler types (same Flow Matching schedulers as Flux/Z-Image)
# Anima uses rectified flow with shift=3.0 and multiplier=1000.
# Recommended: 30 steps with Euler, CFG 4-5.
ANIMA_SCHEDULER_NAME_VALUES = Literal["euler", "heun", "lcm"]

# Human-readable labels for the UI.
# NOTE(review): "lcm" is always listed here even though ANIMA_SCHEDULER_MAP only
# gains an "lcm" entry when _HAS_LCM is True (same pattern as the Z-Image map
# above) — confirm the UI tolerates a label without a matching scheduler class.
ANIMA_SCHEDULER_LABELS: dict[str, str] = {
    "euler": "Euler",
    "heun": "Heun (2nd order)",
    "lcm": "LCM",
}

# Scheduler name -> diffusers flow-matching scheduler class.
ANIMA_SCHEDULER_MAP: dict[str, Type[SchedulerMixin]] = {
    "euler": FlowMatchEulerDiscreteScheduler,
    "heun": FlowMatchHeunDiscreteScheduler,
}

# FlowMatchLCMScheduler is only present in sufficiently new diffusers versions.
if _HAS_LCM:
    ANIMA_SCHEDULER_MAP["lcm"] = FlowMatchLCMScheduler
VAE_Checkpoint_SDXL_Config.get_tag()], Annotated[VAE_Checkpoint_FLUX_Config, VAE_Checkpoint_FLUX_Config.get_tag()], Annotated[VAE_Checkpoint_Flux2_Config, VAE_Checkpoint_Flux2_Config.get_tag()], + Annotated[VAE_Checkpoint_Anima_Config, VAE_Checkpoint_Anima_Config.get_tag()], # VAE - diffusers format Annotated[VAE_Diffusers_SD1_Config, VAE_Diffusers_SD1_Config.get_tag()], Annotated[VAE_Diffusers_SDXL_Config, VAE_Diffusers_SDXL_Config.get_tag()], @@ -207,6 +212,7 @@ Annotated[LoRA_LyCORIS_Flux2_Config, LoRA_LyCORIS_Flux2_Config.get_tag()], Annotated[LoRA_LyCORIS_FLUX_Config, LoRA_LyCORIS_FLUX_Config.get_tag()], Annotated[LoRA_LyCORIS_ZImage_Config, LoRA_LyCORIS_ZImage_Config.get_tag()], + Annotated[LoRA_LyCORIS_Anima_Config, LoRA_LyCORIS_Anima_Config.get_tag()], # LoRA - OMI format Annotated[LoRA_OMI_SDXL_Config, LoRA_OMI_SDXL_Config.get_tag()], Annotated[LoRA_OMI_FLUX_Config, LoRA_OMI_FLUX_Config.get_tag()], diff --git a/invokeai/backend/model_manager/configs/lora.py b/invokeai/backend/model_manager/configs/lora.py index 1619c9d6f06..1503a6fa868 100644 --- a/invokeai/backend/model_manager/configs/lora.py +++ b/invokeai/backend/model_manager/configs/lora.py @@ -31,6 +31,10 @@ ZImageVariantType, ) from invokeai.backend.model_manager.util.model_util import lora_token_vector_length +from invokeai.backend.patches.lora_conversions.anima_lora_constants import ( + has_cosmos_dit_kohya_keys, + has_cosmos_dit_peft_keys, +) from invokeai.backend.patches.lora_conversions.flux_control_lora_utils import is_state_dict_likely_flux_control @@ -637,6 +641,13 @@ def _get_base_or_raise(cls, mod: ModelOnDisk) -> BaseModelType: return BaseModelType.Flux state_dict = mod.load_state_dict() + str_keys = [k for k in state_dict.keys() if isinstance(k, str)] + + # Rule out Anima LoRAs — their lora_te_ keys have shapes that + # lora_token_vector_length() misidentifies as SD2/SDXL. 
class LoRA_LyCORIS_Anima_Config(LoRA_LyCORIS_Config_Base, Config_Base):
    """Model config for Anima LoRA models in LyCORIS format."""

    base: Literal[BaseModelType.Anima] = Field(default=BaseModelType.Anima)

    @classmethod
    def _validate_looks_like_lora(cls, mod: ModelOnDisk) -> None:
        """Anima LoRAs use Kohya-style keys targeting Cosmos DiT blocks.

        Anima LoRAs have keys like:
        - lora_unet_blocks_0_cross_attn_k_proj.lora_down.weight (Kohya format)
        - diffusion_model.blocks.0.cross_attn.k_proj.lora_A.weight (diffusers PEFT format)
        - transformer.blocks.0.cross_attn.k_proj.lora_A.weight (diffusers PEFT format)

        Detection requires Cosmos DiT-specific subcomponent names (cross_attn,
        self_attn, mlp, adaln_modulation) to avoid false-positives on other
        architectures that also use ``blocks`` in their paths.

        Raises:
            NotAMatchError: if the state dict does not match the heuristics.
        """
        state_dict = mod.load_state_dict()
        # Only string keys carry architecture info (some formats use int keys).
        str_keys = [k for k in state_dict.keys() if isinstance(k, str)]

        has_cosmos_keys = has_cosmos_dit_kohya_keys(str_keys) or has_cosmos_dit_peft_keys(str_keys)

        # Also check for LoRA/LoKR weight suffixes — the Cosmos key shapes alone
        # would also match a full checkpoint, not just an adapter.
        has_lora_suffix = state_dict_has_any_keys_ending_with(
            state_dict,
            {
                "lora_A.weight",
                "lora_B.weight",
                "lora_down.weight",
                "lora_up.weight",
                "dora_scale",
                ".lokr_w1",
                ".lokr_w2",
            },
        )

        if has_cosmos_keys and has_lora_suffix:
            return

        raise NotAMatchError("model does not match Anima LoRA heuristics")

    @classmethod
    def _get_base_or_raise(cls, mod: ModelOnDisk) -> BaseModelType:
        """Anima LoRAs target Cosmos DiT blocks (blocks.X.cross_attn, blocks.X.self_attn, etc.).

        Uses Cosmos DiT-specific subcomponent names to avoid false-positives.

        Raises:
            NotAMatchError: if no Cosmos DiT-targeting keys are found.
        """
        state_dict = mod.load_state_dict()
        str_keys = [k for k in state_dict.keys() if isinstance(k, str)]

        if has_cosmos_dit_kohya_keys(str_keys) or has_cosmos_dit_peft_keys(str_keys):
            return BaseModelType.Anima

        raise NotAMatchError("model does not look like an Anima LoRA")
-> bool: + """Check if state dict contains Anima model keys. + + Anima models are identified by the presence of `llm_adapter` keys + (unique to Anima - the LLM Adapter that bridges Qwen3 text encoder to the Cosmos DiT) + alongside Cosmos Predict2 DiT keys (blocks, t_embedder, x_embedder, final_layer). + + The checkpoint keys may have a `net.` prefix (e.g. `net.llm_adapter.`, `net.blocks.`). + """ + has_llm_adapter = False + has_cosmos_dit = False + + # Cosmos DiT key prefixes — support both with and without `net.` prefix + cosmos_prefixes = ( + "blocks.", "t_embedder.", "x_embedder.", "final_layer.", + "net.blocks.", "net.t_embedder.", "net.x_embedder.", "net.final_layer.", + ) + + for key in state_dict.keys(): + if isinstance(key, int): + continue + if key.startswith("llm_adapter.") or key.startswith("net.llm_adapter."): + has_llm_adapter = True + for prefix in cosmos_prefixes: + if key.startswith(prefix): + has_cosmos_dit = True + if has_llm_adapter and has_cosmos_dit: + return True + + return False + + class Main_Diffusers_ZImage_Config(Diffusers_Config_Base, Main_Config_Base, Config_Base): """Model config for Z-Image diffusers models (Z-Image-Turbo, Z-Image-Base).""" @@ -1199,3 +1233,30 @@ def _validate_looks_like_gguf_quantized(cls, mod: ModelOnDisk) -> None: has_ggml_tensors = _has_ggml_tensors(mod.load_state_dict()) if not has_ggml_tensors: raise NotAMatchError("state dict does not look like GGUF quantized") + + +class Main_Checkpoint_Anima_Config(Checkpoint_Config_Base, Main_Config_Base, Config_Base): + """Model config for Anima single-file checkpoint models (safetensors). + + Anima is built on NVIDIA Cosmos Predict2 DiT with a custom LLM Adapter + that bridges Qwen3 0.6B text encoder outputs to the DiT. 
    @classmethod
    def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self:
        """Build a config for `mod`, raising NotAMatchError if it is not an Anima checkpoint."""
        # Anima mains are single safetensors files, never diffusers folders.
        raise_if_not_file(mod)

        raise_for_override_fields(cls, override_fields)

        cls._validate_looks_like_anima_model(mod)

        return cls(**override_fields)

    @classmethod
    def _validate_looks_like_anima_model(cls, mod: ModelOnDisk) -> None:
        """Raise NotAMatchError unless the state dict has llm_adapter + Cosmos DiT keys."""
        has_anima_keys = _has_anima_keys(mod.load_state_dict())
        if not has_anima_keys:
            raise NotAMatchError("state dict does not look like an Anima model")
""" # Hidden size thresholds + QWEN3_06B_HIDDEN_SIZE = 1024 QWEN3_4B_HIDDEN_SIZE = 2560 QWEN3_8B_HIDDEN_SIZE = 4096 @@ -91,7 +93,9 @@ def _get_qwen3_variant_from_state_dict(state_dict: dict[str | int, Any]) -> Opti return None # Determine variant based on hidden_size - if hidden_size == QWEN3_4B_HIDDEN_SIZE: + if hidden_size == QWEN3_06B_HIDDEN_SIZE: + return Qwen3VariantType.Qwen3_06B + elif hidden_size == QWEN3_4B_HIDDEN_SIZE: return Qwen3VariantType.Qwen3_4B elif hidden_size == QWEN3_8B_HIDDEN_SIZE: return Qwen3VariantType.Qwen3_8B @@ -206,6 +210,7 @@ def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) - @classmethod def _get_variant_from_config(cls, config_path) -> Qwen3VariantType: """Get variant from config.json based on hidden_size.""" + QWEN3_06B_HIDDEN_SIZE = 1024 QWEN3_4B_HIDDEN_SIZE = 2560 QWEN3_8B_HIDDEN_SIZE = 4096 @@ -217,6 +222,8 @@ def _get_variant_from_config(cls, config_path) -> Qwen3VariantType: return Qwen3VariantType.Qwen3_8B elif hidden_size == QWEN3_4B_HIDDEN_SIZE: return Qwen3VariantType.Qwen3_4B + elif hidden_size == QWEN3_06B_HIDDEN_SIZE: + return Qwen3VariantType.Qwen3_06B else: # Default to 4B for unknown sizes return Qwen3VariantType.Qwen3_4B diff --git a/invokeai/backend/model_manager/configs/vae.py b/invokeai/backend/model_manager/configs/vae.py index cc079cb9aae..ce26a94a6e9 100644 --- a/invokeai/backend/model_manager/configs/vae.py +++ b/invokeai/backend/model_manager/configs/vae.py @@ -175,6 +175,43 @@ def _validate_is_flux2_vae(cls, mod: ModelOnDisk) -> None: raise NotAMatchError("state dict does not look like a FLUX.2 VAE") +def _has_anima_vae_keys(state_dict: dict[str | int, Any]) -> bool: + """Check if state dict looks like an Anima QwenImage VAE (AutoencoderKLQwenImage). 
+ + The Anima VAE has a distinctive structure with: + - encoder.downsamples.* (instead of encoder.down_blocks) + - decoder.upsamples.* (instead of decoder.up_blocks) + - decoder.head.* / decoder.middle.* + - Top-level conv1/conv2 weights + """ + required_prefixes = { + "encoder.downsamples.", + "decoder.upsamples.", + "decoder.middle.", + } + return all(any(str(k).startswith(prefix) for k in state_dict) for prefix in required_prefixes) + + +class VAE_Checkpoint_Anima_Config(Checkpoint_Config_Base, Config_Base): + """Model config for Anima QwenImage VAE checkpoint models (AutoencoderKLQwenImage).""" + + type: Literal[ModelType.VAE] = Field(default=ModelType.VAE) + format: Literal[ModelFormat.Checkpoint] = Field(default=ModelFormat.Checkpoint) + base: Literal[BaseModelType.Anima] = Field(default=BaseModelType.Anima) + + @classmethod + def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self: + raise_if_not_file(mod) + + raise_for_override_fields(cls, override_fields) + + state_dict = mod.load_state_dict() + if not _has_anima_vae_keys(state_dict): + raise NotAMatchError("state dict does not look like an Anima QwenImage VAE") + + return cls(**override_fields) + + class VAE_Diffusers_Config_Base(Diffusers_Config_Base): """Model config for standalone VAE models (diffusers version).""" diff --git a/invokeai/backend/model_manager/load/model_loaders/anima.py b/invokeai/backend/model_manager/load/model_loaders/anima.py new file mode 100644 index 00000000000..e0e8ded029c --- /dev/null +++ b/invokeai/backend/model_manager/load/model_loaders/anima.py @@ -0,0 +1,127 @@ +# Copyright (c) 2024, Lincoln D. 
@ModelLoaderRegistry.register(base=BaseModelType.Anima, type=ModelType.Main, format=ModelFormat.Checkpoint)
class AnimaCheckpointModel(ModelLoader):
    """Class to load Anima transformer models from single-file checkpoints.

    The Anima checkpoint contains both the MiniTrainDIT backbone and the LLM Adapter
    under a shared `net.` prefix. The loader strips this prefix and instantiates
    the AnimaTransformer model with the correct architecture parameters.
    """

    def _load_model(
        self,
        config: AnyModelConfig,
        submodel_type: Optional[SubModelType] = None,
    ) -> AnyModel:
        """Load the requested submodel; only the Transformer submodel is supported."""
        if not isinstance(config, Checkpoint_Config_Base):
            raise ValueError("Only CheckpointConfigBase models are currently supported here.")

        match submodel_type:
            case SubModelType.Transformer:
                return self._load_from_singlefile(config)

        # Any other submodel type (or None) falls through to here.
        raise ValueError(
            f"Only Transformer submodels are currently supported. Received: {submodel_type.value if submodel_type else 'None'}"
        )

    def _load_from_singlefile(
        self,
        config: AnyModelConfig,
    ) -> AnyModel:
        """Load the AnimaTransformer from a single safetensors file and cast it to a safe dtype."""
        # Local imports keep heavy deps off the module import path.
        from safetensors.torch import load_file

        from invokeai.backend.anima.anima_transformer import AnimaTransformer

        if not isinstance(config, Main_Checkpoint_Anima_Config):
            raise TypeError(
                f"Expected Main_Checkpoint_Anima_Config, got {type(config).__name__}. "
                "Model configuration type mismatch."
            )
        model_path = Path(config.path)

        # Load the state dict from safetensors
        sd = load_file(model_path)

        # Strip the `net.` prefix that all Anima checkpoint keys have
        # e.g., "net.blocks.0.self_attn.q_proj.weight" -> "blocks.0.self_attn.q_proj.weight"
        prefix_to_strip = None
        for prefix in ["net."]:
            if any(k.startswith(prefix) for k in sd.keys() if isinstance(k, str)):
                prefix_to_strip = prefix
                break

        if prefix_to_strip:
            stripped_sd = {}
            for key, value in sd.items():
                if isinstance(key, str) and key.startswith(prefix_to_strip):
                    stripped_sd[key[len(prefix_to_strip) :]] = value
                else:
                    stripped_sd[key] = value
            sd = stripped_sd

        # Create an empty AnimaTransformer with Anima's default architecture parameters.
        # init_empty_weights puts parameters on the meta device (no allocation).
        with accelerate.init_empty_weights():
            model = AnimaTransformer(
                max_img_h=240,
                max_img_w=240,
                max_frames=1,
                in_channels=16,
                out_channels=16,
                patch_spatial=2,
                patch_temporal=1,
                concat_padding_mask=True,
                model_channels=2048,
                num_blocks=28,
                num_heads=16,
                mlp_ratio=4.0,
                crossattn_emb_channels=1024,
                pos_emb_cls="rope3d",
                use_adaln_lora=True,
                adaln_lora_dim=256,
                extra_per_block_abs_pos_emb=False,
                image_model="anima",
            )

        # Determine safe dtype (bf16 where supported, else a fallback).
        target_device = TorchDevice.choose_torch_device()
        model_dtype = TorchDevice.choose_bfloat16_safe_dtype(target_device)

        # Handle memory management.
        # NOTE(review): this estimate prices every tensor at model_dtype.itemsize,
        # including non-float tensors that keep their original dtype below — a
        # small over/under-estimate that is acceptable for cache budgeting.
        new_sd_size = sum(ten.nelement() * model_dtype.itemsize for ten in sd.values())
        self._ram_cache.make_room(new_sd_size)

        # Convert to target dtype (skip non-float tensors like embedding indices)
        for k in sd.keys():
            if sd[k].is_floating_point():
                sd[k] = sd[k].to(model_dtype)

        # Filter out rotary embedding inv_freq buffers that are regenerated at runtime
        keys_to_remove = [k for k in sd.keys() if k.endswith(".inv_freq")]
        for k in keys_to_remove:
            del sd[k]

        # assign=True adopts the checkpoint tensors into the meta-initialized module;
        # strict=False tolerates the removed inv_freq buffers (and any other
        # runtime-regenerated state) being absent from the checkpoint.
        model.load_state_dict(sd, assign=True, strict=False)
        return model
+ model = lora_model_from_anima_state_dict(state_dict=state_dict, alpha=None) else: raise ValueError(f"Unsupported LoRA base model: {self._model_base}") diff --git a/invokeai/backend/model_manager/load/model_loaders/vae.py b/invokeai/backend/model_manager/load/model_loaders/vae.py index e91903ccdad..db26e8c6547 100644 --- a/invokeai/backend/model_manager/load/model_loaders/vae.py +++ b/invokeai/backend/model_manager/load/model_loaders/vae.py @@ -6,7 +6,7 @@ from diffusers.models.autoencoders.autoencoder_kl import AutoencoderKL from invokeai.backend.model_manager.configs.factory import AnyModelConfig -from invokeai.backend.model_manager.configs.vae import VAE_Checkpoint_Config_Base +from invokeai.backend.model_manager.configs.vae import VAE_Checkpoint_Anima_Config, VAE_Checkpoint_Config_Base from invokeai.backend.model_manager.load.model_loader_registry import ModelLoaderRegistry from invokeai.backend.model_manager.load.model_loaders.generic_diffusers import GenericDiffusersLoader from invokeai.backend.model_manager.taxonomy import ( @@ -28,7 +28,14 @@ def _load_model( config: AnyModelConfig, submodel_type: Optional[SubModelType] = None, ) -> AnyModel: - if isinstance(config, VAE_Checkpoint_Config_Base): + if isinstance(config, VAE_Checkpoint_Anima_Config): + from diffusers.models.autoencoders import AutoencoderKLWan + + return AutoencoderKLWan.from_single_file( + config.path, + torch_dtype=self._torch_dtype, + ) + elif isinstance(config, VAE_Checkpoint_Config_Base): return AutoencoderKL.from_single_file( config.path, torch_dtype=self._torch_dtype, diff --git a/invokeai/backend/model_manager/starter_models.py b/invokeai/backend/model_manager/starter_models.py index 9f86f83dc59..3fcca7f034f 100644 --- a/invokeai/backend/model_manager/starter_models.py +++ b/invokeai/backend/model_manager/starter_models.py @@ -862,6 +862,36 @@ class StarterModelBundle(BaseModel): ) # endregion +# region Anima +anima_qwen3_encoder = StarterModel( + name="Anima Qwen3 0.6B Text Encoder", 
# QwenImage VAE used by Anima for latent encode/decode (16 latent channels).
anima_vae = StarterModel(
    name="Anima QwenImage VAE",
    base=BaseModelType.Anima,
    source="https://huggingface.co/circlestone-labs/Anima/resolve/main/split_files/vae/qwen_image_vae.safetensors",
    description="QwenImage VAE for Anima (fine-tuned Wan 2.1 VAE, 16 latent channels). ~200MB",
    type=ModelType.VAE,
    format=ModelFormat.Checkpoint,
)

# Main Anima transformer. Installing it also pulls in the Qwen3 0.6B encoder,
# the QwenImage VAE, and the T5 base encoder via `dependencies`.
anima_preview2 = StarterModel(
    name="Anima Preview 2",
    base=BaseModelType.Anima,
    source="https://huggingface.co/circlestone-labs/Anima/resolve/main/split_files/diffusion_models/anima-preview2.safetensors",
    description="Anima Preview 2 - 2B parameter anime-focused text-to-image model built on Cosmos Predict2 DiT. ~4.5GB",
    type=ModelType.Main,
    format=ModelFormat.Checkpoint,
    dependencies=[anima_qwen3_encoder, anima_vae, t5_base_encoder],
)
# endregion
# Anima starter bundle: matches anima_preview2's dependency list plus the main
# model itself, so a one-click install yields a runnable Anima setup.
anima_bundle: list[StarterModel] = [
    anima_preview2,
    anima_qwen3_encoder,
    anima_vae,
    t5_base_encoder,
]
Used by FLUX.2 Klein 9B.""" + Qwen3_06B = "qwen3_06b" + """Qwen3 0.6B text encoder (hidden_size=1024). Used by Anima.""" + class ModelFormat(str, Enum): """Storage format of model.""" diff --git a/invokeai/backend/patches/lora_conversions/anima_lora_constants.py b/invokeai/backend/patches/lora_conversions/anima_lora_constants.py new file mode 100644 index 00000000000..58c3e58b969 --- /dev/null +++ b/invokeai/backend/patches/lora_conversions/anima_lora_constants.py @@ -0,0 +1,47 @@ +# Anima LoRA prefix constants +# These prefixes are used for key mapping when applying LoRA patches to Anima models + +import re + +# Prefix for Anima transformer (Cosmos DiT architecture) LoRA layers +ANIMA_LORA_TRANSFORMER_PREFIX = "lora_transformer-" + +# Prefix for Qwen3 text encoder LoRA layers +ANIMA_LORA_QWEN3_PREFIX = "lora_qwen3-" + +# --------------------------------------------------------------------------- +# Cosmos DiT detection helpers +# +# Shared between ``anima_lora_conversion_utils.is_state_dict_likely_anima_lora`` +# and the config probing code in ``configs/lora.py``. Kept here (rather than +# in ``anima_lora_conversion_utils``) to avoid circular imports. +# --------------------------------------------------------------------------- + +# Cosmos DiT subcomponent names unique to the Anima / Cosmos Predict2 architecture. +_COSMOS_DIT_SUBCOMPONENTS_RE = r"(cross_attn|self_attn|mlp|adaln_modulation)" + +# Kohya format: lora_unet_[llm_adapter_]blocks_N_ +_KOHYA_ANIMA_RE = re.compile( + r"lora_unet_(llm_adapter_)?blocks_\d+_" + _COSMOS_DIT_SUBCOMPONENTS_RE +) + +# PEFT format: .blocks.N. +_PEFT_ANIMA_RE = re.compile( + r"(diffusion_model|transformer|base_model\.model\.transformer)\.blocks\.\d+\." + _COSMOS_DIT_SUBCOMPONENTS_RE +) + + +def has_cosmos_dit_kohya_keys(str_keys: list[str]) -> bool: + """Check for Kohya-style keys targeting Cosmos DiT blocks with specific subcomponents. 
+
+    Requires both the ``lora_unet_[llm_adapter_]blocks_N_`` prefix **and** a
+    Cosmos DiT subcomponent name (cross_attn, self_attn, mlp, adaln_modulation)
+    to avoid false positives on other architectures that might also use bare
+    ``blocks`` in their key paths.
+    """
+    return any(_KOHYA_ANIMA_RE.search(k) is not None for k in str_keys)
+
+
+def has_cosmos_dit_peft_keys(str_keys: list[str]) -> bool:
+    """Check for diffusers PEFT keys targeting Cosmos DiT blocks with specific subcomponents."""
+    return any(_PEFT_ANIMA_RE.search(k) is not None for k in str_keys)
diff --git a/invokeai/backend/patches/lora_conversions/anima_lora_conversion_utils.py b/invokeai/backend/patches/lora_conversions/anima_lora_conversion_utils.py
new file mode 100644
index 00000000000..bc40f69897a
--- /dev/null
+++ b/invokeai/backend/patches/lora_conversions/anima_lora_conversion_utils.py
@@ -0,0 +1,324 @@
+"""Anima LoRA conversion utilities.
+
+Anima uses a Cosmos Predict2 DiT transformer architecture.
+LoRAs for Anima typically follow the Kohya-style format with underscore-separated keys
+(e.g., lora_unet_blocks_0_cross_attn_k_proj) that map to model parameter paths
+(e.g., blocks.0.cross_attn.k_proj).
+
+Some Anima LoRAs also target the Qwen3 text encoder with lora_te_ prefix keys
+(e.g., lora_te_layers_0_self_attn_q_proj -> model.layers.0.self_attn.q_proj).
+""" + +import re +from typing import Dict + +import torch + +from invokeai.backend.patches.layers.base_layer_patch import BaseLayerPatch +from invokeai.backend.patches.layers.utils import any_lora_layer_from_state_dict +from invokeai.backend.util.logging import InvokeAILogger + +logger = InvokeAILogger.get_logger(__name__) +from invokeai.backend.patches.lora_conversions.anima_lora_constants import ( + ANIMA_LORA_QWEN3_PREFIX, + ANIMA_LORA_TRANSFORMER_PREFIX, + has_cosmos_dit_kohya_keys, + has_cosmos_dit_peft_keys, +) +from invokeai.backend.patches.model_patch_raw import ModelPatchRaw + + +def is_state_dict_likely_anima_lora(state_dict: dict[str | int, torch.Tensor]) -> bool: + """Checks if the provided state dict is likely an Anima LoRA. + + Anima LoRAs use Kohya-style naming with lora_unet_ prefix and underscore-separated + model key paths targeting Cosmos DiT blocks. Detection requires Cosmos DiT-specific + subcomponent names (cross_attn, self_attn, mlp, adaln_modulation) to avoid + false-positives on other architectures that also use ``blocks`` in their paths. + """ + str_keys = [k for k in state_dict.keys() if isinstance(k, str)] + + if has_cosmos_dit_kohya_keys(str_keys): + return True + + return has_cosmos_dit_peft_keys(str_keys) + + +# Mapping from Kohya underscore-style substrings to model parameter names. +# Order matters: longer/more specific patterns should come first to avoid partial matches. 
+_KOHYA_UNET_KEY_REPLACEMENTS = [ + ("adaln_modulation_cross_attn_", "adaln_modulation_cross_attn."), + ("adaln_modulation_self_attn_", "adaln_modulation_self_attn."), + ("adaln_modulation_mlp_", "adaln_modulation_mlp."), + ("cross_attn_k_proj", "cross_attn.k_proj"), + ("cross_attn_q_proj", "cross_attn.q_proj"), + ("cross_attn_v_proj", "cross_attn.v_proj"), + ("cross_attn_output_proj", "cross_attn.output_proj"), + ("cross_attn_o_proj", "cross_attn.o_proj"), + ("self_attn_k_proj", "self_attn.k_proj"), + ("self_attn_q_proj", "self_attn.q_proj"), + ("self_attn_v_proj", "self_attn.v_proj"), + ("self_attn_output_proj", "self_attn.output_proj"), + ("self_attn_o_proj", "self_attn.o_proj"), + ("mlp_layer1", "mlp.layer1"), + ("mlp_layer2", "mlp.layer2"), +] + +# Mapping for Qwen3 text encoder Kohya keys. +_KOHYA_TE_KEY_REPLACEMENTS = [ + ("self_attn_k_proj", "self_attn.k_proj"), + ("self_attn_q_proj", "self_attn.q_proj"), + ("self_attn_v_proj", "self_attn.v_proj"), + ("self_attn_o_proj", "self_attn.o_proj"), + ("mlp_down_proj", "mlp.down_proj"), + ("mlp_gate_proj", "mlp.gate_proj"), + ("mlp_up_proj", "mlp.up_proj"), +] + + +def _convert_kohya_unet_key(kohya_layer_name: str) -> str: + """Convert a Kohya-style LoRA layer name to a model parameter path. + + Example: lora_unet_blocks_0_cross_attn_k_proj -> blocks.0.cross_attn.k_proj + Example: lora_unet_llm_adapter_blocks_0_cross_attn_k_proj -> llm_adapter.blocks.0.cross_attn.k_proj + """ + key = kohya_layer_name + if key.startswith("lora_unet_"): + key = key[len("lora_unet_"):] + + # Handle llm_adapter prefix: strip it, run the standard block conversion, then re-add with dot + llm_adapter_prefix = "" + if key.startswith("llm_adapter_"): + key = key[len("llm_adapter_"):] + llm_adapter_prefix = "llm_adapter." + + # Convert blocks_N_ to blocks.N. 
+ key = re.sub(r"^blocks_(\d+)_", r"blocks.\1.", key) + + # Apply known replacements for subcomponent names + for old, new in _KOHYA_UNET_KEY_REPLACEMENTS: + if old in key: + key = key.replace(old, new) + break + + return llm_adapter_prefix + key + + +def _convert_kohya_te_key(kohya_layer_name: str) -> str: + """Convert a Kohya-style text encoder LoRA layer name to a model parameter path. + + The Qwen3 text encoder is loaded as Qwen3ForCausalLM which wraps the base model + under a `model.` prefix, so the final path must include it. + + Example: lora_te_layers_0_self_attn_q_proj -> model.layers.0.self_attn.q_proj + """ + key = kohya_layer_name + if key.startswith("lora_te_"): + key = key[len("lora_te_"):] + + # Convert layers_N_ to layers.N. + key = re.sub(r"^layers_(\d+)_", r"layers.\1.", key) + + # Apply known replacements + for old, new in _KOHYA_TE_KEY_REPLACEMENTS: + if old in key: + key = key.replace(old, new) + break + + # Qwen3ForCausalLM wraps the base Qwen3Model under `model.` + key = f"model.{key}" + + return key + + +def _make_layer_patch(layer_dict: dict[str, torch.Tensor]) -> BaseLayerPatch: + """Create a layer patch from a layer dict, handling DoRA+LoKR edge case. + + Some Anima LoRAs combine DoRA (dora_scale) with LoKR (lokr_w1/lokr_w2) weights. + The shared any_lora_layer_from_state_dict checks dora_scale first and expects + lora_up/lora_down keys, which don't exist in LoKR layers. We strip dora_scale + from LoKR layers so they fall through to the LoKR handler instead. 
+ """ + has_lokr = "lokr_w1" in layer_dict or "lokr_w1_a" in layer_dict + has_dora = "dora_scale" in layer_dict + if has_lokr and has_dora: + layer_dict = {k: v for k, v in layer_dict.items() if k != "dora_scale"} + logger.debug("Stripped dora_scale from LoKR layer (DoRA+LoKR combination not supported, using LoKR only)") + return any_lora_layer_from_state_dict(layer_dict) + + +def lora_model_from_anima_state_dict( + state_dict: Dict[str, torch.Tensor], alpha: float | None = None +) -> ModelPatchRaw: + """Convert an Anima LoRA state dict to a ModelPatchRaw. + + Supports both Kohya-style keys (lora_unet_blocks_0_...) and diffusers PEFT format. + Also supports text encoder LoRA keys (lora_te_layers_0_...) targeting the Qwen3 encoder. + + Args: + state_dict: The LoRA state dict + alpha: The alpha value for LoRA scaling. If None, uses rank as alpha. + + Returns: + A ModelPatchRaw containing the LoRA layers + """ + layers: dict[str, BaseLayerPatch] = {} + + # Detect format + str_keys = [k for k in state_dict.keys() if isinstance(k, str)] + is_kohya = any(k.startswith(("lora_unet_", "lora_te_")) for k in str_keys) + + if is_kohya: + # Kohya format: group by layer name (everything before .lora_down/.lora_up/.alpha) + grouped = _group_kohya_keys(state_dict) + for kohya_layer_name, layer_dict in grouped.items(): + if kohya_layer_name.startswith("lora_te_"): + model_key = _convert_kohya_te_key(kohya_layer_name) + final_key = f"{ANIMA_LORA_QWEN3_PREFIX}{model_key}" + else: + model_key = _convert_kohya_unet_key(kohya_layer_name) + final_key = f"{ANIMA_LORA_TRANSFORMER_PREFIX}{model_key}" + layer = _make_layer_patch(layer_dict) + layers[final_key] = layer + else: + # Diffusers PEFT format + grouped = _group_by_layer(state_dict) + for layer_key, layer_dict in grouped.items(): + values = _get_lora_layer_values(layer_dict, alpha) + clean_key = layer_key + + # Check for text encoder prefixes + text_encoder_prefixes = [ + "base_model.model.text_encoder.", + "text_encoder.", + ] + + 
is_text_encoder = False + for prefix in text_encoder_prefixes: + if layer_key.startswith(prefix): + clean_key = layer_key[len(prefix):] + is_text_encoder = True + break + + # If not text encoder, check transformer prefixes + if not is_text_encoder: + for prefix in [ + "base_model.model.transformer.", + "transformer.", + "diffusion_model.", + ]: + if layer_key.startswith(prefix): + clean_key = layer_key[len(prefix):] + break + + if is_text_encoder: + final_key = f"{ANIMA_LORA_QWEN3_PREFIX}{clean_key}" + else: + final_key = f"{ANIMA_LORA_TRANSFORMER_PREFIX}{clean_key}" + + layer = _make_layer_patch(values) + layers[final_key] = layer + + return ModelPatchRaw(layers=layers) + + +def _group_kohya_keys(state_dict: Dict[str, torch.Tensor]) -> dict[str, dict[str, torch.Tensor]]: + """Group Kohya-style LoRA keys by layer name. + + Kohya keys look like: lora_unet_blocks_0_cross_attn_k_proj.lora_down.weight + Layer name: lora_unet_blocks_0_cross_attn_k_proj + Key suffix: lora_down.weight + """ + layer_dict: dict[str, dict[str, torch.Tensor]] = {} + + known_suffixes = [ + ".lora_A.weight", + ".lora_B.weight", + ".lora_down.weight", + ".lora_up.weight", + ".dora_scale", + ".alpha", + ] + + for key in state_dict: + if not isinstance(key, str): + continue + + layer_name = None + key_name = None + for suffix in known_suffixes: + if key.endswith(suffix): + layer_name = key[: -len(suffix)] + key_name = suffix[1:] # Remove leading dot + break + + if layer_name is None: + parts = key.rsplit(".", maxsplit=2) + layer_name = parts[0] + key_name = ".".join(parts[1:]) + + if layer_name not in layer_dict: + layer_dict[layer_name] = {} + layer_dict[layer_name][key_name] = state_dict[key] + + return layer_dict + + +def _get_lora_layer_values(layer_dict: dict[str, torch.Tensor], alpha: float | None) -> dict[str, torch.Tensor]: + """Convert layer dict keys from PEFT format to internal format.""" + if "lora_A.weight" in layer_dict: + values = { + "lora_down.weight": layer_dict["lora_A.weight"], 
+ "lora_up.weight": layer_dict["lora_B.weight"], + } + if alpha is not None: + values["alpha"] = torch.tensor(alpha) + return values + elif "lora_down.weight" in layer_dict: + return layer_dict + else: + return layer_dict + + +def _group_by_layer(state_dict: Dict[str, torch.Tensor]) -> dict[str, dict[str, torch.Tensor]]: + """Groups keys in the state dict by layer (for diffusers PEFT format).""" + layer_dict: dict[str, dict[str, torch.Tensor]] = {} + + known_suffixes = [ + ".lora_A.weight", + ".lora_B.weight", + ".lora_down.weight", + ".lora_up.weight", + ".dora_scale", + ".alpha", + # LoKR suffixes + ".lokr_w1", + ".lokr_w2", + ".lokr_w1_a", + ".lokr_w1_b", + ".lokr_w2_a", + ".lokr_w2_b", + ] + + for key in state_dict: + if not isinstance(key, str): + continue + + layer_name = None + key_name = None + for suffix in known_suffixes: + if key.endswith(suffix): + layer_name = key[: -len(suffix)] + key_name = suffix[1:] + break + + if layer_name is None: + parts = key.rsplit(".", maxsplit=2) + layer_name = parts[0] + key_name = ".".join(parts[1:]) + + if layer_name not in layer_dict: + layer_dict[layer_name] = {} + layer_dict[layer_name][key_name] = state_dict[key] + + return layer_dict diff --git a/invokeai/backend/stable_diffusion/diffusion/conditioning_data.py b/invokeai/backend/stable_diffusion/diffusion/conditioning_data.py index 9d1bd676174..e6ca9aa18e7 100644 --- a/invokeai/backend/stable_diffusion/diffusion/conditioning_data.py +++ b/invokeai/backend/stable_diffusion/diffusion/conditioning_data.py @@ -88,6 +88,31 @@ def to(self, device: torch.device | None = None, dtype: torch.dtype | None = Non return self +@dataclass +class AnimaConditioningInfo: + """Anima text conditioning information from Qwen3 0.6B encoder + T5-XXL tokenizer. + + Anima uses a dual-conditioning scheme where Qwen3 hidden states are combined + with T5-XXL token IDs inside the LLM Adapter (part of the transformer). + """ + + qwen3_embeds: torch.Tensor + """Qwen3 0.6B hidden states. 
Shape: (seq_len, hidden_size) where hidden_size=1024.""" + + t5xxl_ids: torch.Tensor + """T5-XXL token IDs. Shape: (seq_len,).""" + + t5xxl_weights: Optional[torch.Tensor] = None + """Per-token weights for prompt weighting. Shape: (seq_len,). None means uniform weight.""" + + def to(self, device: torch.device | None = None, dtype: torch.dtype | None = None): + self.qwen3_embeds = self.qwen3_embeds.to(device=device, dtype=dtype) + self.t5xxl_ids = self.t5xxl_ids.to(device=device) + if self.t5xxl_weights is not None: + self.t5xxl_weights = self.t5xxl_weights.to(device=device, dtype=dtype) + return self + + @dataclass class ConditioningFieldData: # If you change this class, adding more types, you _must_ update the instantiation of ObjectSerializerDisk in @@ -100,6 +125,7 @@ class ConditioningFieldData: | List[SD3ConditioningInfo] | List[CogView4ConditioningInfo] | List[ZImageConditioningInfo] + | List[AnimaConditioningInfo] ) diff --git a/invokeai/frontend/web/public/locales/en.json b/invokeai/frontend/web/public/locales/en.json index 58be5430a26..605bdc30b0d 100644 --- a/invokeai/frontend/web/public/locales/en.json +++ b/invokeai/frontend/web/public/locales/en.json @@ -1197,6 +1197,12 @@ "typePhraseHere": "Type phrase here", "t5Encoder": "T5 Encoder", "qwen3Encoder": "Qwen3 Encoder", + "animaVae": "VAE", + "animaVaePlaceholder": "Select Anima-compatible VAE", + "animaQwen3Encoder": "Qwen3 0.6B Encoder", + "animaQwen3EncoderPlaceholder": "Select Qwen3 0.6B encoder", + "animaT5Encoder": "T5-XXL Encoder", + "animaT5EncoderPlaceholder": "Select T5-XXL encoder", "zImageVae": "VAE (optional)", "zImageVaePlaceholder": "From VAE source model", "zImageQwen3Encoder": "Qwen3 Encoder (optional)", @@ -1499,6 +1505,9 @@ "noQwen3EncoderModelSelected": "No Qwen3 Encoder model selected for FLUX2 Klein generation", "noZImageVaeSourceSelected": "No VAE source: Select VAE (FLUX) or Qwen3 Source model", "noZImageQwen3EncoderSourceSelected": "No Qwen3 Encoder source: Select Qwen3 Encoder 
or Qwen3 Source model", + "noAnimaVaeModelSelected": "No Anima VAE model selected", + "noAnimaQwen3EncoderModelSelected": "No Anima Qwen3 Encoder model selected", + "noAnimaT5EncoderModelSelected": "No Anima T5 Encoder model selected", "fluxModelIncompatibleBboxWidth": "$t(parameters.invoke.fluxRequiresDimensionsToBeMultipleOf16), bbox width is {{width}}", "fluxModelIncompatibleBboxHeight": "$t(parameters.invoke.fluxRequiresDimensionsToBeMultipleOf16), bbox height is {{height}}", "fluxModelIncompatibleScaledBboxWidth": "$t(parameters.invoke.fluxRequiresDimensionsToBeMultipleOf16), scaled bbox width is {{width}}", diff --git a/invokeai/frontend/web/src/app/store/middleware/listenerMiddleware/listeners/modelSelected.ts b/invokeai/frontend/web/src/app/store/middleware/listenerMiddleware/listeners/modelSelected.ts index 20057472ca8..52b567fdebe 100644 --- a/invokeai/frontend/web/src/app/store/middleware/listenerMiddleware/listeners/modelSelected.ts +++ b/invokeai/frontend/web/src/app/store/middleware/listenerMiddleware/listeners/modelSelected.ts @@ -4,6 +4,9 @@ import { bboxSyncedToOptimalDimension, rgRefImageModelChanged } from 'features/c import { buildSelectIsStaging, selectCanvasSessionId } from 'features/controlLayers/store/canvasStagingAreaSlice'; import { loraIsEnabledChanged } from 'features/controlLayers/store/lorasSlice'; import { + animaQwen3EncoderModelSelected, + animaT5EncoderModelSelected, + animaVaeModelSelected, kleinQwen3EncoderModelSelected, kleinVaeModelSelected, modelChanged, @@ -39,10 +42,12 @@ import { toast } from 'features/toast/toast'; import { t } from 'i18next'; import { modelConfigsAdapterSelectors, selectModelConfigsQuery } from 'services/api/endpoints/models'; import { + selectAnimaVAEModels, selectFluxVAEModels, selectGlobalRefImageModels, selectQwen3EncoderModels, selectRegionalRefImageModels, + selectT5EncoderModels, selectZImageDiffusersModels, } from 'services/api/hooks/modelsByType'; import type { FLUXKontextModelConfig, 
FLUXReduxModelConfig, IPAdapterModelConfig } from 'services/api/types'; @@ -155,6 +160,68 @@ export const addModelSelectedListener = (startAppListening: AppStartListening) = } } + // handle incompatible Anima models - clear if switching away from anima + const { animaVaeModel, animaQwen3EncoderModel, animaT5EncoderModel } = state.params; + if (newBase !== 'anima') { + if (animaVaeModel) { + dispatch(animaVaeModelSelected(null)); + modelsUpdatedDisabledOrCleared += 1; + } + if (animaQwen3EncoderModel) { + dispatch(animaQwen3EncoderModelSelected(null)); + modelsUpdatedDisabledOrCleared += 1; + } + if (animaT5EncoderModel) { + dispatch(animaT5EncoderModelSelected(null)); + modelsUpdatedDisabledOrCleared += 1; + } + } else { + // Switching to Anima - set defaults if no valid configuration exists + const hasValidConfig = animaVaeModel && animaQwen3EncoderModel && animaT5EncoderModel; + + if (!hasValidConfig) { + const availableQwen3Encoders = selectQwen3EncoderModels(state); + const availableAnimaVAEs = selectAnimaVAEModels(state); + const availableT5Encoders = selectT5EncoderModels(state); + + if (availableQwen3Encoders.length > 0 && availableAnimaVAEs.length > 0) { + const qwen3Encoder = availableQwen3Encoders[0]; + const fluxVAE = availableAnimaVAEs[0]; + + if (qwen3Encoder && !animaQwen3EncoderModel) { + dispatch( + animaQwen3EncoderModelSelected({ + key: qwen3Encoder.key, + name: qwen3Encoder.name, + base: qwen3Encoder.base, + }) + ); + } + if (fluxVAE && !animaVaeModel) { + dispatch( + animaVaeModelSelected({ + key: fluxVAE.key, + hash: fluxVAE.hash, + name: fluxVAE.name, + base: fluxVAE.base, + type: fluxVAE.type, + }) + ); + } + const t5Encoder = availableT5Encoders[0]; + if (t5Encoder && !animaT5EncoderModel) { + dispatch( + animaT5EncoderModelSelected({ + key: t5Encoder.key, + name: t5Encoder.name, + base: t5Encoder.base, + }) + ); + } + } + } + } + // handle incompatible FLUX.2 Klein models - clear if switching away from flux2 const { kleinVaeModel, 
kleinQwen3EncoderModel } = state.params; if (newBase !== 'flux2') { diff --git a/invokeai/frontend/web/src/features/controlLayers/store/paramsSlice.ts b/invokeai/frontend/web/src/features/controlLayers/store/paramsSlice.ts index 8dcd93cc5de..7e0c7a5029e 100644 --- a/invokeai/frontend/web/src/features/controlLayers/store/paramsSlice.ts +++ b/invokeai/frontend/web/src/features/controlLayers/store/paramsSlice.ts @@ -211,6 +211,33 @@ const slice = createSlice({ } state.zImageQwen3SourceModel = result.data; }, + animaVaeModelSelected: (state, action: PayloadAction) => { + const result = zParamsState.shape.animaVaeModel.safeParse(action.payload); + if (!result.success) { + return; + } + state.animaVaeModel = result.data; + }, + animaQwen3EncoderModelSelected: ( + state, + action: PayloadAction<{ key: string; name: string; base: string } | null> + ) => { + const result = zParamsState.shape.animaQwen3EncoderModel.safeParse(action.payload); + if (!result.success) { + return; + } + state.animaQwen3EncoderModel = result.data; + }, + animaT5EncoderModelSelected: (state, action: PayloadAction<{ key: string; name: string; base: string } | null>) => { + const result = zParamsState.shape.animaT5EncoderModel.safeParse(action.payload); + if (!result.success) { + return; + } + state.animaT5EncoderModel = result.data; + }, + setAnimaScheduler: (state, action: PayloadAction<'euler' | 'heun' | 'lcm'>) => { + state.animaScheduler = action.payload; + }, kleinVaeModelSelected: (state, action: PayloadAction) => { const result = zParamsState.shape.kleinVaeModel.safeParse(action.payload); if (!result.success) { @@ -480,6 +507,9 @@ const resetState = (state: ParamsState): ParamsState => { newState.zImageVaeModel = oldState.zImageVaeModel; newState.zImageQwen3EncoderModel = oldState.zImageQwen3EncoderModel; newState.zImageQwen3SourceModel = oldState.zImageQwen3SourceModel; + newState.animaVaeModel = oldState.animaVaeModel; + newState.animaQwen3EncoderModel = oldState.animaQwen3EncoderModel; + 
newState.animaT5EncoderModel = oldState.animaT5EncoderModel; newState.kleinVaeModel = oldState.kleinVaeModel; newState.kleinQwen3EncoderModel = oldState.kleinQwen3EncoderModel; return newState; @@ -556,6 +586,10 @@ export const { syncedToOptimalDimension, paramsReset, + animaVaeModelSelected, + animaQwen3EncoderModelSelected, + animaT5EncoderModelSelected, + setAnimaScheduler, } = slice.actions; export const paramsSliceConfig: SliceConfig = { @@ -593,6 +627,7 @@ export const selectIsFLUX = createParamsSelector((params) => params.model?.base export const selectIsSD3 = createParamsSelector((params) => params.model?.base === 'sd-3'); export const selectIsCogView4 = createParamsSelector((params) => params.model?.base === 'cogview4'); export const selectIsZImage = createParamsSelector((params) => params.model?.base === 'z-image'); +export const selectIsAnima = createParamsSelector((params) => params.model?.base === 'anima'); export const selectIsFlux2 = createParamsSelector((params) => params.model?.base === 'flux2'); export const selectIsFluxKontext = createParamsSelector((params) => { if (params.model?.base === 'flux' && params.model?.name.toLowerCase().includes('kontext')) { @@ -614,6 +649,10 @@ export const selectCLIPGEmbedModel = createParamsSelector((params) => params.cli export const selectZImageVaeModel = createParamsSelector((params) => params.zImageVaeModel); export const selectZImageQwen3EncoderModel = createParamsSelector((params) => params.zImageQwen3EncoderModel); export const selectZImageQwen3SourceModel = createParamsSelector((params) => params.zImageQwen3SourceModel); +export const selectAnimaVaeModel = createParamsSelector((params) => params.animaVaeModel); +export const selectAnimaQwen3EncoderModel = createParamsSelector((params) => params.animaQwen3EncoderModel); +export const selectAnimaT5EncoderModel = createParamsSelector((params) => params.animaT5EncoderModel); +export const selectAnimaScheduler = createParamsSelector((params) => 
params.animaScheduler); export const selectKleinVaeModel = createParamsSelector((params) => params.kleinVaeModel); export const selectKleinQwen3EncoderModel = createParamsSelector((params) => params.kleinQwen3EncoderModel); diff --git a/invokeai/frontend/web/src/features/controlLayers/store/types.ts b/invokeai/frontend/web/src/features/controlLayers/store/types.ts index 40babc7bc85..369abbcf3da 100644 --- a/invokeai/frontend/web/src/features/controlLayers/store/types.ts +++ b/invokeai/frontend/web/src/features/controlLayers/store/types.ts @@ -750,6 +750,11 @@ export const zParamsState = z.object({ zImageVaeModel: zParameterVAEModel.nullable(), // Optional: Separate FLUX VAE zImageQwen3EncoderModel: zModelIdentifierField.nullable(), // Optional: Separate Qwen3 Encoder zImageQwen3SourceModel: zParameterModel.nullable(), // Diffusers Z-Image model (fallback for VAE/Encoder) + // Anima model components - uses Qwen3 0.6B + T5-XXL tokenizer + QwenImage VAE + animaVaeModel: zParameterVAEModel.nullable(), // Optional: Separate QwenImage/FLUX VAE for Anima + animaQwen3EncoderModel: zModelIdentifierField.nullable(), // Optional: Separate Qwen3 0.6B Encoder for Anima + animaT5EncoderModel: zModelIdentifierField.nullable(), // T5-XXL tokenizer for Anima LLM Adapter + animaScheduler: z.enum(['euler', 'heun', 'lcm']).default('euler'), // Flux2 Klein model components - uses Qwen3 instead of CLIP+T5 kleinVaeModel: zParameterVAEModel.nullable(), // Optional: Separate FLUX.2 VAE for Klein kleinQwen3EncoderModel: zModelIdentifierField.nullable(), // Optional: Separate Qwen3 Encoder for Klein @@ -815,6 +820,10 @@ export const getInitialParamsState = (): ParamsState => ({ zImageVaeModel: null, zImageQwen3EncoderModel: null, zImageQwen3SourceModel: null, + animaVaeModel: null, + animaQwen3EncoderModel: null, + animaT5EncoderModel: null, + animaScheduler: 'euler', kleinVaeModel: null, kleinQwen3EncoderModel: null, zImageSeedVarianceEnabled: false, diff --git 
a/invokeai/frontend/web/src/features/controlLayers/store/validators.ts b/invokeai/frontend/web/src/features/controlLayers/store/validators.ts index 3406e9e7ee6..923b2b62532 100644 --- a/invokeai/frontend/web/src/features/controlLayers/store/validators.ts +++ b/invokeai/frontend/web/src/features/controlLayers/store/validators.ts @@ -70,6 +70,16 @@ export const getRegionalGuidanceWarnings = ( } } + if (model.base === 'anima') { + // Reference images (IP Adapters) are not supported for Anima + if (entity.referenceImages.length > 0) { + warnings.push(WARNINGS.RG_REFERENCE_IMAGES_NOT_SUPPORTED); + } + if (entity.autoNegative) { + warnings.push(WARNINGS.RG_AUTO_NEGATIVE_NOT_SUPPORTED); + } + } + entity.referenceImages.forEach(({ config }) => { if (!config.model) { // No model selected @@ -117,7 +127,7 @@ export const getGlobalReferenceImageWarnings = ( const warnings: WarningTKey[] = []; if (model) { - if (model.base === 'sd-3' || model.base === 'sd-2') { + if (model.base === 'sd-3' || model.base === 'sd-2' || model.base === 'anima') { // Unsupported model architecture warnings.push(WARNINGS.UNSUPPORTED_MODEL); return warnings; @@ -160,7 +170,7 @@ export const getControlLayerWarnings = ( // No model selected warnings.push(WARNINGS.CONTROL_ADAPTER_NO_MODEL_SELECTED); } else if (model) { - if (model.base === 'sd-3' || model.base === 'sd-2') { + if (model.base === 'sd-3' || model.base === 'sd-2' || model.base === 'anima') { // Unsupported model architecture warnings.push(WARNINGS.UNSUPPORTED_MODEL); } else if (entity.controlAdapter.model.base !== model.base) { diff --git a/invokeai/frontend/web/src/features/metadata/parsing.tsx b/invokeai/frontend/web/src/features/metadata/parsing.tsx index 7d1d511a3c2..213bd92e934 100644 --- a/invokeai/frontend/web/src/features/metadata/parsing.tsx +++ b/invokeai/frontend/web/src/features/metadata/parsing.tsx @@ -8,6 +8,8 @@ import { getPrefixedId } from 'features/controlLayers/konva/util'; import { bboxHeightChanged, bboxWidthChanged, 
canvasMetadataRecalled } from 'features/controlLayers/store/canvasSlice'; import { loraAllDeleted, loraRecalled } from 'features/controlLayers/store/lorasSlice'; import { + animaQwen3EncoderModelSelected, + animaVaeModelSelected, heightChanged, kleinQwen3EncoderModelSelected, kleinVaeModelSelected, @@ -15,6 +17,7 @@ import { positivePromptChanged, refinerModelChanged, selectBase, + setAnimaScheduler, setCfgRescaleMultiplier, setCfgScale, setClipSkip, @@ -471,6 +474,11 @@ const Scheduler: SingleMetadataHandler = { if (value === 'euler' || value === 'heun' || value === 'lcm') { store.dispatch(setZImageScheduler(value)); } + } else if (base === 'anima') { + // Anima supports euler, heun, lcm + if (value === 'euler' || value === 'heun' || value === 'lcm') { + store.dispatch(setAnimaScheduler(value)); + } } else { // SD, SDXL, SD3, CogView4, etc. use the general scheduler store.dispatch(setScheduler(value)); @@ -933,6 +941,52 @@ const ZImageQwen3SourceModel: SingleMetadataHandler = { }; //#endregion ZImageQwen3SourceModel +//#region AnimaVAEModel +const AnimaVAEModel: SingleMetadataHandler = { + [SingleMetadataKey]: true, + type: 'AnimaVAEModel', + parse: async (metadata, store) => { + const raw = getProperty(metadata, 'vae'); + const parsed = await parseModelIdentifier(raw, store, 'vae'); + assert(parsed.type === 'vae'); + const base = selectBase(store.getState()); + assert(base === 'anima', 'AnimaVAEModel handler only works with Anima models'); + return Promise.resolve(parsed); + }, + recall: (value, store) => { + store.dispatch(animaVaeModelSelected(value)); + }, + i18nKey: 'metadata.vae', + LabelComponent: MetadataLabel, + ValueComponent: ({ value }: SingleMetadataValueProps) => ( + + ), +}; +//#endregion AnimaVAEModel + +//#region AnimaQwen3EncoderModel +const AnimaQwen3EncoderModel: SingleMetadataHandler = { + [SingleMetadataKey]: true, + type: 'AnimaQwen3EncoderModel', + parse: async (metadata, store) => { + const raw = getProperty(metadata, 'qwen3_encoder'); + 
const parsed = await parseModelIdentifier(raw, store, 'qwen3_encoder'); + assert(parsed.type === 'qwen3_encoder'); + const base = selectBase(store.getState()); + assert(base === 'anima', 'AnimaQwen3EncoderModel handler only works with Anima models'); + return Promise.resolve(parsed); + }, + recall: (value, store) => { + store.dispatch(animaQwen3EncoderModelSelected(value)); + }, + i18nKey: 'metadata.qwen3Encoder', + LabelComponent: MetadataLabel, + ValueComponent: ({ value }: SingleMetadataValueProps) => ( + + ), +}; +//#endregion AnimaQwen3EncoderModel + //#region KleinVAEModel const KleinVAEModel: SingleMetadataHandler = { [SingleMetadataKey]: true, @@ -1228,6 +1282,8 @@ export const ImageMetadataHandlers = { Qwen3EncoderModel, ZImageVAEModel, ZImageQwen3SourceModel, + AnimaVAEModel, + AnimaQwen3EncoderModel, KleinVAEModel, KleinQwen3EncoderModel, ZImageSeedVarianceEnabled, diff --git a/invokeai/frontend/web/src/features/modelManagerV2/models.ts b/invokeai/frontend/web/src/features/modelManagerV2/models.ts index 7b5a08adfe2..0888846ca82 100644 --- a/invokeai/frontend/web/src/features/modelManagerV2/models.ts +++ b/invokeai/frontend/web/src/features/modelManagerV2/models.ts @@ -143,6 +143,7 @@ export const MODEL_BASE_TO_COLOR: Record = { flux2: 'gold', cogview4: 'red', 'z-image': 'cyan', + anima: 'invokePurple', unknown: 'red', }; @@ -184,6 +185,7 @@ export const MODEL_BASE_TO_LONG_NAME: Record = { flux2: 'FLUX.2', cogview4: 'CogView4', 'z-image': 'Z-Image', + anima: 'Anima', unknown: 'Unknown', }; @@ -201,6 +203,7 @@ export const MODEL_BASE_TO_SHORT_NAME: Record = { flux2: 'FLUX.2', cogview4: 'CogView4', 'z-image': 'Z-Image', + anima: 'Anima', unknown: 'Unknown', }; @@ -220,6 +223,7 @@ export const MODEL_VARIANT_TO_LONG_NAME: Record = { gigantic: 'CLIP G', qwen3_4b: 'Qwen3 4B', qwen3_8b: 'Qwen3 8B', + qwen3_06b: 'Qwen3 0.6B', }; export const MODEL_FORMAT_TO_LONG_NAME: Record = { @@ -251,4 +255,5 @@ export const SUPPORTS_NEGATIVE_PROMPT_BASE_MODELS: 
BaseModelType[] = [ 'cogview4', 'sd-3', 'z-image', + 'anima', ]; diff --git a/invokeai/frontend/web/src/features/nodes/types/common.ts b/invokeai/frontend/web/src/features/nodes/types/common.ts index 36805c022d8..570b3026319 100644 --- a/invokeai/frontend/web/src/features/nodes/types/common.ts +++ b/invokeai/frontend/web/src/features/nodes/types/common.ts @@ -94,10 +94,11 @@ export const zBaseModelType = z.enum([ 'flux2', 'cogview4', 'z-image', + 'anima', 'unknown', ]); export type BaseModelType = z.infer; -export const zMainModelBase = z.enum(['sd-1', 'sd-2', 'sd-3', 'sdxl', 'flux', 'flux2', 'cogview4', 'z-image']); +export const zMainModelBase = z.enum(['sd-1', 'sd-2', 'sd-3', 'sdxl', 'flux', 'flux2', 'cogview4', 'z-image', 'anima']); type MainModelBase = z.infer; export const isMainModelBase = (base: unknown): base is MainModelBase => zMainModelBase.safeParse(base).success; export const zModelType = z.enum([ @@ -142,7 +143,7 @@ export const zModelVariantType = z.enum(['normal', 'inpaint', 'depth']); export const zFluxVariantType = z.enum(['dev', 'dev_fill', 'schnell']); export const zFlux2VariantType = z.enum(['klein_4b', 'klein_9b', 'klein_9b_base']); export const zZImageVariantType = z.enum(['turbo', 'zbase']); -export const zQwen3VariantType = z.enum(['qwen3_4b', 'qwen3_8b']); +export const zQwen3VariantType = z.enum(['qwen3_4b', 'qwen3_8b', 'qwen3_06b']); export const zAnyModelVariant = z.union([ zModelVariantType, zClipVariantType, diff --git a/invokeai/frontend/web/src/features/nodes/types/constants.ts b/invokeai/frontend/web/src/features/nodes/types/constants.ts index 656d323b65a..9da499ab91c 100644 --- a/invokeai/frontend/web/src/features/nodes/types/constants.ts +++ b/invokeai/frontend/web/src/features/nodes/types/constants.ts @@ -56,6 +56,7 @@ export const FIELD_COLORS: { [key: string]: string } = { SD3MainModelField: 'teal.500', CogView4MainModelField: 'teal.500', ZImageMainModelField: 'teal.500', + AnimaMainModelField: 'teal.500', SDXLMainModelField: 
'teal.500', SDXLRefinerModelField: 'teal.500', SpandrelImageToImageModelField: 'teal.500', diff --git a/invokeai/frontend/web/src/features/nodes/util/graph/generation/addAnimaLoRAs.ts b/invokeai/frontend/web/src/features/nodes/util/graph/generation/addAnimaLoRAs.ts new file mode 100644 index 00000000000..df0c95ea717 --- /dev/null +++ b/invokeai/frontend/web/src/features/nodes/util/graph/generation/addAnimaLoRAs.ts @@ -0,0 +1,70 @@ +import type { RootState } from 'app/store/store'; +import { getPrefixedId } from 'features/controlLayers/konva/util'; +import { zModelIdentifierField } from 'features/nodes/types/common'; +import type { Graph } from 'features/nodes/util/graph/generation/Graph'; +import type { Invocation, S } from 'services/api/types'; + +export const addAnimaLoRAs = ( + state: RootState, + g: Graph, + denoise: Invocation<'anima_denoise'>, + modelLoader: Invocation<'anima_model_loader'>, + posCond: Invocation<'anima_text_encoder'>, + negCond: Invocation<'anima_text_encoder'> | null +): void => { + const enabledLoRAs = state.loras.loras.filter((l) => l.isEnabled && l.model.base === 'anima'); + const loraCount = enabledLoRAs.length; + + if (loraCount === 0) { + return; + } + + const loraMetadata: S['LoRAMetadataField'][] = []; + + // We will collect LoRAs into a single collection node, then pass them to the LoRA collection loader, which applies + // each LoRA to the transformer and Qwen3 encoder. 
+ const loraCollector = g.addNode({ + id: getPrefixedId('lora_collector'), + type: 'collect', + }); + const loraCollectionLoader = g.addNode({ + type: 'anima_lora_collection_loader', + id: getPrefixedId('anima_lora_collection_loader'), + }); + + g.addEdge(loraCollector, 'collection', loraCollectionLoader, 'loras'); + // Use model loader as transformer input + g.addEdge(modelLoader, 'transformer', loraCollectionLoader, 'transformer'); + g.addEdge(modelLoader, 'qwen3_encoder', loraCollectionLoader, 'qwen3_encoder'); + // Reroute model connections through the LoRA collection loader + g.deleteEdgesTo(denoise, ['transformer']); + g.deleteEdgesTo(posCond, ['qwen3_encoder']); + g.addEdge(loraCollectionLoader, 'transformer', denoise, 'transformer'); + g.addEdge(loraCollectionLoader, 'qwen3_encoder', posCond, 'qwen3_encoder'); + // Only reroute negCond if it exists (guidance_scale > 0) + if (negCond !== null) { + g.deleteEdgesTo(negCond, ['qwen3_encoder']); + g.addEdge(loraCollectionLoader, 'qwen3_encoder', negCond, 'qwen3_encoder'); + } + + for (const lora of enabledLoRAs) { + const { weight } = lora; + const parsedModel = zModelIdentifierField.parse(lora.model); + + const loraSelector = g.addNode({ + type: 'lora_selector', + id: getPrefixedId('lora_selector'), + lora: parsedModel, + weight, + }); + + loraMetadata.push({ + model: parsedModel, + weight, + }); + + g.addEdge(loraSelector, 'lora', loraCollector, 'item'); + } + + g.upsertMetadata({ loras: loraMetadata }); +}; diff --git a/invokeai/frontend/web/src/features/nodes/util/graph/generation/addImageToImage.ts b/invokeai/frontend/web/src/features/nodes/util/graph/generation/addImageToImage.ts index 42730774992..1c69cdc0d11 100644 --- a/invokeai/frontend/web/src/features/nodes/util/graph/generation/addImageToImage.ts +++ b/invokeai/frontend/web/src/features/nodes/util/graph/generation/addImageToImage.ts @@ -21,7 +21,9 @@ type AddImageToImageArg = { state: RootState; manager: CanvasManager; l2i: Invocation; - i2l: 
Invocation<'i2l' | 'flux_vae_encode' | 'flux2_vae_encode' | 'sd3_i2l' | 'cogview4_i2l' | 'z_image_i2l'>; + i2l: Invocation< + 'i2l' | 'flux_vae_encode' | 'flux2_vae_encode' | 'sd3_i2l' | 'cogview4_i2l' | 'z_image_i2l' | 'anima_i2l' + >; noise?: Invocation<'noise'>; denoise: Invocation; vaeSource: Invocation; @@ -37,7 +39,16 @@ export const addImageToImage = async ({ denoise, vaeSource, }: AddImageToImageArg): Promise< - Invocation<'img_resize' | 'l2i' | 'flux_vae_decode' | 'flux2_vae_decode' | 'sd3_l2i' | 'cogview4_l2i' | 'z_image_l2i'> + Invocation< + | 'img_resize' + | 'l2i' + | 'flux_vae_decode' + | 'flux2_vae_decode' + | 'sd3_l2i' + | 'cogview4_l2i' + | 'z_image_l2i' + | 'anima_l2i' + > > => { const { denoising_start, denoising_end } = getDenoisingStartAndEnd(state); denoise.denoising_start = denoising_start; @@ -50,7 +61,8 @@ export const addImageToImage = async ({ denoise.type === 'flux_denoise' || denoise.type === 'flux2_denoise' || denoise.type === 'sd3_denoise' || - denoise.type === 'z_image_denoise' + denoise.type === 'z_image_denoise' || + denoise.type === 'anima_denoise' ) { denoise.width = scaledSize.width; denoise.height = scaledSize.height; diff --git a/invokeai/frontend/web/src/features/nodes/util/graph/generation/addInpaint.ts b/invokeai/frontend/web/src/features/nodes/util/graph/generation/addInpaint.ts index 837e7f09eaf..b53d79e2a5f 100644 --- a/invokeai/frontend/web/src/features/nodes/util/graph/generation/addInpaint.ts +++ b/invokeai/frontend/web/src/features/nodes/util/graph/generation/addInpaint.ts @@ -24,7 +24,9 @@ type AddInpaintArg = { state: RootState; manager: CanvasManager; l2i: Invocation; - i2l: Invocation<'i2l' | 'flux_vae_encode' | 'flux2_vae_encode' | 'sd3_i2l' | 'cogview4_i2l' | 'z_image_i2l'>; + i2l: Invocation< + 'i2l' | 'flux_vae_encode' | 'flux2_vae_encode' | 'sd3_i2l' | 'cogview4_i2l' | 'z_image_i2l' | 'anima_i2l' + >; noise?: Invocation<'noise'>; denoise: Invocation; vaeSource: Invocation; @@ -58,7 +60,8 @@ export const 
addInpaint = async ({ denoise.type === 'flux_denoise' || denoise.type === 'flux2_denoise' || denoise.type === 'sd3_denoise' || - denoise.type === 'z_image_denoise' + denoise.type === 'z_image_denoise' || + denoise.type === 'anima_denoise' ) { denoise.width = scaledSize.width; denoise.height = scaledSize.height; diff --git a/invokeai/frontend/web/src/features/nodes/util/graph/generation/addOutpaint.ts b/invokeai/frontend/web/src/features/nodes/util/graph/generation/addOutpaint.ts index 481a44e4c61..14be20c70e3 100644 --- a/invokeai/frontend/web/src/features/nodes/util/graph/generation/addOutpaint.ts +++ b/invokeai/frontend/web/src/features/nodes/util/graph/generation/addOutpaint.ts @@ -60,7 +60,8 @@ export const addOutpaint = async ({ denoise.type === 'flux_denoise' || denoise.type === 'flux2_denoise' || denoise.type === 'sd3_denoise' || - denoise.type === 'z_image_denoise' + denoise.type === 'z_image_denoise' || + denoise.type === 'anima_denoise' ) { denoise.width = scaledSize.width; denoise.height = scaledSize.height; diff --git a/invokeai/frontend/web/src/features/nodes/util/graph/generation/addRegions.ts b/invokeai/frontend/web/src/features/nodes/util/graph/generation/addRegions.ts index 22973983a91..6f2b717b40e 100644 --- a/invokeai/frontend/web/src/features/nodes/util/graph/generation/addRegions.ts +++ b/invokeai/frontend/web/src/features/nodes/util/graph/generation/addRegions.ts @@ -33,10 +33,20 @@ type AddRegionsArg = { bbox: Rect; model: MainModelConfig; posCond: Invocation< - 'compel' | 'sdxl_compel_prompt' | 'flux_text_encoder' | 'flux2_klein_text_encoder' | 'z_image_text_encoder' + | 'compel' + | 'sdxl_compel_prompt' + | 'flux_text_encoder' + | 'flux2_klein_text_encoder' + | 'z_image_text_encoder' + | 'anima_text_encoder' >; negCond: Invocation< - 'compel' | 'sdxl_compel_prompt' | 'flux_text_encoder' | 'flux2_klein_text_encoder' | 'z_image_text_encoder' + | 'compel' + | 'sdxl_compel_prompt' + | 'flux_text_encoder' + | 'flux2_klein_text_encoder' + | 
'z_image_text_encoder' + | 'anima_text_encoder' > | null; posCondCollect: Invocation<'collect'>; negCondCollect: Invocation<'collect'> | null; @@ -76,6 +86,7 @@ export const addRegions = async ({ const isSDXL = model.base === 'sdxl'; const isFLUX = model.base === 'flux'; const isZImage = model.base === 'z-image'; + const isAnima = model.base === 'anima'; const validRegions = regions .filter((entity) => entity.isEnabled) @@ -116,7 +127,9 @@ export const addRegions = async ({ if (region.positivePrompt) { // The main positive conditioning node result.addedPositivePrompt = true; - let regionalPosCond: Invocation<'compel' | 'sdxl_compel_prompt' | 'flux_text_encoder' | 'z_image_text_encoder'>; + let regionalPosCond: Invocation< + 'compel' | 'sdxl_compel_prompt' | 'flux_text_encoder' | 'z_image_text_encoder' | 'anima_text_encoder' + >; if (isSDXL) { regionalPosCond = g.addNode({ type: 'sdxl_compel_prompt', @@ -136,6 +149,12 @@ export const addRegions = async ({ id: getPrefixedId('prompt_region_positive_cond'), prompt: region.positivePrompt, }); + } else if (isAnima) { + regionalPosCond = g.addNode({ + type: 'anima_text_encoder', + id: getPrefixedId('prompt_region_positive_cond'), + prompt: region.positivePrompt, + }); } else { regionalPosCond = g.addNode({ type: 'compel', @@ -172,6 +191,12 @@ export const addRegions = async ({ clone.destination.node_id = regionalPosCond.id; g.addEdgeFromObj(clone); } + } else if (posCond.type === 'anima_text_encoder') { + for (const edge of g.getEdgesTo(posCond, ['qwen3_encoder', 'mask'])) { + const clone = deepClone(edge); + clone.destination.node_id = regionalPosCond.id; + g.addEdgeFromObj(clone); + } } else { assert(false, 'Unsupported positive conditioning node type.'); } @@ -183,7 +208,9 @@ export const addRegions = async ({ // The main negative conditioning node result.addedNegativePrompt = true; - let regionalNegCond: Invocation<'compel' | 'sdxl_compel_prompt' | 'flux_text_encoder' | 'z_image_text_encoder'>; + let regionalNegCond: 
Invocation< + 'compel' | 'sdxl_compel_prompt' | 'flux_text_encoder' | 'z_image_text_encoder' | 'anima_text_encoder' + >; if (isSDXL) { regionalNegCond = g.addNode({ type: 'sdxl_compel_prompt', @@ -203,6 +230,12 @@ export const addRegions = async ({ id: getPrefixedId('prompt_region_negative_cond'), prompt: region.negativePrompt, }); + } else if (isAnima) { + regionalNegCond = g.addNode({ + type: 'anima_text_encoder', + id: getPrefixedId('prompt_region_negative_cond'), + prompt: region.negativePrompt, + }); } else { regionalNegCond = g.addNode({ type: 'compel', @@ -240,6 +273,12 @@ export const addRegions = async ({ clone.destination.node_id = regionalNegCond.id; g.addEdgeFromObj(clone); } + } else if (negCond.type === 'anima_text_encoder') { + for (const edge of g.getEdgesTo(negCond, ['qwen3_encoder', 'mask'])) { + const clone = deepClone(edge); + clone.destination.node_id = regionalNegCond.id; + g.addEdgeFromObj(clone); + } } else { assert(false, 'Unsupported negative conditioning node type.'); } @@ -259,7 +298,7 @@ export const addRegions = async ({ g.addEdge(maskToTensor, 'mask', invertTensorMask, 'mask'); // Create the conditioning node. 
It's going to be connected to the negative cond collector, but it uses the positive prompt let regionalPosCondInverted: Invocation< - 'compel' | 'sdxl_compel_prompt' | 'flux_text_encoder' | 'z_image_text_encoder' + 'compel' | 'sdxl_compel_prompt' | 'flux_text_encoder' | 'z_image_text_encoder' | 'anima_text_encoder' >; if (isSDXL) { regionalPosCondInverted = g.addNode({ @@ -280,6 +319,12 @@ export const addRegions = async ({ id: getPrefixedId('prompt_region_positive_cond_inverted'), prompt: region.positivePrompt, }); + } else if (isAnima) { + regionalPosCondInverted = g.addNode({ + type: 'anima_text_encoder', + id: getPrefixedId('prompt_region_positive_cond_inverted'), + prompt: region.positivePrompt, + }); } else { regionalPosCondInverted = g.addNode({ type: 'compel', @@ -316,6 +361,12 @@ export const addRegions = async ({ clone.destination.node_id = regionalPosCondInverted.id; g.addEdgeFromObj(clone); } + } else if (posCond.type === 'anima_text_encoder') { + for (const edge of g.getEdgesTo(posCond, ['qwen3_encoder', 'mask'])) { + const clone = deepClone(edge); + clone.destination.node_id = regionalPosCondInverted.id; + g.addEdgeFromObj(clone); + } } else { assert(false, 'Unsupported positive conditioning node type.'); } diff --git a/invokeai/frontend/web/src/features/nodes/util/graph/generation/addTextToImage.ts b/invokeai/frontend/web/src/features/nodes/util/graph/generation/addTextToImage.ts index d70457ac63e..9cfd5e3b552 100644 --- a/invokeai/frontend/web/src/features/nodes/util/graph/generation/addTextToImage.ts +++ b/invokeai/frontend/web/src/features/nodes/util/graph/generation/addTextToImage.ts @@ -22,7 +22,14 @@ export const addTextToImage = ({ denoise, l2i, }: AddTextToImageArg): Invocation< - 'img_resize' | 'l2i' | 'flux_vae_decode' | 'flux2_vae_decode' | 'sd3_l2i' | 'cogview4_l2i' | 'z_image_l2i' + | 'img_resize' + | 'l2i' + | 'flux_vae_decode' + | 'flux2_vae_decode' + | 'sd3_l2i' + | 'cogview4_l2i' + | 'z_image_l2i' + | 'anima_l2i' > => { 
denoise.denoising_start = 0; denoise.denoising_end = 1; @@ -34,7 +41,8 @@ export const addTextToImage = ({ denoise.type === 'flux_denoise' || denoise.type === 'flux2_denoise' || denoise.type === 'sd3_denoise' || - denoise.type === 'z_image_denoise' + denoise.type === 'z_image_denoise' || + denoise.type === 'anima_denoise' ) { denoise.width = scaledSize.width; denoise.height = scaledSize.height; diff --git a/invokeai/frontend/web/src/features/nodes/util/graph/generation/buildAnimaGraph.ts b/invokeai/frontend/web/src/features/nodes/util/graph/generation/buildAnimaGraph.ts new file mode 100644 index 00000000000..b5ae21919d5 --- /dev/null +++ b/invokeai/frontend/web/src/features/nodes/util/graph/generation/buildAnimaGraph.ts @@ -0,0 +1,274 @@ +import { logger } from 'app/logging/logger'; +import { getPrefixedId } from 'features/controlLayers/konva/util'; +import { + selectAnimaQwen3EncoderModel, + selectAnimaScheduler, + selectAnimaT5EncoderModel, + selectAnimaVaeModel, + selectMainModelConfig, + selectParamsSlice, +} from 'features/controlLayers/store/paramsSlice'; +import { selectCanvasMetadata, selectCanvasSlice } from 'features/controlLayers/store/selectors'; +import { fetchModelConfigWithTypeGuard } from 'features/metadata/util/modelFetchingHelpers'; +import { addAnimaLoRAs } from 'features/nodes/util/graph/generation/addAnimaLoRAs'; +import { addImageToImage } from 'features/nodes/util/graph/generation/addImageToImage'; +import { addInpaint } from 'features/nodes/util/graph/generation/addInpaint'; +import { addNSFWChecker } from 'features/nodes/util/graph/generation/addNSFWChecker'; +import { addOutpaint } from 'features/nodes/util/graph/generation/addOutpaint'; +import { addRegions } from 'features/nodes/util/graph/generation/addRegions'; +import { addTextToImage } from 'features/nodes/util/graph/generation/addTextToImage'; +import { addWatermarker } from 'features/nodes/util/graph/generation/addWatermarker'; +import { Graph } from 
'features/nodes/util/graph/generation/Graph'; +import { selectCanvasOutputFields, selectPresetModifiedPrompts } from 'features/nodes/util/graph/graphBuilderUtils'; +import type { GraphBuilderArg, GraphBuilderReturn, ImageOutputNodes } from 'features/nodes/util/graph/types'; +import { selectActiveTab } from 'features/ui/store/uiSelectors'; +import type { Invocation } from 'services/api/types'; +import { isNonRefinerMainModelConfig } from 'services/api/types'; +import type { Equals } from 'tsafe'; +import { assert } from 'tsafe'; + +const log = logger('system'); + +export const buildAnimaGraph = async (arg: GraphBuilderArg): Promise => { + const { generationMode, state, manager } = arg; + + log.debug({ generationMode, manager: manager?.id }, 'Building Anima graph'); + + const model = selectMainModelConfig(state); + assert(model, 'No model selected'); + assert(model.base === 'anima', 'Selected model is not an Anima model'); + + // Get Anima component models + const animaVaeModel = selectAnimaVaeModel(state); + const animaQwen3EncoderModel = selectAnimaQwen3EncoderModel(state); + const animaT5EncoderModel = selectAnimaT5EncoderModel(state); + const animaScheduler = selectAnimaScheduler(state); + + // Validate required component models + assert( + animaVaeModel !== null, + 'No VAE model selected for Anima. Set a compatible VAE (Wan 2.1 QwenImage or FLUX VAE).' + ); + assert(animaQwen3EncoderModel !== null, 'No Qwen3 Encoder model selected for Anima. Set a Qwen3 0.6B encoder model.'); + assert(animaT5EncoderModel !== null, 'No T5 Encoder model selected for Anima. Set a T5-XXL encoder model.'); + + const params = selectParamsSlice(state); + const { cfgScale: guidance_scale, steps } = params; + + const prompts = selectPresetModifiedPrompts(state); + + const g = new Graph(getPrefixedId('anima_graph')); + + const modelLoader = g.addNode({ + type: 'anima_model_loader', + id: getPrefixedId('anima_model_loader'), + model, + vae_model: animaVaeModel ?? 
undefined, + qwen3_encoder_model: animaQwen3EncoderModel ?? undefined, + t5_encoder_model: animaT5EncoderModel ?? undefined, + }); + + const positivePrompt = g.addNode({ + id: getPrefixedId('positive_prompt'), + type: 'string', + }); + const posCond = g.addNode({ + type: 'anima_text_encoder', + id: getPrefixedId('pos_prompt'), + }); + // Collect node for regional prompting support + const posCondCollect = g.addNode({ + type: 'collect', + id: getPrefixedId('pos_cond_collect'), + }); + + // Anima supports negative conditioning when guidance_scale > 1 + let negCond: Invocation<'anima_text_encoder'> | null = null; + let negCondCollect: Invocation<'collect'> | null = null; + if (guidance_scale > 1) { + negCond = g.addNode({ + type: 'anima_text_encoder', + id: getPrefixedId('neg_prompt'), + prompt: prompts.negative, + }); + negCondCollect = g.addNode({ + type: 'collect', + id: getPrefixedId('neg_cond_collect'), + }); + } + + // Placeholder collect node for IP adapters (not supported for Anima but needed for addRegions) + const ipAdapterCollect = g.addNode({ + type: 'collect', + id: getPrefixedId('ip_adapter_collect'), + }); + + const seed = g.addNode({ + id: getPrefixedId('seed'), + type: 'integer', + }); + const denoise = g.addNode({ + type: 'anima_denoise', + id: getPrefixedId('denoise_latents'), + guidance_scale, + steps, + scheduler: animaScheduler, + }); + const l2i = g.addNode({ + type: 'anima_l2i', + id: getPrefixedId('l2i'), + }); + + // Connect model loader outputs + g.addEdge(modelLoader, 'transformer', denoise, 'transformer'); + g.addEdge(modelLoader, 'qwen3_encoder', posCond, 'qwen3_encoder'); + g.addEdge(modelLoader, 't5_encoder', posCond, 't5_encoder'); + g.addEdge(modelLoader, 'vae', l2i, 'vae'); + + // Connect positive prompt through collector for regional support + g.addEdge(positivePrompt, 'value', posCond, 'prompt'); + g.addEdge(posCond, 'conditioning', posCondCollect, 'item'); + g.addEdge(posCondCollect, 'collection', denoise, 
'positive_conditioning'); + + // Connect negative conditioning if guidance_scale > 1 + if (negCond !== null && negCondCollect !== null) { + g.addEdge(modelLoader, 'qwen3_encoder', negCond, 'qwen3_encoder'); + g.addEdge(modelLoader, 't5_encoder', negCond, 't5_encoder'); + g.addEdge(negCond, 'conditioning', negCondCollect, 'item'); + g.addEdge(negCondCollect, 'collection', denoise, 'negative_conditioning'); + } + + // Connect seed and denoiser to L2I + g.addEdge(seed, 'value', denoise, 'seed'); + g.addEdge(denoise, 'latents', l2i, 'latents'); + + const modelConfig = await fetchModelConfigWithTypeGuard(model.key, isNonRefinerMainModelConfig); + assert(modelConfig.base === 'anima'); + + g.upsertMetadata({ + cfg_scale: guidance_scale, + negative_prompt: prompts.negative, + model: Graph.getModelMetadataField(modelConfig), + steps, + scheduler: animaScheduler, + vae: animaVaeModel ?? undefined, + qwen3_encoder: animaQwen3EncoderModel ?? undefined, + }); + g.addEdgeToMetadata(seed, 'value', 'seed'); + g.addEdgeToMetadata(positivePrompt, 'value', 'positive_prompt'); + + // Add regional guidance if canvas manager is available + const canvas = selectCanvasSlice(state); + if (manager !== null) { + await addRegions({ + manager, + regions: canvas.regionalGuidance.entities, + g, + bbox: canvas.bbox.rect, + model, + posCond, + negCond, + posCondCollect, + negCondCollect, + ipAdapterCollect, + fluxReduxCollect: null, // Not supported for Anima + }); + } + + // IP Adapters are not supported for Anima, so delete the unused collector + g.deleteNode(ipAdapterCollect.id); + + // Add LoRAs + addAnimaLoRAs(state, g, denoise, modelLoader, posCond, negCond); + + let canvasOutput: Invocation = l2i; + + if (generationMode === 'txt2img') { + canvasOutput = addTextToImage({ + g, + state, + denoise, + l2i, + }); + g.upsertMetadata({ generation_mode: 'anima_txt2img' }); + } else if (generationMode === 'img2img') { + assert(manager !== null); + const i2l = g.addNode({ + type: 'anima_i2l', + id: 
getPrefixedId('anima_i2l'), + }); + + canvasOutput = await addImageToImage({ + g, + state, + manager, + denoise, + l2i, + i2l, + vaeSource: modelLoader, + }); + g.upsertMetadata({ generation_mode: 'anima_img2img' }); + } else if (generationMode === 'inpaint') { + assert(manager !== null); + const i2l = g.addNode({ + type: 'anima_i2l', + id: getPrefixedId('anima_i2l'), + }); + + canvasOutput = await addInpaint({ + g, + state, + manager, + l2i, + i2l, + denoise, + vaeSource: modelLoader, + modelLoader, + seed, + }); + g.upsertMetadata({ generation_mode: 'anima_inpaint' }); + } else if (generationMode === 'outpaint') { + assert(manager !== null); + const i2l = g.addNode({ + type: 'anima_i2l', + id: getPrefixedId('anima_i2l'), + }); + + canvasOutput = await addOutpaint({ + g, + state, + manager, + l2i, + i2l, + denoise, + vaeSource: modelLoader, + modelLoader, + seed, + }); + g.upsertMetadata({ generation_mode: 'anima_outpaint' }); + } else { + assert>(false); + } + + if (state.system.shouldUseNSFWChecker) { + canvasOutput = addNSFWChecker(g, canvasOutput); + } + + if (state.system.shouldUseWatermarker) { + canvasOutput = addWatermarker(g, canvasOutput); + } + + g.updateNode(canvasOutput, selectCanvasOutputFields(state)); + + if (selectActiveTab(state) === 'canvas') { + g.upsertMetadata(selectCanvasMetadata(state)); + } + + g.setMetadataReceivingNode(canvasOutput); + + return { + g, + seed, + positivePrompt, + }; +}; diff --git a/invokeai/frontend/web/src/features/nodes/util/graph/graphBuilderUtils.ts b/invokeai/frontend/web/src/features/nodes/util/graph/graphBuilderUtils.ts index 0424f175066..fdc3c2a5a2d 100644 --- a/invokeai/frontend/web/src/features/nodes/util/graph/graphBuilderUtils.ts +++ b/invokeai/frontend/web/src/features/nodes/util/graph/graphBuilderUtils.ts @@ -213,7 +213,8 @@ export const isMainModelWithoutUnet = (modelLoader: Invocation0.8). The exponent 0.2 spreads the effective range more evenly, + // matching the approach used for FLUX and SD3. 
+ const animaExponent = optimizedDenoisingEnabled ? 0.2 : 1; + return { + denoising_start: 1 - denoisingStrength ** animaExponent, + denoising_end: 1, + }; + } case 'sd-1': case 'sd-2': case 'cogview4': diff --git a/invokeai/frontend/web/src/features/nodes/util/graph/types.ts b/invokeai/frontend/web/src/features/nodes/util/graph/types.ts index 334f7449c43..f02a7bf6f8c 100644 --- a/invokeai/frontend/web/src/features/nodes/util/graph/types.ts +++ b/invokeai/frontend/web/src/features/nodes/util/graph/types.ts @@ -15,7 +15,8 @@ export type ImageOutputNodes = | 'flux2_vae_decode' | 'sd3_l2i' | 'cogview4_l2i' - | 'z_image_l2i'; + | 'z_image_l2i' + | 'anima_l2i'; export type LatentToImageNodes = | 'l2i' @@ -23,7 +24,8 @@ export type LatentToImageNodes = | 'flux2_vae_decode' | 'sd3_l2i' | 'cogview4_l2i' - | 'z_image_l2i'; + | 'z_image_l2i' + | 'anima_l2i'; export type ImageToLatentsNodes = | 'i2l' @@ -31,7 +33,8 @@ export type ImageToLatentsNodes = | 'flux2_vae_encode' | 'sd3_i2l' | 'cogview4_i2l' - | 'z_image_i2l'; + | 'z_image_i2l' + | 'anima_i2l'; export type DenoiseLatentsNodes = | 'denoise_latents' @@ -39,7 +42,8 @@ export type DenoiseLatentsNodes = | 'flux2_denoise' | 'sd3_denoise' | 'cogview4_denoise' - | 'z_image_denoise'; + | 'z_image_denoise' + | 'anima_denoise'; export type MainModelLoaderNodes = | 'main_model_loader' @@ -48,7 +52,8 @@ export type MainModelLoaderNodes = | 'flux2_klein_model_loader' | 'sd3_model_loader' | 'cogview4_model_loader' - | 'z_image_model_loader'; + | 'z_image_model_loader' + | 'anima_model_loader'; export type VaeSourceNodes = 'seamless' | 'vae_loader'; diff --git a/invokeai/frontend/web/src/features/parameters/components/Advanced/ParamAnimaModelSelect.tsx b/invokeai/frontend/web/src/features/parameters/components/Advanced/ParamAnimaModelSelect.tsx new file mode 100644 index 00000000000..63cc5202090 --- /dev/null +++ b/invokeai/frontend/web/src/features/parameters/components/Advanced/ParamAnimaModelSelect.tsx @@ -0,0 +1,163 @@ +import { 
Combobox, FormControl, FormLabel } from '@invoke-ai/ui-library'; +import { useAppDispatch, useAppSelector } from 'app/store/storeHooks'; +import { useModelCombobox } from 'common/hooks/useModelCombobox'; +import { + animaQwen3EncoderModelSelected, + animaT5EncoderModelSelected, + animaVaeModelSelected, + selectAnimaQwen3EncoderModel, + selectAnimaT5EncoderModel, + selectAnimaVaeModel, +} from 'features/controlLayers/store/paramsSlice'; +import { zModelIdentifierField } from 'features/nodes/types/common'; +import { memo, useCallback } from 'react'; +import { useTranslation } from 'react-i18next'; +import { useAnimaVAEModels, useQwen3EncoderModels, useT5EncoderModels } from 'services/api/hooks/modelsByType'; +import type { Qwen3EncoderModelConfig, T5EncoderModelConfig, VAEModelConfig } from 'services/api/types'; + +/** + * Anima VAE Model Select - uses Anima-base VAE models (QwenImage/Wan 2.1 VAE) + */ +const ParamAnimaVaeModelSelect = memo(() => { + const dispatch = useAppDispatch(); + const { t } = useTranslation(); + const animaVaeModel = useAppSelector(selectAnimaVaeModel); + const [modelConfigs, { isLoading }] = useAnimaVAEModels(); + + const _onChange = useCallback( + (model: VAEModelConfig | null) => { + if (model) { + dispatch(animaVaeModelSelected(zModelIdentifierField.parse(model))); + } else { + dispatch(animaVaeModelSelected(null)); + } + }, + [dispatch] + ); + + const { options, value, onChange, noOptionsMessage } = useModelCombobox({ + modelConfigs, + onChange: _onChange, + selectedModel: animaVaeModel, + isLoading, + }); + + return ( + + {t('modelManager.animaVae')} + + + ); +}); + +ParamAnimaVaeModelSelect.displayName = 'ParamAnimaVaeModelSelect'; + +/** + * Anima Qwen3 0.6B Encoder Model Select + */ +const ParamAnimaQwen3EncoderModelSelect = memo(() => { + const dispatch = useAppDispatch(); + const { t } = useTranslation(); + const animaQwen3EncoderModel = useAppSelector(selectAnimaQwen3EncoderModel); + const [modelConfigs, { isLoading }] = 
useQwen3EncoderModels(); + + const _onChange = useCallback( + (model: Qwen3EncoderModelConfig | null) => { + if (model) { + dispatch(animaQwen3EncoderModelSelected(zModelIdentifierField.parse(model))); + } else { + dispatch(animaQwen3EncoderModelSelected(null)); + } + }, + [dispatch] + ); + + const { options, value, onChange, noOptionsMessage } = useModelCombobox({ + modelConfigs, + onChange: _onChange, + selectedModel: animaQwen3EncoderModel, + isLoading, + }); + + return ( + + {t('modelManager.animaQwen3Encoder')} + + + ); +}); + +ParamAnimaQwen3EncoderModelSelect.displayName = 'ParamAnimaQwen3EncoderModelSelect'; + +/** + * Anima T5 Encoder Model Select - uses T5-XXL encoder models (tokenizer submodel used for Anima) + */ +const ParamAnimaT5EncoderModelSelect = memo(() => { + const dispatch = useAppDispatch(); + const { t } = useTranslation(); + const animaT5EncoderModel = useAppSelector(selectAnimaT5EncoderModel); + const [modelConfigs, { isLoading }] = useT5EncoderModels(); + + const _onChange = useCallback( + (model: T5EncoderModelConfig | null) => { + if (model) { + dispatch(animaT5EncoderModelSelected(zModelIdentifierField.parse(model))); + } else { + dispatch(animaT5EncoderModelSelected(null)); + } + }, + [dispatch] + ); + + const { options, value, onChange, noOptionsMessage } = useModelCombobox({ + modelConfigs, + onChange: _onChange, + selectedModel: animaT5EncoderModel, + isLoading, + }); + + return ( + + {t('modelManager.animaT5Encoder')} + + + ); +}); + +ParamAnimaT5EncoderModelSelect.displayName = 'ParamAnimaT5EncoderModelSelect'; + +/** + * Combined component for Anima model selection (VAE + Qwen3 Encoder + T5 Encoder) + */ +const ParamAnimaModelSelect = () => { + return ( + <> + + + + + ); +}; + +export default memo(ParamAnimaModelSelect); diff --git a/invokeai/frontend/web/src/features/parameters/components/Core/ParamAnimaScheduler.tsx b/invokeai/frontend/web/src/features/parameters/components/Core/ParamAnimaScheduler.tsx new file mode 100644 
index 00000000000..fbb1819b4e7 --- /dev/null +++ b/invokeai/frontend/web/src/features/parameters/components/Core/ParamAnimaScheduler.tsx @@ -0,0 +1,45 @@ +import type { ComboboxOnChange, ComboboxOption } from '@invoke-ai/ui-library'; +import { Combobox, FormControl, FormLabel } from '@invoke-ai/ui-library'; +import { useAppDispatch, useAppSelector } from 'app/store/storeHooks'; +import { InformationalPopover } from 'common/components/InformationalPopover/InformationalPopover'; +import { selectAnimaScheduler, setAnimaScheduler } from 'features/controlLayers/store/paramsSlice'; +import { isParameterZImageScheduler } from 'features/parameters/types/parameterSchemas'; +import { memo, useCallback, useMemo } from 'react'; +import { useTranslation } from 'react-i18next'; + +// Anima scheduler options (same flow-matching schedulers as Z-Image) +const ANIMA_SCHEDULER_OPTIONS: ComboboxOption[] = [ + { value: 'euler', label: 'Euler' }, + { value: 'heun', label: 'Heun (2nd order)' }, + { value: 'lcm', label: 'LCM' }, +]; + +const ParamAnimaScheduler = () => { + const dispatch = useAppDispatch(); + const { t } = useTranslation(); + const animaScheduler = useAppSelector(selectAnimaScheduler); + + const onChange = useCallback( + (v) => { + // Reuse Z-Image scheduler type guard since the values are identical + if (!isParameterZImageScheduler(v?.value)) { + return; + } + dispatch(setAnimaScheduler(v.value)); + }, + [dispatch] + ); + + const value = useMemo(() => ANIMA_SCHEDULER_OPTIONS.find((o) => o.value === animaScheduler), [animaScheduler]); + + return ( + + + {t('parameters.scheduler')} + + + + ); +}; + +export default memo(ParamAnimaScheduler); diff --git a/invokeai/frontend/web/src/features/parameters/types/constants.ts b/invokeai/frontend/web/src/features/parameters/types/constants.ts index 8bace52e556..03441c1d76c 100644 --- a/invokeai/frontend/web/src/features/parameters/types/constants.ts +++ b/invokeai/frontend/web/src/features/parameters/types/constants.ts @@ -41,6 
+41,10 @@ export const CLIP_SKIP_MAP: { [key in BaseModelType]?: { maxClip: number; marker maxClip: 0, markers: [], }, + anima: { + maxClip: 0, + markers: [], + }, }; /** diff --git a/invokeai/frontend/web/src/features/parameters/util/optimalDimension.ts b/invokeai/frontend/web/src/features/parameters/util/optimalDimension.ts index fd0d7a240d9..5a9622a12fe 100644 --- a/invokeai/frontend/web/src/features/parameters/util/optimalDimension.ts +++ b/invokeai/frontend/web/src/features/parameters/util/optimalDimension.ts @@ -3,7 +3,7 @@ import type { BaseModelType } from 'features/nodes/types/common'; /** * Gets the optimal dimension for a given base model: * - sd-1, sd-2: 512 - * - sdxl, flux, sd-3, cogview4, z-image: 1024 + * - sdxl, flux, sd-3, cogview4, z-image, anima: 1024 * - default: 1024 * @param base The base model * @returns The optimal dimension for the model, defaulting to 1024 @@ -19,6 +19,7 @@ export const getOptimalDimension = (base?: BaseModelType | null): number => { case 'sd-3': case 'cogview4': case 'z-image': + case 'anima': default: return 1024; } @@ -60,7 +61,7 @@ export const isInSDXLTrainingDimensions = (width: number, height: number): boole /** * Gets the grid size for a given base model. For Flux, the grid size is 16, otherwise it is 8. 
- * - sd-1, sd-2, sdxl: 8 + * - sd-1, sd-2, sdxl, anima: 8 * - flux, sd-3, z-image: 16 * - cogview4: 32 * - default: 8 @@ -79,6 +80,7 @@ export const getGridSize = (base?: BaseModelType | null): number => { case 'sd-1': case 'sd-2': case 'sdxl': + case 'anima': default: return 8; } diff --git a/invokeai/frontend/web/src/features/queue/hooks/useEnqueueCanvas.ts b/invokeai/frontend/web/src/features/queue/hooks/useEnqueueCanvas.ts index 652cf4c5b24..b32ea8e3030 100644 --- a/invokeai/frontend/web/src/features/queue/hooks/useEnqueueCanvas.ts +++ b/invokeai/frontend/web/src/features/queue/hooks/useEnqueueCanvas.ts @@ -8,6 +8,7 @@ import { useCanvasManagerSafe } from 'features/controlLayers/contexts/CanvasMana import type { CanvasManager } from 'features/controlLayers/konva/CanvasManager'; import { positivePromptAddedToHistory, selectPositivePrompt } from 'features/controlLayers/store/paramsSlice'; import { prepareLinearUIBatch } from 'features/nodes/util/graph/buildLinearBatchConfig'; +import { buildAnimaGraph } from 'features/nodes/util/graph/generation/buildAnimaGraph'; import { buildCogView4Graph } from 'features/nodes/util/graph/generation/buildCogView4Graph'; import { buildFLUXGraph } from 'features/nodes/util/graph/generation/buildFLUXGraph'; import { buildSD1Graph } from 'features/nodes/util/graph/generation/buildSD1Graph'; @@ -59,6 +60,8 @@ const enqueueCanvas = async (store: AppStore, canvasManager: CanvasManager, prep return await buildCogView4Graph(graphBuilderArg); case 'z-image': return await buildZImageGraph(graphBuilderArg); + case 'anima': + return await buildAnimaGraph(graphBuilderArg); default: assert(false, `No graph builders for base ${base}`); } diff --git a/invokeai/frontend/web/src/features/queue/hooks/useEnqueueGenerate.ts b/invokeai/frontend/web/src/features/queue/hooks/useEnqueueGenerate.ts index cf00a12ee5f..cbae643ee92 100644 --- a/invokeai/frontend/web/src/features/queue/hooks/useEnqueueGenerate.ts +++ 
b/invokeai/frontend/web/src/features/queue/hooks/useEnqueueGenerate.ts @@ -6,6 +6,7 @@ import { extractMessageFromAssertionError } from 'common/util/extractMessageFrom import { withResult, withResultAsync } from 'common/util/result'; import { positivePromptAddedToHistory, selectPositivePrompt } from 'features/controlLayers/store/paramsSlice'; import { prepareLinearUIBatch } from 'features/nodes/util/graph/buildLinearBatchConfig'; +import { buildAnimaGraph } from 'features/nodes/util/graph/generation/buildAnimaGraph'; import { buildCogView4Graph } from 'features/nodes/util/graph/generation/buildCogView4Graph'; import { buildFLUXGraph } from 'features/nodes/util/graph/generation/buildFLUXGraph'; import { buildSD1Graph } from 'features/nodes/util/graph/generation/buildSD1Graph'; @@ -52,6 +53,8 @@ const enqueueGenerate = async (store: AppStore, prepend: boolean) => { return await buildCogView4Graph(graphBuilderArg); case 'z-image': return await buildZImageGraph(graphBuilderArg); + case 'anima': + return await buildAnimaGraph(graphBuilderArg); default: assert(false, `No graph builders for base ${base}`); } diff --git a/invokeai/frontend/web/src/features/queue/store/readiness.ts b/invokeai/frontend/web/src/features/queue/store/readiness.ts index 8fa97eff4a9..0775faa2cda 100644 --- a/invokeai/frontend/web/src/features/queue/store/readiness.ts +++ b/invokeai/frontend/web/src/features/queue/store/readiness.ts @@ -270,6 +270,18 @@ const getReasonsWhyCannotEnqueueGenerateTab = (arg: { } } + if (model?.base === 'anima') { + if (!params.animaVaeModel) { + reasons.push({ content: i18n.t('parameters.invoke.noAnimaVaeModelSelected') }); + } + if (!params.animaQwen3EncoderModel) { + reasons.push({ content: i18n.t('parameters.invoke.noAnimaQwen3EncoderModelSelected') }); + } + if (!params.animaT5EncoderModel) { + reasons.push({ content: i18n.t('parameters.invoke.noAnimaT5EncoderModelSelected') }); + } + } + if (model) { for (const lora of loras.filter(({ isEnabled }) => isEnabled 
=== true)) { if (model.base !== lora.model.base) { @@ -648,6 +660,18 @@ const getReasonsWhyCannotEnqueueCanvasTab = (arg: { } } + if (model?.base === 'anima') { + if (!params.animaVaeModel) { + reasons.push({ content: i18n.t('parameters.invoke.noAnimaVaeModelSelected') }); + } + if (!params.animaQwen3EncoderModel) { + reasons.push({ content: i18n.t('parameters.invoke.noAnimaQwen3EncoderModelSelected') }); + } + if (!params.animaT5EncoderModel) { + reasons.push({ content: i18n.t('parameters.invoke.noAnimaT5EncoderModelSelected') }); + } + } + if (model) { for (const lora of loras.filter(({ isEnabled }) => isEnabled === true)) { if (model.base !== lora.model.base) { diff --git a/invokeai/frontend/web/src/features/settingsAccordions/components/AdvancedSettingsAccordion/AdvancedSettingsAccordion.tsx b/invokeai/frontend/web/src/features/settingsAccordions/components/AdvancedSettingsAccordion/AdvancedSettingsAccordion.tsx index eddf82e9221..44286780784 100644 --- a/invokeai/frontend/web/src/features/settingsAccordions/components/AdvancedSettingsAccordion/AdvancedSettingsAccordion.tsx +++ b/invokeai/frontend/web/src/features/settingsAccordions/components/AdvancedSettingsAccordion/AdvancedSettingsAccordion.tsx @@ -4,6 +4,7 @@ import { skipToken } from '@reduxjs/toolkit/query'; import { createMemoizedSelector } from 'app/store/createMemoizedSelector'; import { useAppSelector } from 'app/store/storeHooks'; import { + selectIsAnima, selectIsFLUX, selectIsFlux2, selectIsSD3, @@ -11,6 +12,7 @@ import { selectParamsSlice, selectVAEKey, } from 'features/controlLayers/store/paramsSlice'; +import ParamAnimaModelSelect from 'features/parameters/components/Advanced/ParamAnimaModelSelect'; import ParamCFGRescaleMultiplier from 'features/parameters/components/Advanced/ParamCFGRescaleMultiplier'; import ParamCLIPEmbedModelSelect from 'features/parameters/components/Advanced/ParamCLIPEmbedModelSelect'; import ParamCLIPGEmbedModelSelect from 
'features/parameters/components/Advanced/ParamCLIPGEmbedModelSelect'; @@ -45,6 +47,7 @@ export const AdvancedSettingsAccordion = memo(() => { const isFlux2 = useAppSelector(selectIsFlux2); const isSD3 = useAppSelector(selectIsSD3); const isZImage = useAppSelector(selectIsZImage); + const isAnima = useAppSelector(selectIsAnima); const selectBadges = useMemo( () => @@ -94,13 +97,13 @@ export const AdvancedSettingsAccordion = memo(() => { return ( - {!isZImage && !isFlux2 && ( + {!isZImage && !isAnima && !isFlux2 && ( {isFLUX ? : } {!isFLUX && !isSD3 && } )} - {!isFLUX && !isFlux2 && !isSD3 && !isZImage && ( + {!isFLUX && !isFlux2 && !isSD3 && !isZImage && !isAnima && ( <> @@ -142,6 +145,11 @@ export const AdvancedSettingsAccordion = memo(() => { )} + {isAnima && ( + + + + )} ); diff --git a/invokeai/frontend/web/src/features/settingsAccordions/components/GenerationSettingsAccordion/GenerationSettingsAccordion.tsx b/invokeai/frontend/web/src/features/settingsAccordions/components/GenerationSettingsAccordion/GenerationSettingsAccordion.tsx index ffdbc4ce778..50ddcf2bbae 100644 --- a/invokeai/frontend/web/src/features/settingsAccordions/components/GenerationSettingsAccordion/GenerationSettingsAccordion.tsx +++ b/invokeai/frontend/web/src/features/settingsAccordions/components/GenerationSettingsAccordion/GenerationSettingsAccordion.tsx @@ -6,6 +6,7 @@ import { useAppSelector } from 'app/store/storeHooks'; import { selectLoRAsSlice } from 'features/controlLayers/store/lorasSlice'; import { selectFluxDypePreset, + selectIsAnima, selectIsCogView4, selectIsFLUX, selectIsFlux2, @@ -14,6 +15,7 @@ import { } from 'features/controlLayers/store/paramsSlice'; import { LoRAList } from 'features/lora/components/LoRAList'; import LoRASelect from 'features/lora/components/LoRASelect'; +import ParamAnimaScheduler from 'features/parameters/components/Core/ParamAnimaScheduler'; import ParamCFGScale from 'features/parameters/components/Core/ParamCFGScale'; import ParamFluxDypeExponent 
from 'features/parameters/components/Core/ParamFluxDypeExponent'; import ParamFluxDypePreset from 'features/parameters/components/Core/ParamFluxDypePreset'; @@ -44,6 +46,7 @@ export const GenerationSettingsAccordion = memo(() => { const isSD3 = useAppSelector(selectIsSD3); const isCogView4 = useAppSelector(selectIsCogView4); const isZImage = useAppSelector(selectIsZImage); + const isAnima = useAppSelector(selectIsAnima); const fluxDypePreset = useAppSelector(selectFluxDypePreset); const selectBadges = useMemo( @@ -82,9 +85,10 @@ export const GenerationSettingsAccordion = memo(() => { - {!isFLUX && !isFlux2 && !isSD3 && !isCogView4 && !isZImage && } + {!isFLUX && !isFlux2 && !isSD3 && !isCogView4 && !isZImage && !isAnima && } {isFLUX && } {isZImage && } + {isAnima && } {(isFLUX || isFlux2) && modelConfig && !isFluxFillMainModelModelConfig(modelConfig) && } {!isFLUX && !isFlux2 && } diff --git a/invokeai/frontend/web/src/services/api/hooks/modelsByType.ts b/invokeai/frontend/web/src/services/api/hooks/modelsByType.ts index 98d7dd1e8df..25087737ca9 100644 --- a/invokeai/frontend/web/src/services/api/hooks/modelsByType.ts +++ b/invokeai/frontend/web/src/services/api/hooks/modelsByType.ts @@ -11,6 +11,7 @@ import { } from 'services/api/endpoints/models'; import type { AnyModelConfig } from 'services/api/types'; import { + isAnimaVAEModelConfig, isCLIPEmbedModelConfigOrSubmodel, isControlLayerModelConfig, isControlNetModelConfig, @@ -68,6 +69,7 @@ export const useEmbeddingModels = buildModelsHook(isTIModelConfig); export const useVAEModels = () => buildModelsHook(isVAEModelConfigOrSubmodel)(); export const useFlux1VAEModels = () => buildModelsHook(isFlux1VAEModelConfig)(); export const useFlux2VAEModels = () => buildModelsHook(isFlux2VAEModelConfig)(); +export const useAnimaVAEModels = () => buildModelsHook(isAnimaVAEModelConfig)(); export const useZImageDiffusersModels = () => buildModelsHook(isZImageDiffusersMainModelConfig)(); export const useQwen3EncoderModels = () 
=> buildModelsHook(isQwen3EncoderModelConfig)(); export const useGlobalReferenceImageModels = buildModelsHook( @@ -106,3 +108,5 @@ export const selectRegionalRefImageModels = buildModelsSelector( export const selectQwen3EncoderModels = buildModelsSelector(isQwen3EncoderModelConfig); export const selectZImageDiffusersModels = buildModelsSelector(isZImageDiffusersMainModelConfig); export const selectFluxVAEModels = buildModelsSelector(isFluxVAEModelConfig); +export const selectAnimaVAEModels = buildModelsSelector(isAnimaVAEModelConfig); +export const selectT5EncoderModels = buildModelsSelector(isT5EncoderModelConfigOrSubmodel); diff --git a/invokeai/frontend/web/src/services/api/schema.ts b/invokeai/frontend/web/src/services/api/schema.ts index fc6506ce22b..af78eb5c22a 100644 --- a/invokeai/frontend/web/src/services/api/schema.ts +++ b/invokeai/frontend/web/src/services/api/schema.ts @@ -2612,7 +2612,528 @@ export type components = { */ type: "alpha_mask_to_tensor"; }; - AnyModelConfig: components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | 
components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | 
components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["Unknown_Config"]; + /** + * AnimaConditioningField + * @description An Anima conditioning tensor primitive value. + * + * Anima conditioning contains Qwen3 0.6B hidden states and T5-XXL token IDs, + * which are combined by the LLM Adapter inside the transformer. + */ + AnimaConditioningField: { + /** + * Conditioning Name + * @description The name of conditioning tensor + */ + conditioning_name: string; + /** + * @description The mask associated with this conditioning tensor for regional prompting. Excluded regions should be set to False, included regions should be set to True. 
+ * @default null + */ + mask?: components["schemas"]["TensorField"] | null; + }; + /** + * AnimaConditioningOutput + * @description Base class for nodes that output an Anima text conditioning tensor. + */ + AnimaConditioningOutput: { + /** @description Conditioning tensor */ + conditioning: components["schemas"]["AnimaConditioningField"]; + /** + * type + * @default anima_conditioning_output + * @constant + */ + type: "anima_conditioning_output"; + }; + /** + * Denoise - Anima + * @description Run the denoising process with an Anima model. + * + * Uses rectified flow sampling with shift=3.0 and the Cosmos Predict2 DiT + * backbone with integrated LLM Adapter for text conditioning. + * + * Supports txt2img, img2img (via latents input), and inpainting (via denoise_mask). + */ + AnimaDenoiseInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description Latents tensor + * @default null + */ + latents?: components["schemas"]["LatentsField"] | null; + /** + * @description A mask of the region to apply the denoising process to. Values of 0.0 represent the regions to be fully denoised, and 1.0 represent the regions to be preserved. + * @default null + */ + denoise_mask?: components["schemas"]["DenoiseMaskField"] | null; + /** + * Denoising Start + * @description When to start denoising, expressed a percentage of total steps + * @default 0 + */ + denoising_start?: number; + /** + * Denoising End + * @description When to stop denoising, expressed a percentage of total steps + * @default 1 + */ + denoising_end?: number; + /** + * Add Noise + * @description Add noise based on denoising start. 
+ * @default true + */ + add_noise?: boolean; + /** + * Transformer + * @description Anima transformer model. + * @default null + */ + transformer?: components["schemas"]["TransformerField"] | null; + /** + * Positive Conditioning + * @description Positive conditioning tensor + * @default null + */ + positive_conditioning?: components["schemas"]["AnimaConditioningField"] | components["schemas"]["AnimaConditioningField"][] | null; + /** + * Negative Conditioning + * @description Negative conditioning tensor + * @default null + */ + negative_conditioning?: components["schemas"]["AnimaConditioningField"] | components["schemas"]["AnimaConditioningField"][] | null; + /** + * Guidance Scale + * @description Guidance scale for classifier-free guidance. Recommended: 4.0-5.0 for Anima. + * @default 4.5 + */ + guidance_scale?: number; + /** + * Width + * @description Width of the generated image. + * @default 1024 + */ + width?: number; + /** + * Height + * @description Height of the generated image. + * @default 1024 + */ + height?: number; + /** + * Steps + * @description Number of denoising steps. 30 recommended for Anima. + * @default 30 + */ + steps?: number; + /** + * Seed + * @description Randomness seed for reproducibility. + * @default 0 + */ + seed?: number; + /** + * Scheduler + * @description Scheduler (sampler) for the denoising process. + * @default euler + * @enum {string} + */ + scheduler?: "euler" | "heun" | "lcm"; + /** + * type + * @default anima_denoise + * @constant + */ + type: "anima_denoise"; + }; + /** + * Image to Latents - Anima + * @description Generates latents from an image using the Anima VAE (supports Wan 2.1 and FLUX VAE). 
+ */ + AnimaImageToLatentsInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to encode. + * @default null + */ + image?: components["schemas"]["ImageField"] | null; + /** + * @description VAE + * @default null + */ + vae?: components["schemas"]["VAEField"] | null; + /** + * type + * @default anima_i2l + * @constant + */ + type: "anima_i2l"; + }; + /** + * Latents to Image - Anima + * @description Generates an image from latents using the Anima VAE. + * + * Supports the Wan 2.1 QwenImage VAE (AutoencoderKLWan) with explicit + * latent denormalization, and FLUX VAE as fallback. + */ + AnimaLatentsToImageInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. 
+ * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description Latents tensor + * @default null + */ + latents?: components["schemas"]["LatentsField"] | null; + /** + * @description VAE + * @default null + */ + vae?: components["schemas"]["VAEField"] | null; + /** + * type + * @default anima_l2i + * @constant + */ + type: "anima_l2i"; + }; + /** + * Apply LoRA Collection - Anima + * @description Applies a collection of LoRAs to an Anima transformer. + */ + AnimaLoRACollectionLoader: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * LoRAs + * @description LoRA models and weights. May be a single LoRA or collection. + * @default null + */ + loras?: components["schemas"]["LoRAField"] | components["schemas"]["LoRAField"][] | null; + /** + * Transformer + * @description Transformer + * @default null + */ + transformer?: components["schemas"]["TransformerField"] | null; + /** + * Qwen3 Encoder + * @description Qwen3 tokenizer and text encoder + * @default null + */ + qwen3_encoder?: components["schemas"]["Qwen3EncoderField"] | null; + /** + * type + * @default anima_lora_collection_loader + * @constant + */ + type: "anima_lora_collection_loader"; + }; + /** + * Apply LoRA - Anima + * @description Apply a LoRA model to an Anima transformer and/or Qwen3 text encoder. + */ + AnimaLoRALoaderInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. 
+ */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * LoRA + * @description LoRA model to load + * @default null + */ + lora?: components["schemas"]["ModelIdentifierField"] | null; + /** + * Weight + * @description The weight at which the LoRA is applied to each model + * @default 0.75 + */ + weight?: number; + /** + * Anima Transformer + * @description Transformer + * @default null + */ + transformer?: components["schemas"]["TransformerField"] | null; + /** + * Qwen3 Encoder + * @description Qwen3 tokenizer and text encoder + * @default null + */ + qwen3_encoder?: components["schemas"]["Qwen3EncoderField"] | null; + /** + * type + * @default anima_lora_loader + * @constant + */ + type: "anima_lora_loader"; + }; + /** + * AnimaLoRALoaderOutput + * @description Anima LoRA Loader Output + */ + AnimaLoRALoaderOutput: { + /** + * Anima Transformer + * @description Transformer + * @default null + */ + transformer: components["schemas"]["TransformerField"] | null; + /** + * Qwen3 Encoder + * @description Qwen3 tokenizer and text encoder + * @default null + */ + qwen3_encoder: components["schemas"]["Qwen3EncoderField"] | null; + /** + * type + * @default anima_lora_loader_output + * @constant + */ + type: "anima_lora_loader_output"; + }; + /** + * Main Model - Anima + * @description Loads an Anima model, outputting its submodels. 
+ * + * Anima uses: + * - Transformer: Cosmos Predict2 DiT + LLM Adapter (from single-file checkpoint) + * - Qwen3 Encoder: Qwen3 0.6B (standalone single-file) + * - VAE: AutoencoderKLQwenImage / Wan 2.1 VAE (standalone single-file or FLUX VAE) + * - T5 Encoder: T5-XXL model (only the tokenizer submodel is used, for LLM Adapter token IDs) + */ + AnimaModelLoaderInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Transformer + * @description Anima main model (transformer + LLM adapter). + */ + model: components["schemas"]["ModelIdentifierField"]; + /** + * VAE + * @description Standalone VAE model. Anima uses a Wan 2.1 / QwenImage VAE (16-channel). If not provided, a FLUX VAE can be used as a fallback. + * @default null + */ + vae_model?: components["schemas"]["ModelIdentifierField"] | null; + /** + * Qwen3 Encoder + * @description Standalone Qwen3 0.6B Encoder model. + * @default null + */ + qwen3_encoder_model?: components["schemas"]["ModelIdentifierField"] | null; + /** + * T5 Encoder + * @description T5-XXL encoder model. The tokenizer submodel is used for Anima text encoding. + * @default null + */ + t5_encoder_model?: components["schemas"]["ModelIdentifierField"] | null; + /** + * type + * @default anima_model_loader + * @constant + */ + type: "anima_model_loader"; + }; + /** + * AnimaModelLoaderOutput + * @description Anima model loader output. 
+ */ + AnimaModelLoaderOutput: { + /** + * Transformer + * @description Transformer + */ + transformer: components["schemas"]["TransformerField"]; + /** + * Qwen3 Encoder + * @description Qwen3 tokenizer and text encoder + */ + qwen3_encoder: components["schemas"]["Qwen3EncoderField"]; + /** + * VAE + * @description VAE + */ + vae: components["schemas"]["VAEField"]; + /** + * T5 Encoder + * @description T5 tokenizer and text encoder + */ + t5_encoder: components["schemas"]["T5EncoderField"]; + /** + * type + * @default anima_model_loader_output + * @constant + */ + type: "anima_model_loader_output"; + }; + /** + * Prompt - Anima + * @description Encodes and preps a prompt for an Anima image. + * + * Uses Qwen3 0.6B for hidden state extraction and T5-XXL tokenizer for + * token IDs (no T5 model weights needed). Both are combined by the + * LLM Adapter inside the Anima transformer during denoising. + */ + AnimaTextEncoderInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Prompt + * @description Text prompt to encode. + * @default null + */ + prompt?: string | null; + /** + * Qwen3 Encoder + * @description Qwen3 tokenizer and text encoder + * @default null + */ + qwen3_encoder?: components["schemas"]["Qwen3EncoderField"] | null; + /** + * T5 Encoder + * @description T5 tokenizer and text encoder + * @default null + */ + t5_encoder?: components["schemas"]["T5EncoderField"] | null; + /** + * @description A mask defining the region that this conditioning prompt applies to. 
+ * @default null + */ + mask?: components["schemas"]["TensorField"] | null; + /** + * type + * @default anima_text_encoder + * @constant + */ + type: "anima_text_encoder"; + }; + AnyModelConfig: components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | 
components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | 
components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["Unknown_Config"]; /** * AppVersion * @description App Version Response @@ -2764,7 +3285,7 @@ export type components = { * fallback/null value `BaseModelType.Any` for these models, instead of making the model base optional. 
* @enum {string} */ - BaseModelType: "any" | "sd-1" | "sd-2" | "sd-3" | "sdxl" | "sdxl-refiner" | "flux" | "flux2" | "cogview4" | "z-image" | "unknown"; + BaseModelType: "any" | "sd-1" | "sd-2" | "sd-3" | "sdxl" | "sdxl-refiner" | "flux" | "flux2" | "cogview4" | "z-image" | "anima" | "unknown"; /** Batch */ Batch: { /** @@ -6420,7 +6941,7 @@ export type components = { * @description The generation mode that output this image * @default null */ - generation_mode?: ("txt2img" | "img2img" | "inpaint" | "outpaint" | "sdxl_txt2img" | "sdxl_img2img" | "sdxl_inpaint" | "sdxl_outpaint" | "flux_txt2img" | "flux_img2img" | "flux_inpaint" | "flux_outpaint" | "flux2_txt2img" | "flux2_img2img" | "flux2_inpaint" | "flux2_outpaint" | "sd3_txt2img" | "sd3_img2img" | "sd3_inpaint" | "sd3_outpaint" | "cogview4_txt2img" | "cogview4_img2img" | "cogview4_inpaint" | "cogview4_outpaint" | "z_image_txt2img" | "z_image_img2img" | "z_image_inpaint" | "z_image_outpaint") | null; + generation_mode?: ("txt2img" | "img2img" | "inpaint" | "outpaint" | "sdxl_txt2img" | "sdxl_img2img" | "sdxl_inpaint" | "sdxl_outpaint" | "flux_txt2img" | "flux_img2img" | "flux_inpaint" | "flux_outpaint" | "flux2_txt2img" | "flux2_img2img" | "flux2_inpaint" | "flux2_outpaint" | "sd3_txt2img" | "sd3_img2img" | "sd3_inpaint" | "sd3_outpaint" | "cogview4_txt2img" | "cogview4_img2img" | "cogview4_inpaint" | "cogview4_outpaint" | "z_image_txt2img" | "z_image_img2img" | "z_image_inpaint" | "z_image_outpaint" | "anima_txt2img" | "anima_img2img" | "anima_inpaint" | "anima_outpaint") | null; /** * Positive Prompt * @description The positive prompt parameter @@ -10657,7 +11178,7 @@ export type components = { * @description The nodes in this graph */ nodes?: { - [key: string]: components["schemas"]["AddInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["ApplyMaskTensorToImageInvocation"] | components["schemas"]["ApplyMaskToImageInvocation"] | 
components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyEdgeDetectionInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CanvasV2MaskAndCropInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CogView4DenoiseInvocation"] | components["schemas"]["CogView4ImageToLatentsInvocation"] | components["schemas"]["CogView4LatentsToImageInvocation"] | components["schemas"]["CogView4ModelLoaderInvocation"] | components["schemas"]["CogView4TextEncoderInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropImageToBoundingBoxInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeDetectionInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DenoiseLatentsMetaInvocation"] | 
components["schemas"]["DepthAnythingDepthEstimationInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["ExpandMaskWithFadeInvocation"] | components["schemas"]["FLUXLoRACollectionLoader"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatBatchInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatGenerator"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["Flux2DenoiseInvocation"] | components["schemas"]["Flux2KleinLoRACollectionLoader"] | components["schemas"]["Flux2KleinLoRALoaderInvocation"] | components["schemas"]["Flux2KleinModelLoaderInvocation"] | components["schemas"]["Flux2KleinTextEncoderInvocation"] | components["schemas"]["Flux2VaeDecodeInvocation"] | components["schemas"]["Flux2VaeEncodeInvocation"] | components["schemas"]["FluxControlLoRALoaderInvocation"] | components["schemas"]["FluxControlNetInvocation"] | components["schemas"]["FluxDenoiseInvocation"] | components["schemas"]["FluxDenoiseLatentsMetaInvocation"] | components["schemas"]["FluxFillInvocation"] | components["schemas"]["FluxIPAdapterInvocation"] | components["schemas"]["FluxKontextConcatenateImagesInvocation"] | components["schemas"]["FluxKontextInvocation"] | components["schemas"]["FluxLoRALoaderInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxReduxInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxVaeDecodeInvocation"] | components["schemas"]["FluxVaeEncodeInvocation"] | components["schemas"]["FreeUInvocation"] | 
components["schemas"]["GetMaskBoundingBoxInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HEDEdgeDetectionInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["ImageBatchInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageGenerator"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImageNoiseInvocation"] | components["schemas"]["ImagePanelLayoutInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerBatchInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerGenerator"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["InvokeAdjustImageHuePlusInvocation"] | 
components["schemas"]["InvokeEquivalentAchromaticLightnessInvocation"] | components["schemas"]["InvokeImageBlendInvocation"] | components["schemas"]["InvokeImageCompositorInvocation"] | components["schemas"]["InvokeImageDilateOrErodeInvocation"] | components["schemas"]["InvokeImageEnhanceInvocation"] | components["schemas"]["InvokeImageValueThresholdsInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LineartAnimeEdgeDetectionInvocation"] | components["schemas"]["LineartEdgeDetectionInvocation"] | components["schemas"]["LlavaOnevisionVllmInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MLSDDetectionInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediaPipeFaceDetectionInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataFieldExtractorInvocation"] | components["schemas"]["MetadataFromImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MetadataItemLinkedInvocation"] | components["schemas"]["MetadataToBoolCollectionInvocation"] | components["schemas"]["MetadataToBoolInvocation"] | components["schemas"]["MetadataToControlnetsInvocation"] | components["schemas"]["MetadataToFloatCollectionInvocation"] | 
components["schemas"]["MetadataToFloatInvocation"] | components["schemas"]["MetadataToIPAdaptersInvocation"] | components["schemas"]["MetadataToIntegerCollectionInvocation"] | components["schemas"]["MetadataToIntegerInvocation"] | components["schemas"]["MetadataToLorasCollectionInvocation"] | components["schemas"]["MetadataToLorasInvocation"] | components["schemas"]["MetadataToModelInvocation"] | components["schemas"]["MetadataToSDXLLorasInvocation"] | components["schemas"]["MetadataToSDXLModelInvocation"] | components["schemas"]["MetadataToSchedulerInvocation"] | components["schemas"]["MetadataToStringCollectionInvocation"] | components["schemas"]["MetadataToStringInvocation"] | components["schemas"]["MetadataToT2IAdaptersInvocation"] | components["schemas"]["MetadataToVAEInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalMapInvocation"] | components["schemas"]["PBRMapsInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PasteImageIntoBoundingBoxInvocation"] | components["schemas"]["PiDiNetEdgeDetectionInvocation"] | components["schemas"]["PromptTemplateInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SD3DenoiseInvocation"] | components["schemas"]["SD3ImageToLatentsInvocation"] | components["schemas"]["SD3LatentsToImageInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | 
components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["Sd3ModelLoaderInvocation"] | components["schemas"]["Sd3TextEncoderInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StringBatchInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringGenerator"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["ZImageControlInvocation"] | components["schemas"]["ZImageDenoiseInvocation"] | components["schemas"]["ZImageDenoiseMetaInvocation"] | components["schemas"]["ZImageImageToLatentsInvocation"] | components["schemas"]["ZImageLatentsToImageInvocation"] | components["schemas"]["ZImageLoRACollectionLoader"] | components["schemas"]["ZImageLoRALoaderInvocation"] | components["schemas"]["ZImageModelLoaderInvocation"] | components["schemas"]["ZImageSeedVarianceEnhancerInvocation"] | 
components["schemas"]["ZImageTextEncoderInvocation"]; + [key: string]: components["schemas"]["AddInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["AnimaDenoiseInvocation"] | components["schemas"]["AnimaImageToLatentsInvocation"] | components["schemas"]["AnimaLatentsToImageInvocation"] | components["schemas"]["AnimaLoRACollectionLoader"] | components["schemas"]["AnimaLoRALoaderInvocation"] | components["schemas"]["AnimaModelLoaderInvocation"] | components["schemas"]["AnimaTextEncoderInvocation"] | components["schemas"]["ApplyMaskTensorToImageInvocation"] | components["schemas"]["ApplyMaskToImageInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyEdgeDetectionInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CanvasV2MaskAndCropInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CogView4DenoiseInvocation"] | components["schemas"]["CogView4ImageToLatentsInvocation"] | components["schemas"]["CogView4LatentsToImageInvocation"] | components["schemas"]["CogView4ModelLoaderInvocation"] | components["schemas"]["CogView4TextEncoderInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | 
components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropImageToBoundingBoxInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeDetectionInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DenoiseLatentsMetaInvocation"] | components["schemas"]["DepthAnythingDepthEstimationInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["ExpandMaskWithFadeInvocation"] | components["schemas"]["FLUXLoRACollectionLoader"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatBatchInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatGenerator"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["Flux2DenoiseInvocation"] | components["schemas"]["Flux2KleinLoRACollectionLoader"] | components["schemas"]["Flux2KleinLoRALoaderInvocation"] | components["schemas"]["Flux2KleinModelLoaderInvocation"] | components["schemas"]["Flux2KleinTextEncoderInvocation"] | components["schemas"]["Flux2VaeDecodeInvocation"] | components["schemas"]["Flux2VaeEncodeInvocation"] | components["schemas"]["FluxControlLoRALoaderInvocation"] | components["schemas"]["FluxControlNetInvocation"] | components["schemas"]["FluxDenoiseInvocation"] | 
components["schemas"]["FluxDenoiseLatentsMetaInvocation"] | components["schemas"]["FluxFillInvocation"] | components["schemas"]["FluxIPAdapterInvocation"] | components["schemas"]["FluxKontextConcatenateImagesInvocation"] | components["schemas"]["FluxKontextInvocation"] | components["schemas"]["FluxLoRALoaderInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxReduxInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxVaeDecodeInvocation"] | components["schemas"]["FluxVaeEncodeInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GetMaskBoundingBoxInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HEDEdgeDetectionInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["ImageBatchInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageGenerator"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImageNoiseInvocation"] | components["schemas"]["ImagePanelLayoutInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | 
components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerBatchInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerGenerator"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["InvokeAdjustImageHuePlusInvocation"] | components["schemas"]["InvokeEquivalentAchromaticLightnessInvocation"] | components["schemas"]["InvokeImageBlendInvocation"] | components["schemas"]["InvokeImageCompositorInvocation"] | components["schemas"]["InvokeImageDilateOrErodeInvocation"] | components["schemas"]["InvokeImageEnhanceInvocation"] | components["schemas"]["InvokeImageValueThresholdsInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LineartAnimeEdgeDetectionInvocation"] | components["schemas"]["LineartEdgeDetectionInvocation"] | components["schemas"]["LlavaOnevisionVllmInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MLSDDetectionInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediaPipeFaceDetectionInvocation"] | 
components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataFieldExtractorInvocation"] | components["schemas"]["MetadataFromImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MetadataItemLinkedInvocation"] | components["schemas"]["MetadataToBoolCollectionInvocation"] | components["schemas"]["MetadataToBoolInvocation"] | components["schemas"]["MetadataToControlnetsInvocation"] | components["schemas"]["MetadataToFloatCollectionInvocation"] | components["schemas"]["MetadataToFloatInvocation"] | components["schemas"]["MetadataToIPAdaptersInvocation"] | components["schemas"]["MetadataToIntegerCollectionInvocation"] | components["schemas"]["MetadataToIntegerInvocation"] | components["schemas"]["MetadataToLorasCollectionInvocation"] | components["schemas"]["MetadataToLorasInvocation"] | components["schemas"]["MetadataToModelInvocation"] | components["schemas"]["MetadataToSDXLLorasInvocation"] | components["schemas"]["MetadataToSDXLModelInvocation"] | components["schemas"]["MetadataToSchedulerInvocation"] | components["schemas"]["MetadataToStringCollectionInvocation"] | components["schemas"]["MetadataToStringInvocation"] | components["schemas"]["MetadataToT2IAdaptersInvocation"] | components["schemas"]["MetadataToVAEInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalMapInvocation"] | components["schemas"]["PBRMapsInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PasteImageIntoBoundingBoxInvocation"] | components["schemas"]["PiDiNetEdgeDetectionInvocation"] | components["schemas"]["PromptTemplateInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["RandomFloatInvocation"] | 
components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SD3DenoiseInvocation"] | components["schemas"]["SD3ImageToLatentsInvocation"] | components["schemas"]["SD3LatentsToImageInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["Sd3ModelLoaderInvocation"] | components["schemas"]["Sd3TextEncoderInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StringBatchInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringGenerator"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | 
components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["ZImageControlInvocation"] | components["schemas"]["ZImageDenoiseInvocation"] | components["schemas"]["ZImageDenoiseMetaInvocation"] | components["schemas"]["ZImageImageToLatentsInvocation"] | components["schemas"]["ZImageLatentsToImageInvocation"] | components["schemas"]["ZImageLoRACollectionLoader"] | components["schemas"]["ZImageLoRALoaderInvocation"] | components["schemas"]["ZImageModelLoaderInvocation"] | components["schemas"]["ZImageSeedVarianceEnhancerInvocation"] | components["schemas"]["ZImageTextEncoderInvocation"]; }; /** * Edges @@ -10694,7 +11215,7 @@ export type components = { * @description The results of node executions */ results: { - [key: string]: components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["BoundingBoxCollectionOutput"] | components["schemas"]["BoundingBoxOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["CLIPSkipInvocationOutput"] | components["schemas"]["CalculateImageTilesOutput"] | components["schemas"]["CogView4ConditioningOutput"] | components["schemas"]["CogView4ModelLoaderOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["FloatGeneratorOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["Flux2KleinLoRALoaderOutput"] | components["schemas"]["Flux2KleinModelLoaderOutput"] | components["schemas"]["FluxConditioningCollectionOutput"] | components["schemas"]["FluxConditioningOutput"] | 
components["schemas"]["FluxControlLoRALoaderOutput"] | components["schemas"]["FluxControlNetOutput"] | components["schemas"]["FluxFillOutput"] | components["schemas"]["FluxKontextOutput"] | components["schemas"]["FluxLoRALoaderOutput"] | components["schemas"]["FluxModelLoaderOutput"] | components["schemas"]["FluxReduxOutput"] | components["schemas"]["GradientMaskOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["IdealSizeOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["ImageGeneratorOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["ImagePanelCoordinateOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["IntegerGeneratorOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["LatentsMetaOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["LoRALoaderOutput"] | components["schemas"]["LoRASelectorOutput"] | components["schemas"]["MDControlListOutput"] | components["schemas"]["MDIPAdapterListOutput"] | components["schemas"]["MDT2IAdapterListOutput"] | components["schemas"]["MaskOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["MetadataToLorasCollectionOutput"] | components["schemas"]["MetadataToModelOutput"] | components["schemas"]["MetadataToSDXLModelOutput"] | components["schemas"]["ModelIdentifierOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["PBRMapsOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["PromptTemplateOutput"] | components["schemas"]["SD3ConditioningOutput"] | components["schemas"]["SDXLLoRALoaderOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | 
components["schemas"]["SchedulerOutput"] | components["schemas"]["Sd3ModelLoaderOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["String2Output"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["StringGeneratorOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["TileToPropertiesOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["VAEOutput"] | components["schemas"]["ZImageConditioningOutput"] | components["schemas"]["ZImageControlOutput"] | components["schemas"]["ZImageLoRALoaderOutput"] | components["schemas"]["ZImageModelLoaderOutput"]; + [key: string]: components["schemas"]["AnimaConditioningOutput"] | components["schemas"]["AnimaLoRALoaderOutput"] | components["schemas"]["AnimaModelLoaderOutput"] | components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["BoundingBoxCollectionOutput"] | components["schemas"]["BoundingBoxOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["CLIPSkipInvocationOutput"] | components["schemas"]["CalculateImageTilesOutput"] | components["schemas"]["CogView4ConditioningOutput"] | components["schemas"]["CogView4ModelLoaderOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["FloatGeneratorOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["Flux2KleinLoRALoaderOutput"] | components["schemas"]["Flux2KleinModelLoaderOutput"] | 
components["schemas"]["FluxConditioningCollectionOutput"] | components["schemas"]["FluxConditioningOutput"] | components["schemas"]["FluxControlLoRALoaderOutput"] | components["schemas"]["FluxControlNetOutput"] | components["schemas"]["FluxFillOutput"] | components["schemas"]["FluxKontextOutput"] | components["schemas"]["FluxLoRALoaderOutput"] | components["schemas"]["FluxModelLoaderOutput"] | components["schemas"]["FluxReduxOutput"] | components["schemas"]["GradientMaskOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["IdealSizeOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["ImageGeneratorOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["ImagePanelCoordinateOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["IntegerGeneratorOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["LatentsMetaOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["LoRALoaderOutput"] | components["schemas"]["LoRASelectorOutput"] | components["schemas"]["MDControlListOutput"] | components["schemas"]["MDIPAdapterListOutput"] | components["schemas"]["MDT2IAdapterListOutput"] | components["schemas"]["MaskOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["MetadataToLorasCollectionOutput"] | components["schemas"]["MetadataToModelOutput"] | components["schemas"]["MetadataToSDXLModelOutput"] | components["schemas"]["ModelIdentifierOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["PBRMapsOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["PromptTemplateOutput"] | components["schemas"]["SD3ConditioningOutput"] | components["schemas"]["SDXLLoRALoaderOutput"] | 
components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["Sd3ModelLoaderOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["String2Output"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["StringGeneratorOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["TileToPropertiesOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["VAEOutput"] | components["schemas"]["ZImageConditioningOutput"] | components["schemas"]["ZImageControlOutput"] | components["schemas"]["ZImageLoRALoaderOutput"] | components["schemas"]["ZImageModelLoaderOutput"]; }; /** * Errors @@ -13879,7 +14400,7 @@ export type components = { * Invocation * @description The ID of the invocation */ - invocation: components["schemas"]["AddInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["ApplyMaskTensorToImageInvocation"] | components["schemas"]["ApplyMaskToImageInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyEdgeDetectionInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CanvasV2MaskAndCropInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CogView4DenoiseInvocation"] | 
components["schemas"]["CogView4ImageToLatentsInvocation"] | components["schemas"]["CogView4LatentsToImageInvocation"] | components["schemas"]["CogView4ModelLoaderInvocation"] | components["schemas"]["CogView4TextEncoderInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropImageToBoundingBoxInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeDetectionInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DenoiseLatentsMetaInvocation"] | components["schemas"]["DepthAnythingDepthEstimationInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["ExpandMaskWithFadeInvocation"] | components["schemas"]["FLUXLoRACollectionLoader"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatBatchInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatGenerator"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | 
components["schemas"]["Flux2DenoiseInvocation"] | components["schemas"]["Flux2KleinLoRACollectionLoader"] | components["schemas"]["Flux2KleinLoRALoaderInvocation"] | components["schemas"]["Flux2KleinModelLoaderInvocation"] | components["schemas"]["Flux2KleinTextEncoderInvocation"] | components["schemas"]["Flux2VaeDecodeInvocation"] | components["schemas"]["Flux2VaeEncodeInvocation"] | components["schemas"]["FluxControlLoRALoaderInvocation"] | components["schemas"]["FluxControlNetInvocation"] | components["schemas"]["FluxDenoiseInvocation"] | components["schemas"]["FluxDenoiseLatentsMetaInvocation"] | components["schemas"]["FluxFillInvocation"] | components["schemas"]["FluxIPAdapterInvocation"] | components["schemas"]["FluxKontextConcatenateImagesInvocation"] | components["schemas"]["FluxKontextInvocation"] | components["schemas"]["FluxLoRALoaderInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxReduxInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxVaeDecodeInvocation"] | components["schemas"]["FluxVaeEncodeInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GetMaskBoundingBoxInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HEDEdgeDetectionInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["ImageBatchInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageGenerator"] | components["schemas"]["ImageHueAdjustmentInvocation"] | 
components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImageNoiseInvocation"] | components["schemas"]["ImagePanelLayoutInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerBatchInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerGenerator"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["InvokeAdjustImageHuePlusInvocation"] | components["schemas"]["InvokeEquivalentAchromaticLightnessInvocation"] | components["schemas"]["InvokeImageBlendInvocation"] | components["schemas"]["InvokeImageCompositorInvocation"] | components["schemas"]["InvokeImageDilateOrErodeInvocation"] | components["schemas"]["InvokeImageEnhanceInvocation"] | components["schemas"]["InvokeImageValueThresholdsInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LineartAnimeEdgeDetectionInvocation"] | components["schemas"]["LineartEdgeDetectionInvocation"] | components["schemas"]["LlavaOnevisionVllmInvocation"] | components["schemas"]["LoRACollectionLoader"] | 
components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MLSDDetectionInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediaPipeFaceDetectionInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataFieldExtractorInvocation"] | components["schemas"]["MetadataFromImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MetadataItemLinkedInvocation"] | components["schemas"]["MetadataToBoolCollectionInvocation"] | components["schemas"]["MetadataToBoolInvocation"] | components["schemas"]["MetadataToControlnetsInvocation"] | components["schemas"]["MetadataToFloatCollectionInvocation"] | components["schemas"]["MetadataToFloatInvocation"] | components["schemas"]["MetadataToIPAdaptersInvocation"] | components["schemas"]["MetadataToIntegerCollectionInvocation"] | components["schemas"]["MetadataToIntegerInvocation"] | components["schemas"]["MetadataToLorasCollectionInvocation"] | components["schemas"]["MetadataToLorasInvocation"] | components["schemas"]["MetadataToModelInvocation"] | components["schemas"]["MetadataToSDXLLorasInvocation"] | components["schemas"]["MetadataToSDXLModelInvocation"] | components["schemas"]["MetadataToSchedulerInvocation"] | components["schemas"]["MetadataToStringCollectionInvocation"] | components["schemas"]["MetadataToStringInvocation"] | components["schemas"]["MetadataToT2IAdaptersInvocation"] | components["schemas"]["MetadataToVAEInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | 
components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalMapInvocation"] | components["schemas"]["PBRMapsInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PasteImageIntoBoundingBoxInvocation"] | components["schemas"]["PiDiNetEdgeDetectionInvocation"] | components["schemas"]["PromptTemplateInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SD3DenoiseInvocation"] | components["schemas"]["SD3ImageToLatentsInvocation"] | components["schemas"]["SD3LatentsToImageInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["Sd3ModelLoaderInvocation"] | components["schemas"]["Sd3TextEncoderInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StringBatchInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringGenerator"] | 
components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["ZImageControlInvocation"] | components["schemas"]["ZImageDenoiseInvocation"] | components["schemas"]["ZImageDenoiseMetaInvocation"] | components["schemas"]["ZImageImageToLatentsInvocation"] | components["schemas"]["ZImageLatentsToImageInvocation"] | components["schemas"]["ZImageLoRACollectionLoader"] | components["schemas"]["ZImageLoRALoaderInvocation"] | components["schemas"]["ZImageModelLoaderInvocation"] | components["schemas"]["ZImageSeedVarianceEnhancerInvocation"] | components["schemas"]["ZImageTextEncoderInvocation"]; + invocation: components["schemas"]["AddInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["AnimaDenoiseInvocation"] | components["schemas"]["AnimaImageToLatentsInvocation"] | components["schemas"]["AnimaLatentsToImageInvocation"] | components["schemas"]["AnimaLoRACollectionLoader"] | components["schemas"]["AnimaLoRALoaderInvocation"] | components["schemas"]["AnimaModelLoaderInvocation"] | components["schemas"]["AnimaTextEncoderInvocation"] | components["schemas"]["ApplyMaskTensorToImageInvocation"] | components["schemas"]["ApplyMaskToImageInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | 
components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyEdgeDetectionInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CanvasV2MaskAndCropInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CogView4DenoiseInvocation"] | components["schemas"]["CogView4ImageToLatentsInvocation"] | components["schemas"]["CogView4LatentsToImageInvocation"] | components["schemas"]["CogView4ModelLoaderInvocation"] | components["schemas"]["CogView4TextEncoderInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropImageToBoundingBoxInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeDetectionInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DenoiseLatentsMetaInvocation"] | components["schemas"]["DepthAnythingDepthEstimationInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | 
components["schemas"]["ExpandMaskWithFadeInvocation"] | components["schemas"]["FLUXLoRACollectionLoader"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatBatchInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatGenerator"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["Flux2DenoiseInvocation"] | components["schemas"]["Flux2KleinLoRACollectionLoader"] | components["schemas"]["Flux2KleinLoRALoaderInvocation"] | components["schemas"]["Flux2KleinModelLoaderInvocation"] | components["schemas"]["Flux2KleinTextEncoderInvocation"] | components["schemas"]["Flux2VaeDecodeInvocation"] | components["schemas"]["Flux2VaeEncodeInvocation"] | components["schemas"]["FluxControlLoRALoaderInvocation"] | components["schemas"]["FluxControlNetInvocation"] | components["schemas"]["FluxDenoiseInvocation"] | components["schemas"]["FluxDenoiseLatentsMetaInvocation"] | components["schemas"]["FluxFillInvocation"] | components["schemas"]["FluxIPAdapterInvocation"] | components["schemas"]["FluxKontextConcatenateImagesInvocation"] | components["schemas"]["FluxKontextInvocation"] | components["schemas"]["FluxLoRALoaderInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxReduxInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxVaeDecodeInvocation"] | components["schemas"]["FluxVaeEncodeInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GetMaskBoundingBoxInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HEDEdgeDetectionInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | 
components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["ImageBatchInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageGenerator"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImageNoiseInvocation"] | components["schemas"]["ImagePanelLayoutInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerBatchInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerGenerator"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["InvokeAdjustImageHuePlusInvocation"] | components["schemas"]["InvokeEquivalentAchromaticLightnessInvocation"] | components["schemas"]["InvokeImageBlendInvocation"] | components["schemas"]["InvokeImageCompositorInvocation"] | 
components["schemas"]["InvokeImageDilateOrErodeInvocation"] | components["schemas"]["InvokeImageEnhanceInvocation"] | components["schemas"]["InvokeImageValueThresholdsInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LineartAnimeEdgeDetectionInvocation"] | components["schemas"]["LineartEdgeDetectionInvocation"] | components["schemas"]["LlavaOnevisionVllmInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MLSDDetectionInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediaPipeFaceDetectionInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataFieldExtractorInvocation"] | components["schemas"]["MetadataFromImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MetadataItemLinkedInvocation"] | components["schemas"]["MetadataToBoolCollectionInvocation"] | components["schemas"]["MetadataToBoolInvocation"] | components["schemas"]["MetadataToControlnetsInvocation"] | components["schemas"]["MetadataToFloatCollectionInvocation"] | components["schemas"]["MetadataToFloatInvocation"] | components["schemas"]["MetadataToIPAdaptersInvocation"] | components["schemas"]["MetadataToIntegerCollectionInvocation"] | 
components["schemas"]["MetadataToIntegerInvocation"] | components["schemas"]["MetadataToLorasCollectionInvocation"] | components["schemas"]["MetadataToLorasInvocation"] | components["schemas"]["MetadataToModelInvocation"] | components["schemas"]["MetadataToSDXLLorasInvocation"] | components["schemas"]["MetadataToSDXLModelInvocation"] | components["schemas"]["MetadataToSchedulerInvocation"] | components["schemas"]["MetadataToStringCollectionInvocation"] | components["schemas"]["MetadataToStringInvocation"] | components["schemas"]["MetadataToT2IAdaptersInvocation"] | components["schemas"]["MetadataToVAEInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalMapInvocation"] | components["schemas"]["PBRMapsInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PasteImageIntoBoundingBoxInvocation"] | components["schemas"]["PiDiNetEdgeDetectionInvocation"] | components["schemas"]["PromptTemplateInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SD3DenoiseInvocation"] | components["schemas"]["SD3ImageToLatentsInvocation"] | components["schemas"]["SD3LatentsToImageInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | 
components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["Sd3ModelLoaderInvocation"] | components["schemas"]["Sd3TextEncoderInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StringBatchInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringGenerator"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["ZImageControlInvocation"] | components["schemas"]["ZImageDenoiseInvocation"] | components["schemas"]["ZImageDenoiseMetaInvocation"] | components["schemas"]["ZImageImageToLatentsInvocation"] | components["schemas"]["ZImageLatentsToImageInvocation"] | components["schemas"]["ZImageLoRACollectionLoader"] | components["schemas"]["ZImageLoRALoaderInvocation"] | components["schemas"]["ZImageModelLoaderInvocation"] | components["schemas"]["ZImageSeedVarianceEnhancerInvocation"] | components["schemas"]["ZImageTextEncoderInvocation"]; /** * Invocation Source Id * @description The ID of the prepared invocation's source node @@ -13889,7 +14410,7 @@ 
export type components = { * Result * @description The result of the invocation */ - result: components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["BoundingBoxCollectionOutput"] | components["schemas"]["BoundingBoxOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["CLIPSkipInvocationOutput"] | components["schemas"]["CalculateImageTilesOutput"] | components["schemas"]["CogView4ConditioningOutput"] | components["schemas"]["CogView4ModelLoaderOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["FloatGeneratorOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["Flux2KleinLoRALoaderOutput"] | components["schemas"]["Flux2KleinModelLoaderOutput"] | components["schemas"]["FluxConditioningCollectionOutput"] | components["schemas"]["FluxConditioningOutput"] | components["schemas"]["FluxControlLoRALoaderOutput"] | components["schemas"]["FluxControlNetOutput"] | components["schemas"]["FluxFillOutput"] | components["schemas"]["FluxKontextOutput"] | components["schemas"]["FluxLoRALoaderOutput"] | components["schemas"]["FluxModelLoaderOutput"] | components["schemas"]["FluxReduxOutput"] | components["schemas"]["GradientMaskOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["IdealSizeOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["ImageGeneratorOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["ImagePanelCoordinateOutput"] | 
components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["IntegerGeneratorOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["LatentsMetaOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["LoRALoaderOutput"] | components["schemas"]["LoRASelectorOutput"] | components["schemas"]["MDControlListOutput"] | components["schemas"]["MDIPAdapterListOutput"] | components["schemas"]["MDT2IAdapterListOutput"] | components["schemas"]["MaskOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["MetadataToLorasCollectionOutput"] | components["schemas"]["MetadataToModelOutput"] | components["schemas"]["MetadataToSDXLModelOutput"] | components["schemas"]["ModelIdentifierOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["PBRMapsOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["PromptTemplateOutput"] | components["schemas"]["SD3ConditioningOutput"] | components["schemas"]["SDXLLoRALoaderOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["Sd3ModelLoaderOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["String2Output"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["StringGeneratorOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["TileToPropertiesOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["VAEOutput"] | components["schemas"]["ZImageConditioningOutput"] | components["schemas"]["ZImageControlOutput"] | 
components["schemas"]["ZImageLoRALoaderOutput"] | components["schemas"]["ZImageModelLoaderOutput"]; + result: components["schemas"]["AnimaConditioningOutput"] | components["schemas"]["AnimaLoRALoaderOutput"] | components["schemas"]["AnimaModelLoaderOutput"] | components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["BoundingBoxCollectionOutput"] | components["schemas"]["BoundingBoxOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["CLIPSkipInvocationOutput"] | components["schemas"]["CalculateImageTilesOutput"] | components["schemas"]["CogView4ConditioningOutput"] | components["schemas"]["CogView4ModelLoaderOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["FloatGeneratorOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["Flux2KleinLoRALoaderOutput"] | components["schemas"]["Flux2KleinModelLoaderOutput"] | components["schemas"]["FluxConditioningCollectionOutput"] | components["schemas"]["FluxConditioningOutput"] | components["schemas"]["FluxControlLoRALoaderOutput"] | components["schemas"]["FluxControlNetOutput"] | components["schemas"]["FluxFillOutput"] | components["schemas"]["FluxKontextOutput"] | components["schemas"]["FluxLoRALoaderOutput"] | components["schemas"]["FluxModelLoaderOutput"] | components["schemas"]["FluxReduxOutput"] | components["schemas"]["GradientMaskOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["IdealSizeOutput"] | components["schemas"]["ImageCollectionOutput"] | 
components["schemas"]["ImageGeneratorOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["ImagePanelCoordinateOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["IntegerGeneratorOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["LatentsMetaOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["LoRALoaderOutput"] | components["schemas"]["LoRASelectorOutput"] | components["schemas"]["MDControlListOutput"] | components["schemas"]["MDIPAdapterListOutput"] | components["schemas"]["MDT2IAdapterListOutput"] | components["schemas"]["MaskOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["MetadataToLorasCollectionOutput"] | components["schemas"]["MetadataToModelOutput"] | components["schemas"]["MetadataToSDXLModelOutput"] | components["schemas"]["ModelIdentifierOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["PBRMapsOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["PromptTemplateOutput"] | components["schemas"]["SD3ConditioningOutput"] | components["schemas"]["SDXLLoRALoaderOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["Sd3ModelLoaderOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["String2Output"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["StringGeneratorOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["TileToPropertiesOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["VAEOutput"] | 
components["schemas"]["ZImageConditioningOutput"] | components["schemas"]["ZImageControlOutput"] | components["schemas"]["ZImageLoRALoaderOutput"] | components["schemas"]["ZImageModelLoaderOutput"]; }; /** * InvocationErrorEvent @@ -13943,7 +14464,7 @@ export type components = { * Invocation * @description The ID of the invocation */ - invocation: components["schemas"]["AddInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["ApplyMaskTensorToImageInvocation"] | components["schemas"]["ApplyMaskToImageInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyEdgeDetectionInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CanvasV2MaskAndCropInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CogView4DenoiseInvocation"] | components["schemas"]["CogView4ImageToLatentsInvocation"] | components["schemas"]["CogView4LatentsToImageInvocation"] | components["schemas"]["CogView4ModelLoaderInvocation"] | components["schemas"]["CogView4TextEncoderInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleInvocation"] | 
components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropImageToBoundingBoxInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeDetectionInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DenoiseLatentsMetaInvocation"] | components["schemas"]["DepthAnythingDepthEstimationInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["ExpandMaskWithFadeInvocation"] | components["schemas"]["FLUXLoRACollectionLoader"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatBatchInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatGenerator"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["Flux2DenoiseInvocation"] | components["schemas"]["Flux2KleinLoRACollectionLoader"] | components["schemas"]["Flux2KleinLoRALoaderInvocation"] | components["schemas"]["Flux2KleinModelLoaderInvocation"] | components["schemas"]["Flux2KleinTextEncoderInvocation"] | components["schemas"]["Flux2VaeDecodeInvocation"] | components["schemas"]["Flux2VaeEncodeInvocation"] | components["schemas"]["FluxControlLoRALoaderInvocation"] | components["schemas"]["FluxControlNetInvocation"] | components["schemas"]["FluxDenoiseInvocation"] | components["schemas"]["FluxDenoiseLatentsMetaInvocation"] | components["schemas"]["FluxFillInvocation"] | 
components["schemas"]["FluxIPAdapterInvocation"] | components["schemas"]["FluxKontextConcatenateImagesInvocation"] | components["schemas"]["FluxKontextInvocation"] | components["schemas"]["FluxLoRALoaderInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxReduxInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxVaeDecodeInvocation"] | components["schemas"]["FluxVaeEncodeInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GetMaskBoundingBoxInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HEDEdgeDetectionInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["ImageBatchInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageGenerator"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImageNoiseInvocation"] | components["schemas"]["ImagePanelLayoutInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | 
components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerBatchInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerGenerator"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["InvokeAdjustImageHuePlusInvocation"] | components["schemas"]["InvokeEquivalentAchromaticLightnessInvocation"] | components["schemas"]["InvokeImageBlendInvocation"] | components["schemas"]["InvokeImageCompositorInvocation"] | components["schemas"]["InvokeImageDilateOrErodeInvocation"] | components["schemas"]["InvokeImageEnhanceInvocation"] | components["schemas"]["InvokeImageValueThresholdsInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LineartAnimeEdgeDetectionInvocation"] | components["schemas"]["LineartEdgeDetectionInvocation"] | components["schemas"]["LlavaOnevisionVllmInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MLSDDetectionInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediaPipeFaceDetectionInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | 
components["schemas"]["MetadataFieldExtractorInvocation"] | components["schemas"]["MetadataFromImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MetadataItemLinkedInvocation"] | components["schemas"]["MetadataToBoolCollectionInvocation"] | components["schemas"]["MetadataToBoolInvocation"] | components["schemas"]["MetadataToControlnetsInvocation"] | components["schemas"]["MetadataToFloatCollectionInvocation"] | components["schemas"]["MetadataToFloatInvocation"] | components["schemas"]["MetadataToIPAdaptersInvocation"] | components["schemas"]["MetadataToIntegerCollectionInvocation"] | components["schemas"]["MetadataToIntegerInvocation"] | components["schemas"]["MetadataToLorasCollectionInvocation"] | components["schemas"]["MetadataToLorasInvocation"] | components["schemas"]["MetadataToModelInvocation"] | components["schemas"]["MetadataToSDXLLorasInvocation"] | components["schemas"]["MetadataToSDXLModelInvocation"] | components["schemas"]["MetadataToSchedulerInvocation"] | components["schemas"]["MetadataToStringCollectionInvocation"] | components["schemas"]["MetadataToStringInvocation"] | components["schemas"]["MetadataToT2IAdaptersInvocation"] | components["schemas"]["MetadataToVAEInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalMapInvocation"] | components["schemas"]["PBRMapsInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PasteImageIntoBoundingBoxInvocation"] | components["schemas"]["PiDiNetEdgeDetectionInvocation"] | components["schemas"]["PromptTemplateInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] 
| components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SD3DenoiseInvocation"] | components["schemas"]["SD3ImageToLatentsInvocation"] | components["schemas"]["SD3LatentsToImageInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["Sd3ModelLoaderInvocation"] | components["schemas"]["Sd3TextEncoderInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StringBatchInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringGenerator"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | 
components["schemas"]["ZImageControlInvocation"] | components["schemas"]["ZImageDenoiseInvocation"] | components["schemas"]["ZImageDenoiseMetaInvocation"] | components["schemas"]["ZImageImageToLatentsInvocation"] | components["schemas"]["ZImageLatentsToImageInvocation"] | components["schemas"]["ZImageLoRACollectionLoader"] | components["schemas"]["ZImageLoRALoaderInvocation"] | components["schemas"]["ZImageModelLoaderInvocation"] | components["schemas"]["ZImageSeedVarianceEnhancerInvocation"] | components["schemas"]["ZImageTextEncoderInvocation"]; + invocation: components["schemas"]["AddInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["AnimaDenoiseInvocation"] | components["schemas"]["AnimaImageToLatentsInvocation"] | components["schemas"]["AnimaLatentsToImageInvocation"] | components["schemas"]["AnimaLoRACollectionLoader"] | components["schemas"]["AnimaLoRALoaderInvocation"] | components["schemas"]["AnimaModelLoaderInvocation"] | components["schemas"]["AnimaTextEncoderInvocation"] | components["schemas"]["ApplyMaskTensorToImageInvocation"] | components["schemas"]["ApplyMaskToImageInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyEdgeDetectionInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CanvasV2MaskAndCropInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CogView4DenoiseInvocation"] | 
components["schemas"]["CogView4ImageToLatentsInvocation"] | components["schemas"]["CogView4LatentsToImageInvocation"] | components["schemas"]["CogView4ModelLoaderInvocation"] | components["schemas"]["CogView4TextEncoderInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropImageToBoundingBoxInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeDetectionInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DenoiseLatentsMetaInvocation"] | components["schemas"]["DepthAnythingDepthEstimationInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["ExpandMaskWithFadeInvocation"] | components["schemas"]["FLUXLoRACollectionLoader"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatBatchInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatGenerator"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | 
components["schemas"]["Flux2DenoiseInvocation"] | components["schemas"]["Flux2KleinLoRACollectionLoader"] | components["schemas"]["Flux2KleinLoRALoaderInvocation"] | components["schemas"]["Flux2KleinModelLoaderInvocation"] | components["schemas"]["Flux2KleinTextEncoderInvocation"] | components["schemas"]["Flux2VaeDecodeInvocation"] | components["schemas"]["Flux2VaeEncodeInvocation"] | components["schemas"]["FluxControlLoRALoaderInvocation"] | components["schemas"]["FluxControlNetInvocation"] | components["schemas"]["FluxDenoiseInvocation"] | components["schemas"]["FluxDenoiseLatentsMetaInvocation"] | components["schemas"]["FluxFillInvocation"] | components["schemas"]["FluxIPAdapterInvocation"] | components["schemas"]["FluxKontextConcatenateImagesInvocation"] | components["schemas"]["FluxKontextInvocation"] | components["schemas"]["FluxLoRALoaderInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxReduxInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxVaeDecodeInvocation"] | components["schemas"]["FluxVaeEncodeInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GetMaskBoundingBoxInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HEDEdgeDetectionInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["ImageBatchInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageGenerator"] | components["schemas"]["ImageHueAdjustmentInvocation"] | 
components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImageNoiseInvocation"] | components["schemas"]["ImagePanelLayoutInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerBatchInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerGenerator"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["InvokeAdjustImageHuePlusInvocation"] | components["schemas"]["InvokeEquivalentAchromaticLightnessInvocation"] | components["schemas"]["InvokeImageBlendInvocation"] | components["schemas"]["InvokeImageCompositorInvocation"] | components["schemas"]["InvokeImageDilateOrErodeInvocation"] | components["schemas"]["InvokeImageEnhanceInvocation"] | components["schemas"]["InvokeImageValueThresholdsInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LineartAnimeEdgeDetectionInvocation"] | components["schemas"]["LineartEdgeDetectionInvocation"] | components["schemas"]["LlavaOnevisionVllmInvocation"] | components["schemas"]["LoRACollectionLoader"] | 
components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MLSDDetectionInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediaPipeFaceDetectionInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataFieldExtractorInvocation"] | components["schemas"]["MetadataFromImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MetadataItemLinkedInvocation"] | components["schemas"]["MetadataToBoolCollectionInvocation"] | components["schemas"]["MetadataToBoolInvocation"] | components["schemas"]["MetadataToControlnetsInvocation"] | components["schemas"]["MetadataToFloatCollectionInvocation"] | components["schemas"]["MetadataToFloatInvocation"] | components["schemas"]["MetadataToIPAdaptersInvocation"] | components["schemas"]["MetadataToIntegerCollectionInvocation"] | components["schemas"]["MetadataToIntegerInvocation"] | components["schemas"]["MetadataToLorasCollectionInvocation"] | components["schemas"]["MetadataToLorasInvocation"] | components["schemas"]["MetadataToModelInvocation"] | components["schemas"]["MetadataToSDXLLorasInvocation"] | components["schemas"]["MetadataToSDXLModelInvocation"] | components["schemas"]["MetadataToSchedulerInvocation"] | components["schemas"]["MetadataToStringCollectionInvocation"] | components["schemas"]["MetadataToStringInvocation"] | components["schemas"]["MetadataToT2IAdaptersInvocation"] | components["schemas"]["MetadataToVAEInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | 
components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalMapInvocation"] | components["schemas"]["PBRMapsInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PasteImageIntoBoundingBoxInvocation"] | components["schemas"]["PiDiNetEdgeDetectionInvocation"] | components["schemas"]["PromptTemplateInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SD3DenoiseInvocation"] | components["schemas"]["SD3ImageToLatentsInvocation"] | components["schemas"]["SD3LatentsToImageInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["Sd3ModelLoaderInvocation"] | components["schemas"]["Sd3TextEncoderInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StringBatchInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringGenerator"] | 
components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["ZImageControlInvocation"] | components["schemas"]["ZImageDenoiseInvocation"] | components["schemas"]["ZImageDenoiseMetaInvocation"] | components["schemas"]["ZImageImageToLatentsInvocation"] | components["schemas"]["ZImageLatentsToImageInvocation"] | components["schemas"]["ZImageLoRACollectionLoader"] | components["schemas"]["ZImageLoRALoaderInvocation"] | components["schemas"]["ZImageModelLoaderInvocation"] | components["schemas"]["ZImageSeedVarianceEnhancerInvocation"] | components["schemas"]["ZImageTextEncoderInvocation"]; /** * Invocation Source Id * @description The ID of the prepared invocation's source node @@ -13968,6 +14489,13 @@ export type components = { InvocationOutputMap: { add: components["schemas"]["IntegerOutput"]; alpha_mask_to_tensor: components["schemas"]["MaskOutput"]; + anima_denoise: components["schemas"]["LatentsOutput"]; + anima_i2l: components["schemas"]["LatentsOutput"]; + anima_l2i: components["schemas"]["ImageOutput"]; + anima_lora_collection_loader: components["schemas"]["AnimaLoRALoaderOutput"]; + anima_lora_loader: components["schemas"]["AnimaLoRALoaderOutput"]; + anima_model_loader: components["schemas"]["AnimaModelLoaderOutput"]; + anima_text_encoder: components["schemas"]["AnimaConditioningOutput"]; apply_mask_to_image: components["schemas"]["ImageOutput"]; apply_tensor_mask_to_image: 
components["schemas"]["ImageOutput"]; blank_image: components["schemas"]["ImageOutput"]; @@ -14249,7 +14777,7 @@ export type components = { * Invocation * @description The ID of the invocation */ - invocation: components["schemas"]["AddInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["ApplyMaskTensorToImageInvocation"] | components["schemas"]["ApplyMaskToImageInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyEdgeDetectionInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CanvasV2MaskAndCropInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CogView4DenoiseInvocation"] | components["schemas"]["CogView4ImageToLatentsInvocation"] | components["schemas"]["CogView4LatentsToImageInvocation"] | components["schemas"]["CogView4ModelLoaderInvocation"] | components["schemas"]["CogView4TextEncoderInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] 
| components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropImageToBoundingBoxInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeDetectionInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DenoiseLatentsMetaInvocation"] | components["schemas"]["DepthAnythingDepthEstimationInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["ExpandMaskWithFadeInvocation"] | components["schemas"]["FLUXLoRACollectionLoader"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatBatchInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatGenerator"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["Flux2DenoiseInvocation"] | components["schemas"]["Flux2KleinLoRACollectionLoader"] | components["schemas"]["Flux2KleinLoRALoaderInvocation"] | components["schemas"]["Flux2KleinModelLoaderInvocation"] | components["schemas"]["Flux2KleinTextEncoderInvocation"] | components["schemas"]["Flux2VaeDecodeInvocation"] | components["schemas"]["Flux2VaeEncodeInvocation"] | components["schemas"]["FluxControlLoRALoaderInvocation"] | components["schemas"]["FluxControlNetInvocation"] | components["schemas"]["FluxDenoiseInvocation"] | components["schemas"]["FluxDenoiseLatentsMetaInvocation"] | components["schemas"]["FluxFillInvocation"] | components["schemas"]["FluxIPAdapterInvocation"] | components["schemas"]["FluxKontextConcatenateImagesInvocation"] | components["schemas"]["FluxKontextInvocation"] | 
components["schemas"]["FluxLoRALoaderInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxReduxInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxVaeDecodeInvocation"] | components["schemas"]["FluxVaeEncodeInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GetMaskBoundingBoxInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HEDEdgeDetectionInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["ImageBatchInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageGenerator"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImageNoiseInvocation"] | components["schemas"]["ImagePanelLayoutInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | 
components["schemas"]["IntegerBatchInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerGenerator"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["InvokeAdjustImageHuePlusInvocation"] | components["schemas"]["InvokeEquivalentAchromaticLightnessInvocation"] | components["schemas"]["InvokeImageBlendInvocation"] | components["schemas"]["InvokeImageCompositorInvocation"] | components["schemas"]["InvokeImageDilateOrErodeInvocation"] | components["schemas"]["InvokeImageEnhanceInvocation"] | components["schemas"]["InvokeImageValueThresholdsInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LineartAnimeEdgeDetectionInvocation"] | components["schemas"]["LineartEdgeDetectionInvocation"] | components["schemas"]["LlavaOnevisionVllmInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MLSDDetectionInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediaPipeFaceDetectionInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataFieldExtractorInvocation"] | components["schemas"]["MetadataFromImageInvocation"] | components["schemas"]["MetadataInvocation"] | 
components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MetadataItemLinkedInvocation"] | components["schemas"]["MetadataToBoolCollectionInvocation"] | components["schemas"]["MetadataToBoolInvocation"] | components["schemas"]["MetadataToControlnetsInvocation"] | components["schemas"]["MetadataToFloatCollectionInvocation"] | components["schemas"]["MetadataToFloatInvocation"] | components["schemas"]["MetadataToIPAdaptersInvocation"] | components["schemas"]["MetadataToIntegerCollectionInvocation"] | components["schemas"]["MetadataToIntegerInvocation"] | components["schemas"]["MetadataToLorasCollectionInvocation"] | components["schemas"]["MetadataToLorasInvocation"] | components["schemas"]["MetadataToModelInvocation"] | components["schemas"]["MetadataToSDXLLorasInvocation"] | components["schemas"]["MetadataToSDXLModelInvocation"] | components["schemas"]["MetadataToSchedulerInvocation"] | components["schemas"]["MetadataToStringCollectionInvocation"] | components["schemas"]["MetadataToStringInvocation"] | components["schemas"]["MetadataToT2IAdaptersInvocation"] | components["schemas"]["MetadataToVAEInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalMapInvocation"] | components["schemas"]["PBRMapsInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PasteImageIntoBoundingBoxInvocation"] | components["schemas"]["PiDiNetEdgeDetectionInvocation"] | components["schemas"]["PromptTemplateInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | 
components["schemas"]["RoundInvocation"] | components["schemas"]["SD3DenoiseInvocation"] | components["schemas"]["SD3ImageToLatentsInvocation"] | components["schemas"]["SD3LatentsToImageInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["Sd3ModelLoaderInvocation"] | components["schemas"]["Sd3TextEncoderInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StringBatchInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringGenerator"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["ZImageControlInvocation"] | components["schemas"]["ZImageDenoiseInvocation"] | components["schemas"]["ZImageDenoiseMetaInvocation"] | 
components["schemas"]["ZImageImageToLatentsInvocation"] | components["schemas"]["ZImageLatentsToImageInvocation"] | components["schemas"]["ZImageLoRACollectionLoader"] | components["schemas"]["ZImageLoRALoaderInvocation"] | components["schemas"]["ZImageModelLoaderInvocation"] | components["schemas"]["ZImageSeedVarianceEnhancerInvocation"] | components["schemas"]["ZImageTextEncoderInvocation"]; + invocation: components["schemas"]["AddInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["AnimaDenoiseInvocation"] | components["schemas"]["AnimaImageToLatentsInvocation"] | components["schemas"]["AnimaLatentsToImageInvocation"] | components["schemas"]["AnimaLoRACollectionLoader"] | components["schemas"]["AnimaLoRALoaderInvocation"] | components["schemas"]["AnimaModelLoaderInvocation"] | components["schemas"]["AnimaTextEncoderInvocation"] | components["schemas"]["ApplyMaskTensorToImageInvocation"] | components["schemas"]["ApplyMaskToImageInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyEdgeDetectionInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CanvasV2MaskAndCropInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CogView4DenoiseInvocation"] | components["schemas"]["CogView4ImageToLatentsInvocation"] | components["schemas"]["CogView4LatentsToImageInvocation"] | components["schemas"]["CogView4ModelLoaderInvocation"] | 
components["schemas"]["CogView4TextEncoderInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropImageToBoundingBoxInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeDetectionInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DenoiseLatentsMetaInvocation"] | components["schemas"]["DepthAnythingDepthEstimationInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["ExpandMaskWithFadeInvocation"] | components["schemas"]["FLUXLoRACollectionLoader"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatBatchInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatGenerator"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["Flux2DenoiseInvocation"] | components["schemas"]["Flux2KleinLoRACollectionLoader"] | components["schemas"]["Flux2KleinLoRALoaderInvocation"] | components["schemas"]["Flux2KleinModelLoaderInvocation"] 
| components["schemas"]["Flux2KleinTextEncoderInvocation"] | components["schemas"]["Flux2VaeDecodeInvocation"] | components["schemas"]["Flux2VaeEncodeInvocation"] | components["schemas"]["FluxControlLoRALoaderInvocation"] | components["schemas"]["FluxControlNetInvocation"] | components["schemas"]["FluxDenoiseInvocation"] | components["schemas"]["FluxDenoiseLatentsMetaInvocation"] | components["schemas"]["FluxFillInvocation"] | components["schemas"]["FluxIPAdapterInvocation"] | components["schemas"]["FluxKontextConcatenateImagesInvocation"] | components["schemas"]["FluxKontextInvocation"] | components["schemas"]["FluxLoRALoaderInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxReduxInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxVaeDecodeInvocation"] | components["schemas"]["FluxVaeEncodeInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GetMaskBoundingBoxInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HEDEdgeDetectionInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["ImageBatchInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageGenerator"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | 
components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImageNoiseInvocation"] | components["schemas"]["ImagePanelLayoutInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerBatchInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerGenerator"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["InvokeAdjustImageHuePlusInvocation"] | components["schemas"]["InvokeEquivalentAchromaticLightnessInvocation"] | components["schemas"]["InvokeImageBlendInvocation"] | components["schemas"]["InvokeImageCompositorInvocation"] | components["schemas"]["InvokeImageDilateOrErodeInvocation"] | components["schemas"]["InvokeImageEnhanceInvocation"] | components["schemas"]["InvokeImageValueThresholdsInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LineartAnimeEdgeDetectionInvocation"] | components["schemas"]["LineartEdgeDetectionInvocation"] | components["schemas"]["LlavaOnevisionVllmInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MLSDDetectionInvocation"] | 
components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediaPipeFaceDetectionInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataFieldExtractorInvocation"] | components["schemas"]["MetadataFromImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MetadataItemLinkedInvocation"] | components["schemas"]["MetadataToBoolCollectionInvocation"] | components["schemas"]["MetadataToBoolInvocation"] | components["schemas"]["MetadataToControlnetsInvocation"] | components["schemas"]["MetadataToFloatCollectionInvocation"] | components["schemas"]["MetadataToFloatInvocation"] | components["schemas"]["MetadataToIPAdaptersInvocation"] | components["schemas"]["MetadataToIntegerCollectionInvocation"] | components["schemas"]["MetadataToIntegerInvocation"] | components["schemas"]["MetadataToLorasCollectionInvocation"] | components["schemas"]["MetadataToLorasInvocation"] | components["schemas"]["MetadataToModelInvocation"] | components["schemas"]["MetadataToSDXLLorasInvocation"] | components["schemas"]["MetadataToSDXLModelInvocation"] | components["schemas"]["MetadataToSchedulerInvocation"] | components["schemas"]["MetadataToStringCollectionInvocation"] | components["schemas"]["MetadataToStringInvocation"] | components["schemas"]["MetadataToT2IAdaptersInvocation"] | components["schemas"]["MetadataToVAEInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalMapInvocation"] | 
components["schemas"]["PBRMapsInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PasteImageIntoBoundingBoxInvocation"] | components["schemas"]["PiDiNetEdgeDetectionInvocation"] | components["schemas"]["PromptTemplateInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SD3DenoiseInvocation"] | components["schemas"]["SD3ImageToLatentsInvocation"] | components["schemas"]["SD3LatentsToImageInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["Sd3ModelLoaderInvocation"] | components["schemas"]["Sd3TextEncoderInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StringBatchInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringGenerator"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | 
components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["ZImageControlInvocation"] | components["schemas"]["ZImageDenoiseInvocation"] | components["schemas"]["ZImageDenoiseMetaInvocation"] | components["schemas"]["ZImageImageToLatentsInvocation"] | components["schemas"]["ZImageLatentsToImageInvocation"] | components["schemas"]["ZImageLoRACollectionLoader"] | components["schemas"]["ZImageLoRALoaderInvocation"] | components["schemas"]["ZImageModelLoaderInvocation"] | components["schemas"]["ZImageSeedVarianceEnhancerInvocation"] | components["schemas"]["ZImageTextEncoderInvocation"]; /** * Invocation Source Id * @description The ID of the prepared invocation's source node @@ -14324,7 +14852,7 @@ export type components = { * Invocation * @description The ID of the invocation */ - invocation: components["schemas"]["AddInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["ApplyMaskTensorToImageInvocation"] | components["schemas"]["ApplyMaskToImageInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | 
components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyEdgeDetectionInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CanvasV2MaskAndCropInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CogView4DenoiseInvocation"] | components["schemas"]["CogView4ImageToLatentsInvocation"] | components["schemas"]["CogView4LatentsToImageInvocation"] | components["schemas"]["CogView4ModelLoaderInvocation"] | components["schemas"]["CogView4TextEncoderInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropImageToBoundingBoxInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeDetectionInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DenoiseLatentsMetaInvocation"] | components["schemas"]["DepthAnythingDepthEstimationInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["ExpandMaskWithFadeInvocation"] | components["schemas"]["FLUXLoRACollectionLoader"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | 
components["schemas"]["FloatBatchInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatGenerator"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["Flux2DenoiseInvocation"] | components["schemas"]["Flux2KleinLoRACollectionLoader"] | components["schemas"]["Flux2KleinLoRALoaderInvocation"] | components["schemas"]["Flux2KleinModelLoaderInvocation"] | components["schemas"]["Flux2KleinTextEncoderInvocation"] | components["schemas"]["Flux2VaeDecodeInvocation"] | components["schemas"]["Flux2VaeEncodeInvocation"] | components["schemas"]["FluxControlLoRALoaderInvocation"] | components["schemas"]["FluxControlNetInvocation"] | components["schemas"]["FluxDenoiseInvocation"] | components["schemas"]["FluxDenoiseLatentsMetaInvocation"] | components["schemas"]["FluxFillInvocation"] | components["schemas"]["FluxIPAdapterInvocation"] | components["schemas"]["FluxKontextConcatenateImagesInvocation"] | components["schemas"]["FluxKontextInvocation"] | components["schemas"]["FluxLoRALoaderInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxReduxInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxVaeDecodeInvocation"] | components["schemas"]["FluxVaeEncodeInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GetMaskBoundingBoxInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HEDEdgeDetectionInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["ImageBatchInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | 
components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageGenerator"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImageNoiseInvocation"] | components["schemas"]["ImagePanelLayoutInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerBatchInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerGenerator"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["InvokeAdjustImageHuePlusInvocation"] | components["schemas"]["InvokeEquivalentAchromaticLightnessInvocation"] | components["schemas"]["InvokeImageBlendInvocation"] | components["schemas"]["InvokeImageCompositorInvocation"] | components["schemas"]["InvokeImageDilateOrErodeInvocation"] | components["schemas"]["InvokeImageEnhanceInvocation"] | components["schemas"]["InvokeImageValueThresholdsInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | 
components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LineartAnimeEdgeDetectionInvocation"] | components["schemas"]["LineartEdgeDetectionInvocation"] | components["schemas"]["LlavaOnevisionVllmInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MLSDDetectionInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediaPipeFaceDetectionInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataFieldExtractorInvocation"] | components["schemas"]["MetadataFromImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MetadataItemLinkedInvocation"] | components["schemas"]["MetadataToBoolCollectionInvocation"] | components["schemas"]["MetadataToBoolInvocation"] | components["schemas"]["MetadataToControlnetsInvocation"] | components["schemas"]["MetadataToFloatCollectionInvocation"] | components["schemas"]["MetadataToFloatInvocation"] | components["schemas"]["MetadataToIPAdaptersInvocation"] | components["schemas"]["MetadataToIntegerCollectionInvocation"] | components["schemas"]["MetadataToIntegerInvocation"] | components["schemas"]["MetadataToLorasCollectionInvocation"] | components["schemas"]["MetadataToLorasInvocation"] | components["schemas"]["MetadataToModelInvocation"] | components["schemas"]["MetadataToSDXLLorasInvocation"] | 
components["schemas"]["MetadataToSDXLModelInvocation"] | components["schemas"]["MetadataToSchedulerInvocation"] | components["schemas"]["MetadataToStringCollectionInvocation"] | components["schemas"]["MetadataToStringInvocation"] | components["schemas"]["MetadataToT2IAdaptersInvocation"] | components["schemas"]["MetadataToVAEInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalMapInvocation"] | components["schemas"]["PBRMapsInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PasteImageIntoBoundingBoxInvocation"] | components["schemas"]["PiDiNetEdgeDetectionInvocation"] | components["schemas"]["PromptTemplateInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SD3DenoiseInvocation"] | components["schemas"]["SD3ImageToLatentsInvocation"] | components["schemas"]["SD3LatentsToImageInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["Sd3ModelLoaderInvocation"] | components["schemas"]["Sd3TextEncoderInvocation"] | 
components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StringBatchInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringGenerator"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["ZImageControlInvocation"] | components["schemas"]["ZImageDenoiseInvocation"] | components["schemas"]["ZImageDenoiseMetaInvocation"] | components["schemas"]["ZImageImageToLatentsInvocation"] | components["schemas"]["ZImageLatentsToImageInvocation"] | components["schemas"]["ZImageLoRACollectionLoader"] | components["schemas"]["ZImageLoRALoaderInvocation"] | components["schemas"]["ZImageModelLoaderInvocation"] | components["schemas"]["ZImageSeedVarianceEnhancerInvocation"] | components["schemas"]["ZImageTextEncoderInvocation"]; + invocation: components["schemas"]["AddInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["AnimaDenoiseInvocation"] | components["schemas"]["AnimaImageToLatentsInvocation"] | components["schemas"]["AnimaLatentsToImageInvocation"] | components["schemas"]["AnimaLoRACollectionLoader"] | components["schemas"]["AnimaLoRALoaderInvocation"] | 
components["schemas"]["AnimaModelLoaderInvocation"] | components["schemas"]["AnimaTextEncoderInvocation"] | components["schemas"]["ApplyMaskTensorToImageInvocation"] | components["schemas"]["ApplyMaskToImageInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyEdgeDetectionInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CanvasV2MaskAndCropInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CogView4DenoiseInvocation"] | components["schemas"]["CogView4ImageToLatentsInvocation"] | components["schemas"]["CogView4LatentsToImageInvocation"] | components["schemas"]["CogView4ModelLoaderInvocation"] | components["schemas"]["CogView4TextEncoderInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropImageToBoundingBoxInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | 
components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeDetectionInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DenoiseLatentsMetaInvocation"] | components["schemas"]["DepthAnythingDepthEstimationInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["ExpandMaskWithFadeInvocation"] | components["schemas"]["FLUXLoRACollectionLoader"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatBatchInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatGenerator"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["Flux2DenoiseInvocation"] | components["schemas"]["Flux2KleinLoRACollectionLoader"] | components["schemas"]["Flux2KleinLoRALoaderInvocation"] | components["schemas"]["Flux2KleinModelLoaderInvocation"] | components["schemas"]["Flux2KleinTextEncoderInvocation"] | components["schemas"]["Flux2VaeDecodeInvocation"] | components["schemas"]["Flux2VaeEncodeInvocation"] | components["schemas"]["FluxControlLoRALoaderInvocation"] | components["schemas"]["FluxControlNetInvocation"] | components["schemas"]["FluxDenoiseInvocation"] | components["schemas"]["FluxDenoiseLatentsMetaInvocation"] | components["schemas"]["FluxFillInvocation"] | components["schemas"]["FluxIPAdapterInvocation"] | components["schemas"]["FluxKontextConcatenateImagesInvocation"] | components["schemas"]["FluxKontextInvocation"] | components["schemas"]["FluxLoRALoaderInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxReduxInvocation"] | 
components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxVaeDecodeInvocation"] | components["schemas"]["FluxVaeEncodeInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GetMaskBoundingBoxInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HEDEdgeDetectionInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["ImageBatchInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageGenerator"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImageNoiseInvocation"] | components["schemas"]["ImagePanelLayoutInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerBatchInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerGenerator"] | 
components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["InvokeAdjustImageHuePlusInvocation"] | components["schemas"]["InvokeEquivalentAchromaticLightnessInvocation"] | components["schemas"]["InvokeImageBlendInvocation"] | components["schemas"]["InvokeImageCompositorInvocation"] | components["schemas"]["InvokeImageDilateOrErodeInvocation"] | components["schemas"]["InvokeImageEnhanceInvocation"] | components["schemas"]["InvokeImageValueThresholdsInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LineartAnimeEdgeDetectionInvocation"] | components["schemas"]["LineartEdgeDetectionInvocation"] | components["schemas"]["LlavaOnevisionVllmInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MLSDDetectionInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediaPipeFaceDetectionInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataFieldExtractorInvocation"] | components["schemas"]["MetadataFromImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MetadataItemLinkedInvocation"] | components["schemas"]["MetadataToBoolCollectionInvocation"] | 
components["schemas"]["MetadataToBoolInvocation"] | components["schemas"]["MetadataToControlnetsInvocation"] | components["schemas"]["MetadataToFloatCollectionInvocation"] | components["schemas"]["MetadataToFloatInvocation"] | components["schemas"]["MetadataToIPAdaptersInvocation"] | components["schemas"]["MetadataToIntegerCollectionInvocation"] | components["schemas"]["MetadataToIntegerInvocation"] | components["schemas"]["MetadataToLorasCollectionInvocation"] | components["schemas"]["MetadataToLorasInvocation"] | components["schemas"]["MetadataToModelInvocation"] | components["schemas"]["MetadataToSDXLLorasInvocation"] | components["schemas"]["MetadataToSDXLModelInvocation"] | components["schemas"]["MetadataToSchedulerInvocation"] | components["schemas"]["MetadataToStringCollectionInvocation"] | components["schemas"]["MetadataToStringInvocation"] | components["schemas"]["MetadataToT2IAdaptersInvocation"] | components["schemas"]["MetadataToVAEInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalMapInvocation"] | components["schemas"]["PBRMapsInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PasteImageIntoBoundingBoxInvocation"] | components["schemas"]["PiDiNetEdgeDetectionInvocation"] | components["schemas"]["PromptTemplateInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SD3DenoiseInvocation"] | components["schemas"]["SD3ImageToLatentsInvocation"] | 
components["schemas"]["SD3LatentsToImageInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["Sd3ModelLoaderInvocation"] | components["schemas"]["Sd3TextEncoderInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StringBatchInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringGenerator"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["ZImageControlInvocation"] | components["schemas"]["ZImageDenoiseInvocation"] | components["schemas"]["ZImageDenoiseMetaInvocation"] | components["schemas"]["ZImageImageToLatentsInvocation"] | components["schemas"]["ZImageLatentsToImageInvocation"] | components["schemas"]["ZImageLoRACollectionLoader"] | 
components["schemas"]["ZImageLoRALoaderInvocation"] | components["schemas"]["ZImageModelLoaderInvocation"] | components["schemas"]["ZImageSeedVarianceEnhancerInvocation"] | components["schemas"]["ZImageTextEncoderInvocation"]; /** * Invocation Source Id * @description The ID of the prepared invocation's source node @@ -14483,14 +15011,14 @@ export type components = { * Convert Cache Dir * Format: path * @description Path to the converted models cache directory (DEPRECATED, but do not delete because it is needed for migration from previous versions). - * @default models/.convert_cache + * @default models\.convert_cache */ convert_cache_dir?: string; /** * Download Cache Dir * Format: path * @description Path to the directory that contains dynamically downloaded models. - * @default models/.download_cache + * @default models\.download_cache */ download_cache_dir?: string; /** @@ -16558,6 +17086,84 @@ export type components = { base: "z-image"; variant: components["schemas"]["ZImageVariantType"] | null; }; + /** + * LoRA_LyCORIS_Anima_Config + * @description Model config for Anima LoRA models in LyCORIS format. + */ + LoRA_LyCORIS_Anima_Config: { + /** + * Key + * @description A unique key for this model. + */ + key: string; + /** + * Hash + * @description The hash of the model file(s). + */ + hash: string; + /** + * Path + * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. + */ + path: string; + /** + * File Size + * @description The size of the model in bytes. + */ + file_size: number; + /** + * Name + * @description Name of the model. + */ + name: string; + /** + * Description + * @description Model description + */ + description: string | null; + /** + * Source + * @description The original source of the model (path, URL or repo_id). 
+ */ + source: string; + /** @description The type of source */ + source_type: components["schemas"]["ModelSourceType"]; + /** + * Source Api Response + * @description The original API response from the source, as stringified JSON. + */ + source_api_response: string | null; + /** + * Cover Image + * @description Url for image to preview model + */ + cover_image: string | null; + /** + * Type + * @default lora + * @constant + */ + type: "lora"; + /** + * Trigger Phrases + * @description Set of trigger phrases for this model + */ + trigger_phrases: string[] | null; + /** @description Default settings for this model */ + default_settings: components["schemas"]["LoraModelDefaultSettings"] | null; + /** + * Format + * @default lycoris + * @constant + */ + format: "lycoris"; + /** + * Base + * @default anima + * @constant + */ + base: "anima"; + }; /** LoRA_LyCORIS_FLUX_Config */ LoRA_LyCORIS_FLUX_Config: { /** @@ -17517,6 +18123,92 @@ export type components = { format: "bnb_quantized_nf4b"; variant: components["schemas"]["FluxVariantType"]; }; + /** + * Main_Checkpoint_Anima_Config + * @description Model config for Anima single-file checkpoint models (safetensors). + * + * Anima is built on NVIDIA Cosmos Predict2 DiT with a custom LLM Adapter + * that bridges Qwen3 0.6B text encoder outputs to the DiT. + */ + Main_Checkpoint_Anima_Config: { + /** + * Key + * @description A unique key for this model. + */ + key: string; + /** + * Hash + * @description The hash of the model file(s). + */ + hash: string; + /** + * Path + * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. + */ + path: string; + /** + * File Size + * @description The size of the model in bytes. + */ + file_size: number; + /** + * Name + * @description Name of the model. 
+ */ + name: string; + /** + * Description + * @description Model description + */ + description: string | null; + /** + * Source + * @description The original source of the model (path, URL or repo_id). + */ + source: string; + /** @description The type of source */ + source_type: components["schemas"]["ModelSourceType"]; + /** + * Source Api Response + * @description The original API response from the source, as stringified JSON. + */ + source_api_response: string | null; + /** + * Cover Image + * @description Url for image to preview model + */ + cover_image: string | null; + /** + * Type + * @default main + * @constant + */ + type: "main"; + /** + * Trigger Phrases + * @description Set of trigger phrases for this model + */ + trigger_phrases: string[] | null; + /** @description Default settings for this model */ + default_settings: components["schemas"]["MainModelDefaultSettings"] | null; + /** + * Config Path + * @description Path to the config for this model, if any. + */ + config_path: string | null; + /** + * Base + * @default anima + * @constant + */ + base: "anima"; + /** + * Format + * @default checkpoint + * @constant + */ + format: "checkpoint"; + }; /** * Main_Checkpoint_FLUX_Config * @description Model config for main checkpoint models. 
@@ -20897,7 +21589,7 @@ export type components = { * Config * @description The installed model's config */ - config: components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | 
components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | 
components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["Unknown_Config"]; + config: components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | 
components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | 
components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["Unknown_Config"]; }; /** * ModelInstallDownloadProgressEvent @@ -21063,7 +21755,7 @@ export type components = { * Config Out * @description After successful installation, this will hold the configuration object. 
*/ - config_out?: (components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | 
components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | 
components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["Unknown_Config"]) | null; + config_out?: (components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | 
components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | 
components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["Unknown_Config"]) | null; /** * Inplace * @description Leave model in its current location; otherwise install under models directory @@ -21149,7 +21841,7 @@ export type components = { * Config * @description The model's config */ - config: components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | 
components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | 
components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["Unknown_Config"]; + config: components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | 
components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | 
components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | 
components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["Unknown_Config"]; /** * @description The submodel type, if any * @default null @@ -21170,7 +21862,7 @@ export type components = { * Config * @description The model's config */ - config: components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | 
components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | 
components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["Unknown_Config"]; + config: components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | 
components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | 
components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["Unknown_Config"]; /** * @description The submodel type, if any * @default null @@ -21344,7 +22036,7 @@ export type components = { */ ModelsList: { /** Models */ - models: (components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | 
components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | 
components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | 
components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["Unknown_Config"])[]; + models: (components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | 
components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | 
components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["Unknown_Config"])[]; }; /** * Multiply Integers @@ -22472,7 +23164,7 @@ export type components = { * @description Qwen3 text encoder variants based on model size. * @enum {string} */ - Qwen3VariantType: "qwen3_4b" | "qwen3_8b"; + Qwen3VariantType: "qwen3_4b" | "qwen3_8b" | "qwen3_06b"; /** * Random Float * @description Outputs a single random float @@ -26990,6 +27682,82 @@ export type components = { */ type: "vae_output"; }; + /** + * VAE_Checkpoint_Anima_Config + * @description Model config for Anima QwenImage VAE checkpoint models (AutoencoderKLQwenImage). + */ + VAE_Checkpoint_Anima_Config: { + /** + * Key + * @description A unique key for this model. + */ + key: string; + /** + * Hash + * @description The hash of the model file(s). + */ + hash: string; + /** + * Path + * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. + */ + path: string; + /** + * File Size + * @description The size of the model in bytes. + */ + file_size: number; + /** + * Name + * @description Name of the model. 
+ */ + name: string; + /** + * Description + * @description Model description + */ + description: string | null; + /** + * Source + * @description The original source of the model (path, URL or repo_id). + */ + source: string; + /** @description The type of source */ + source_type: components["schemas"]["ModelSourceType"]; + /** + * Source Api Response + * @description The original API response from the source, as stringified JSON. + */ + source_api_response: string | null; + /** + * Cover Image + * @description Url for image to preview model + */ + cover_image: string | null; + /** + * Config Path + * @description Path to the config for this model, if any. + */ + config_path: string | null; + /** + * Type + * @default vae + * @constant + */ + type: "vae"; + /** + * Format + * @default checkpoint + * @constant + */ + format: "checkpoint"; + /** + * Base + * @default anima + * @constant + */ + base: "anima"; + }; /** VAE_Checkpoint_FLUX_Config */ VAE_Checkpoint_FLUX_Config: { /** @@ -29124,7 +29892,7 @@ export interface operations { [name: string]: unknown; }; content: { - "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | 
components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | 
components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["Unknown_Config"]; + "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | 
components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | 
components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | 
components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["Unknown_Config"]; }; }; /** @description Validation Error */ @@ -29156,7 +29924,7 @@ export interface operations { [name: string]: unknown; }; content: { - "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | 
components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | 
components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["Unknown_Config"]; + "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | 
components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | 
components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["Unknown_Config"]; }; }; /** @description Validation Error */ @@ -29206,7 +29974,7 @@ export interface operations { * "repo_variant": "fp16", * "upcast_attention": false * } */ - "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | 
components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | 
components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["Unknown_Config"]; 
+ "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | 
components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | 
components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["Unknown_Config"]; }; }; /** @description Bad request */ @@ -29311,7 +30079,7 @@ export interface operations { * "repo_variant": "fp16", * "upcast_attention": false * } */ - "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | 
components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["TI_File_SD1_Config"] | 
components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["Unknown_Config"]; + "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | 
components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | 
components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["Unknown_Config"]; }; }; /** @description Bad request */ @@ -29382,7 +30150,7 @@ export interface operations { * "repo_variant": "fp16", * "upcast_attention": false * } */ - "application/json": 
components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] 
| components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] 
| components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["Unknown_Config"]; + "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | 
components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | 
components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["Unknown_Config"]; }; }; /** @description Bad request */ @@ -30082,7 +30850,7 @@ export interface operations { * "repo_variant": "fp16", * "upcast_attention": false * } */ - "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | 
components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | 
components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["Unknown_Config"]; + "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | 
components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | 
components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | 
components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["Unknown_Config"]; }; }; /** @description Bad request */ diff --git a/invokeai/frontend/web/src/services/api/types.ts b/invokeai/frontend/web/src/services/api/types.ts index 5d56c346f87..4d5f6841aab 100644 --- a/invokeai/frontend/web/src/services/api/types.ts +++ b/invokeai/frontend/web/src/services/api/types.ts @@ -196,6 +196,13 @@ export const isFlux2VAEModelConfig = (config: AnyModelConfig, excludeSubmodels?: ); }; +export const isAnimaVAEModelConfig = (config: AnyModelConfig, excludeSubmodels?: boolean): config is VAEModelConfig => { + return ( + (config.type === 'vae' || (!excludeSubmodels && config.type === 'main' && checkSubmodels(['vae'], config))) && + config.base === 'anima' + ); +}; + export const isControlNetModelConfig = (config: AnyModelConfig): config is ControlNetModelConfig => { return config.type === 'controlnet'; }; diff --git a/tests/app/invocations/test_anima_denoise.py b/tests/app/invocations/test_anima_denoise.py new file mode 100644 index 00000000000..733b33f75ce --- /dev/null +++ b/tests/app/invocations/test_anima_denoise.py @@ -0,0 +1,109 @@ +import pytest + +from invokeai.app.invocations.anima_denoise import ( + ANIMA_SHIFT, + AnimaDenoiseInvocation, + inverse_loglinear_timestep_shift, + loglinear_timestep_shift, +) + + +class TestLoglinearTimestepShift: + """Test the log-linear timestep shift function used for Anima's noise schedule.""" + + def test_shift_1_is_identity(self): + """With alpha=1.0, shift should be identity.""" + for t in [0.0, 0.25, 0.5, 0.75, 1.0]: + assert loglinear_timestep_shift(1.0, t) == t + + def test_shift_at_zero(self): + """At t=0, shifted sigma should be 0 regardless of alpha.""" + assert loglinear_timestep_shift(3.0, 0.0) == 0.0 + + def test_shift_at_one(self): + """At t=1, 
shifted sigma should be 1 regardless of alpha.""" + assert loglinear_timestep_shift(3.0, 1.0) == pytest.approx(1.0) + + def test_shift_3_increases_sigma(self): + """With alpha=3.0, sigma should be larger than t (spends more time at high noise).""" + for t in [0.1, 0.25, 0.5, 0.75, 0.9]: + sigma = loglinear_timestep_shift(3.0, t) + assert sigma > t, f"At t={t}, sigma={sigma} should be > t" + + def test_shift_monotonic(self): + """Shifted sigmas should be monotonically increasing with t.""" + prev = 0.0 + for i in range(1, 101): + t = i / 100.0 + sigma = loglinear_timestep_shift(3.0, t) + assert sigma > prev, f"Not monotonic at t={t}" + prev = sigma + + def test_known_value(self): + """Test a known value: at t=0.5, alpha=3.0, sigma = 3*0.5 / (1 + 2*0.5) = 0.75.""" + assert loglinear_timestep_shift(3.0, 0.5) == pytest.approx(0.75) + + +class TestInverseLoglinearTimestepShift: + """Test the inverse log-linear timestep shift (used for inpainting mask correction).""" + + def test_inverse_shift_1_is_identity(self): + """With alpha=1.0, inverse should be identity.""" + for sigma in [0.0, 0.25, 0.5, 0.75, 1.0]: + assert inverse_loglinear_timestep_shift(1.0, sigma) == sigma + + def test_roundtrip(self): + """shift(inverse(sigma)) should recover sigma, and inverse(shift(t)) should recover t.""" + for t in [0.0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]: + sigma = loglinear_timestep_shift(3.0, t) + recovered_t = inverse_loglinear_timestep_shift(3.0, sigma) + assert recovered_t == pytest.approx(t, abs=1e-7), ( + f"Roundtrip failed: t={t} -> sigma={sigma} -> recovered_t={recovered_t}" + ) + + def test_known_value(self): + """At sigma=0.75, alpha=3.0, t should be 0.5 (inverse of the known shift value).""" + assert inverse_loglinear_timestep_shift(3.0, 0.75) == pytest.approx(0.5) + + +class TestGetSigmas: + """Test the sigma schedule generation.""" + + def test_schedule_length(self): + """Schedule should have num_steps + 1 entries.""" + inv = AnimaDenoiseInvocation( + 
positive_conditioning=None, # type: ignore + transformer=None, # type: ignore + ) + sigmas = inv._get_sigmas(30) + assert len(sigmas) == 31 + + def test_schedule_endpoints(self): + """Schedule should start near 1.0 and end at 0.0.""" + inv = AnimaDenoiseInvocation( + positive_conditioning=None, # type: ignore + transformer=None, # type: ignore + ) + sigmas = inv._get_sigmas(30) + assert sigmas[0] == pytest.approx(loglinear_timestep_shift(ANIMA_SHIFT, 1.0)) + assert sigmas[-1] == pytest.approx(0.0) + + def test_schedule_monotonically_decreasing(self): + """Sigmas should decrease from noise to clean.""" + inv = AnimaDenoiseInvocation( + positive_conditioning=None, # type: ignore + transformer=None, # type: ignore + ) + sigmas = inv._get_sigmas(30) + for i in range(len(sigmas) - 1): + assert sigmas[i] > sigmas[i + 1], f"Not decreasing at index {i}: {sigmas[i]} <= {sigmas[i + 1]}" + + def test_schedule_uses_shift(self): + """With shift=3.0, middle sigmas should be larger than the linear midpoint.""" + inv = AnimaDenoiseInvocation( + positive_conditioning=None, # type: ignore + transformer=None, # type: ignore + ) + sigmas = inv._get_sigmas(10) + # At step 5/10, linear t = 0.5, shifted sigma should be 0.75 + assert sigmas[5] == pytest.approx(loglinear_timestep_shift(3.0, 0.5)) diff --git a/tests/backend/patches/lora_conversions/lora_state_dicts/anima_lora_kohya_format.py b/tests/backend/patches/lora_conversions/lora_state_dicts/anima_lora_kohya_format.py new file mode 100644 index 00000000000..3bb3ec00f76 --- /dev/null +++ b/tests/backend/patches/lora_conversions/lora_state_dicts/anima_lora_kohya_format.py @@ -0,0 +1,42 @@ +# A sample state dict in the Kohya Anima LoRA format. +# These keys are based on Anima LoRAs targeting the Cosmos Predict2 DiT transformer. 
+# Keys follow the pattern: lora_unet_blocks_{N}_{component}.{suffix} +state_dict_keys: dict[str, list[int]] = { + # Block 0 - cross attention + "lora_unet_blocks_0_cross_attn_k_proj.lora_down.weight": [8, 2048], + "lora_unet_blocks_0_cross_attn_k_proj.lora_up.weight": [2048, 8], + "lora_unet_blocks_0_cross_attn_k_proj.alpha": [], + "lora_unet_blocks_0_cross_attn_q_proj.lora_down.weight": [8, 2048], + "lora_unet_blocks_0_cross_attn_q_proj.lora_up.weight": [2048, 8], + "lora_unet_blocks_0_cross_attn_q_proj.alpha": [], + "lora_unet_blocks_0_cross_attn_v_proj.lora_down.weight": [8, 2048], + "lora_unet_blocks_0_cross_attn_v_proj.lora_up.weight": [2048, 8], + "lora_unet_blocks_0_cross_attn_v_proj.alpha": [], + "lora_unet_blocks_0_cross_attn_output_proj.lora_down.weight": [8, 2048], + "lora_unet_blocks_0_cross_attn_output_proj.lora_up.weight": [2048, 8], + "lora_unet_blocks_0_cross_attn_output_proj.alpha": [], + # Block 0 - self attention + "lora_unet_blocks_0_self_attn_k_proj.lora_down.weight": [8, 2048], + "lora_unet_blocks_0_self_attn_k_proj.lora_up.weight": [2048, 8], + "lora_unet_blocks_0_self_attn_k_proj.alpha": [], + "lora_unet_blocks_0_self_attn_q_proj.lora_down.weight": [8, 2048], + "lora_unet_blocks_0_self_attn_q_proj.lora_up.weight": [2048, 8], + "lora_unet_blocks_0_self_attn_q_proj.alpha": [], + "lora_unet_blocks_0_self_attn_v_proj.lora_down.weight": [8, 2048], + "lora_unet_blocks_0_self_attn_v_proj.lora_up.weight": [2048, 8], + "lora_unet_blocks_0_self_attn_v_proj.alpha": [], + "lora_unet_blocks_0_self_attn_output_proj.lora_down.weight": [8, 2048], + "lora_unet_blocks_0_self_attn_output_proj.lora_up.weight": [2048, 8], + "lora_unet_blocks_0_self_attn_output_proj.alpha": [], + # Block 0 - MLP + "lora_unet_blocks_0_mlp_layer1.lora_down.weight": [8, 2048], + "lora_unet_blocks_0_mlp_layer1.lora_up.weight": [8192, 8], + "lora_unet_blocks_0_mlp_layer1.alpha": [], + "lora_unet_blocks_0_mlp_layer2.lora_down.weight": [8, 8192], + 
"lora_unet_blocks_0_mlp_layer2.lora_up.weight": [2048, 8], + "lora_unet_blocks_0_mlp_layer2.alpha": [], + # Block 0 - adaln modulation + "lora_unet_blocks_0_adaln_modulation_cross_attn_1.lora_down.weight": [8, 2048], + "lora_unet_blocks_0_adaln_modulation_cross_attn_1.lora_up.weight": [2048, 8], + "lora_unet_blocks_0_adaln_modulation_cross_attn_1.alpha": [], +} diff --git a/tests/backend/patches/lora_conversions/lora_state_dicts/anima_lora_kohya_with_te_format.py b/tests/backend/patches/lora_conversions/lora_state_dicts/anima_lora_kohya_with_te_format.py new file mode 100644 index 00000000000..9499690f28a --- /dev/null +++ b/tests/backend/patches/lora_conversions/lora_state_dicts/anima_lora_kohya_with_te_format.py @@ -0,0 +1,34 @@ +# A sample state dict in the Kohya Anima LoRA format with Qwen3 text encoder layers. +# Contains both lora_unet_ (transformer) and lora_te_ (Qwen3 encoder) keys. +state_dict_keys: dict[str, list[int]] = { + # Transformer block 0 - cross attention + "lora_unet_blocks_0_cross_attn_k_proj.lora_down.weight": [8, 2048], + "lora_unet_blocks_0_cross_attn_k_proj.lora_up.weight": [2048, 8], + "lora_unet_blocks_0_cross_attn_k_proj.alpha": [], + "lora_unet_blocks_0_cross_attn_q_proj.lora_down.weight": [8, 2048], + "lora_unet_blocks_0_cross_attn_q_proj.lora_up.weight": [2048, 8], + "lora_unet_blocks_0_cross_attn_q_proj.alpha": [], + # Qwen3 text encoder layer 0 - self attention + "lora_te_layers_0_self_attn_q_proj.lora_down.weight": [8, 1024], + "lora_te_layers_0_self_attn_q_proj.lora_up.weight": [1024, 8], + "lora_te_layers_0_self_attn_q_proj.alpha": [], + "lora_te_layers_0_self_attn_k_proj.lora_down.weight": [8, 1024], + "lora_te_layers_0_self_attn_k_proj.lora_up.weight": [1024, 8], + "lora_te_layers_0_self_attn_k_proj.alpha": [], + "lora_te_layers_0_self_attn_v_proj.lora_down.weight": [8, 1024], + "lora_te_layers_0_self_attn_v_proj.lora_up.weight": [1024, 8], + "lora_te_layers_0_self_attn_v_proj.alpha": [], + 
"lora_te_layers_0_self_attn_o_proj.lora_down.weight": [8, 1024], + "lora_te_layers_0_self_attn_o_proj.lora_up.weight": [1024, 8], + "lora_te_layers_0_self_attn_o_proj.alpha": [], + # Qwen3 text encoder layer 0 - MLP + "lora_te_layers_0_mlp_gate_proj.lora_down.weight": [8, 1024], + "lora_te_layers_0_mlp_gate_proj.lora_up.weight": [2816, 8], + "lora_te_layers_0_mlp_gate_proj.alpha": [], + "lora_te_layers_0_mlp_down_proj.lora_down.weight": [8, 2816], + "lora_te_layers_0_mlp_down_proj.lora_up.weight": [1024, 8], + "lora_te_layers_0_mlp_down_proj.alpha": [], + "lora_te_layers_0_mlp_up_proj.lora_down.weight": [8, 1024], + "lora_te_layers_0_mlp_up_proj.lora_up.weight": [2816, 8], + "lora_te_layers_0_mlp_up_proj.alpha": [], +} diff --git a/tests/backend/patches/lora_conversions/lora_state_dicts/anima_lora_lokr_format.py b/tests/backend/patches/lora_conversions/lora_state_dicts/anima_lora_lokr_format.py new file mode 100644 index 00000000000..089208cca62 --- /dev/null +++ b/tests/backend/patches/lora_conversions/lora_state_dicts/anima_lora_lokr_format.py @@ -0,0 +1,18 @@ +# A sample state dict in the LoKR Anima LoRA format (with DoRA). +# Some Anima LoRAs use LoKR weights (lokr_w1/lokr_w2) combined with DoRA (dora_scale). +# The dora_scale should be stripped from LoKR layers during conversion. 
+state_dict_keys: dict[str, list[int]] = { + # Block 0 - cross attention with LoKR + DoRA + "diffusion_model.blocks.0.cross_attn.k_proj.lokr_w1": [2048, 8], + "diffusion_model.blocks.0.cross_attn.k_proj.lokr_w2": [8, 2048], + "diffusion_model.blocks.0.cross_attn.k_proj.alpha": [], + "diffusion_model.blocks.0.cross_attn.k_proj.dora_scale": [2048], + "diffusion_model.blocks.0.cross_attn.q_proj.lokr_w1": [2048, 8], + "diffusion_model.blocks.0.cross_attn.q_proj.lokr_w2": [8, 2048], + "diffusion_model.blocks.0.cross_attn.q_proj.alpha": [], + "diffusion_model.blocks.0.cross_attn.q_proj.dora_scale": [2048], + # Block 0 - self attention with LoKR (no DoRA) + "diffusion_model.blocks.0.self_attn.k_proj.lokr_w1": [2048, 8], + "diffusion_model.blocks.0.self_attn.k_proj.lokr_w2": [8, 2048], + "diffusion_model.blocks.0.self_attn.k_proj.alpha": [], +} diff --git a/tests/backend/patches/lora_conversions/lora_state_dicts/anima_lora_peft_format.py b/tests/backend/patches/lora_conversions/lora_state_dicts/anima_lora_peft_format.py new file mode 100644 index 00000000000..0edcd876f12 --- /dev/null +++ b/tests/backend/patches/lora_conversions/lora_state_dicts/anima_lora_peft_format.py @@ -0,0 +1,19 @@ +# A sample state dict in the diffusers PEFT Anima LoRA format. 
+# Keys follow the pattern: diffusion_model.blocks.{N}.{component}.lora_{A|B}.weight +state_dict_keys: dict[str, list[int]] = { + # Block 0 - cross attention + "diffusion_model.blocks.0.cross_attn.k_proj.lora_A.weight": [8, 2048], + "diffusion_model.blocks.0.cross_attn.k_proj.lora_B.weight": [2048, 8], + "diffusion_model.blocks.0.cross_attn.q_proj.lora_A.weight": [8, 2048], + "diffusion_model.blocks.0.cross_attn.q_proj.lora_B.weight": [2048, 8], + "diffusion_model.blocks.0.cross_attn.v_proj.lora_A.weight": [8, 2048], + "diffusion_model.blocks.0.cross_attn.v_proj.lora_B.weight": [2048, 8], + # Block 0 - self attention + "diffusion_model.blocks.0.self_attn.k_proj.lora_A.weight": [8, 2048], + "diffusion_model.blocks.0.self_attn.k_proj.lora_B.weight": [2048, 8], + "diffusion_model.blocks.0.self_attn.q_proj.lora_A.weight": [8, 2048], + "diffusion_model.blocks.0.self_attn.q_proj.lora_B.weight": [2048, 8], + # Block 0 - MLP + "diffusion_model.blocks.0.mlp.layer1.lora_A.weight": [8, 2048], + "diffusion_model.blocks.0.mlp.layer1.lora_B.weight": [8192, 8], +} diff --git a/tests/backend/patches/lora_conversions/test_anima_lora_conversion_utils.py b/tests/backend/patches/lora_conversions/test_anima_lora_conversion_utils.py new file mode 100644 index 00000000000..f0c63b5c3e8 --- /dev/null +++ b/tests/backend/patches/lora_conversions/test_anima_lora_conversion_utils.py @@ -0,0 +1,227 @@ +import pytest +import torch + +from invokeai.backend.patches.lora_conversions.anima_lora_constants import ( + ANIMA_LORA_QWEN3_PREFIX, + ANIMA_LORA_TRANSFORMER_PREFIX, +) +from invokeai.backend.patches.lora_conversions.anima_lora_conversion_utils import ( + _convert_kohya_te_key, + _convert_kohya_unet_key, + is_state_dict_likely_anima_lora, + lora_model_from_anima_state_dict, +) +from tests.backend.patches.lora_conversions.lora_state_dicts.anima_lora_kohya_format import ( + state_dict_keys as anima_kohya_keys, +) +from 
from tests.backend.patches.lora_conversions.lora_state_dicts.anima_lora_kohya_with_te_format import (
    state_dict_keys as anima_kohya_te_keys,
)
from tests.backend.patches.lora_conversions.lora_state_dicts.anima_lora_lokr_format import (
    state_dict_keys as anima_lokr_keys,
)
from tests.backend.patches.lora_conversions.lora_state_dicts.anima_lora_peft_format import (
    state_dict_keys as anima_peft_keys,
)
from tests.backend.patches.lora_conversions.lora_state_dicts.utils import keys_to_mock_state_dict


# --- Detection Tests ---


@pytest.mark.parametrize(
    "sd_keys",
    [anima_kohya_keys, anima_kohya_te_keys, anima_peft_keys, anima_lokr_keys],
    ids=["kohya", "kohya_te", "peft", "lokr"],
)
def test_is_state_dict_likely_anima_lora_true(sd_keys: dict[str, list[int]]):
    """Test that is_state_dict_likely_anima_lora() correctly identifies Anima LoRA state dicts."""
    state_dict = keys_to_mock_state_dict(sd_keys)
    assert is_state_dict_likely_anima_lora(state_dict)


def test_is_state_dict_likely_anima_lora_false_for_flux():
    """Test that is_state_dict_likely_anima_lora() returns False for a FLUX LoRA state dict."""
    # FLUX uses lora_unet_double_blocks_ keys, which must not match Anima's
    # lora_unet_blocks_ detection.
    state_dict = {
        "lora_unet_double_blocks_0_img_attn_proj.lora_down.weight": torch.empty([16, 3072]),
        "lora_unet_double_blocks_0_img_attn_proj.lora_up.weight": torch.empty([3072, 16]),
    }
    assert not is_state_dict_likely_anima_lora(state_dict)


def test_is_state_dict_likely_anima_lora_false_for_generic_blocks():
    """Test that is_state_dict_likely_anima_lora() returns False for a hypothetical architecture
    that uses lora_unet_blocks_ but with non-Cosmos DiT subcomponent names."""
    state_dict = {
        # Has lora_unet_blocks_ prefix but uses 'attention' and 'ff' instead of
        # Cosmos DiT subcomponents (cross_attn, self_attn, mlp, adaln_modulation)
        "lora_unet_blocks_0_attention_to_q.lora_down.weight": torch.empty([16, 512]),
        "lora_unet_blocks_0_attention_to_q.lora_up.weight": torch.empty([512, 16]),
        "lora_unet_blocks_0_ff_net_0_proj.lora_down.weight": torch.empty([16, 512]),
        "lora_unet_blocks_0_ff_net_0_proj.lora_up.weight": torch.empty([2048, 16]),
    }
    assert not is_state_dict_likely_anima_lora(state_dict)


def test_is_state_dict_likely_anima_lora_false_for_generic_peft_blocks():
    """Test that is_state_dict_likely_anima_lora() returns False for a hypothetical architecture
    that uses transformer.blocks. in PEFT format but with non-Cosmos subcomponents."""
    state_dict = {
        "transformer.blocks.0.attention.to_q.lora_A.weight": torch.empty([16, 512]),
        "transformer.blocks.0.attention.to_q.lora_B.weight": torch.empty([512, 16]),
        "transformer.blocks.0.ff.net.0.proj.lora_A.weight": torch.empty([16, 512]),
        "transformer.blocks.0.ff.net.0.proj.lora_B.weight": torch.empty([2048, 16]),
    }
    assert not is_state_dict_likely_anima_lora(state_dict)


def test_is_state_dict_likely_anima_lora_false_for_random():
    """Test that is_state_dict_likely_anima_lora() returns False for unrelated state dicts."""
    state_dict = {
        "some_random_key.weight": torch.empty([64, 64]),
        "another_key.bias": torch.empty([64]),
    }
    assert not is_state_dict_likely_anima_lora(state_dict)


# --- Kohya Key Conversion Tests ---


@pytest.mark.parametrize(
    ["kohya_key", "expected"],
    [
        # Underscore-joined Kohya module paths map to dotted model parameter paths.
        ("lora_unet_blocks_0_cross_attn_k_proj", "blocks.0.cross_attn.k_proj"),
        ("lora_unet_blocks_0_cross_attn_q_proj", "blocks.0.cross_attn.q_proj"),
        ("lora_unet_blocks_0_cross_attn_v_proj", "blocks.0.cross_attn.v_proj"),
        ("lora_unet_blocks_0_cross_attn_output_proj", "blocks.0.cross_attn.output_proj"),
        ("lora_unet_blocks_0_self_attn_k_proj", "blocks.0.self_attn.k_proj"),
        ("lora_unet_blocks_0_self_attn_q_proj", "blocks.0.self_attn.q_proj"),
        ("lora_unet_blocks_0_self_attn_v_proj", "blocks.0.self_attn.v_proj"),
        ("lora_unet_blocks_0_self_attn_output_proj", "blocks.0.self_attn.output_proj"),
        ("lora_unet_blocks_0_mlp_layer1", "blocks.0.mlp.layer1"),
        ("lora_unet_blocks_0_mlp_layer2", "blocks.0.mlp.layer2"),
        ("lora_unet_blocks_27_cross_attn_k_proj", "blocks.27.cross_attn.k_proj"),
        # adaln_modulation_* keeps its compound name; the trailing _1 is a
        # Sequential index and becomes ".1".
        ("lora_unet_blocks_0_adaln_modulation_cross_attn_1", "blocks.0.adaln_modulation_cross_attn.1"),
        ("lora_unet_blocks_0_adaln_modulation_self_attn_1", "blocks.0.adaln_modulation_self_attn.1"),
        ("lora_unet_blocks_0_adaln_modulation_mlp_1", "blocks.0.adaln_modulation_mlp.1"),
        # LLM Adapter keys
        ("lora_unet_llm_adapter_blocks_0_cross_attn_k_proj", "llm_adapter.blocks.0.cross_attn.k_proj"),
        ("lora_unet_llm_adapter_blocks_0_cross_attn_q_proj", "llm_adapter.blocks.0.cross_attn.q_proj"),
        ("lora_unet_llm_adapter_blocks_0_cross_attn_v_proj", "llm_adapter.blocks.0.cross_attn.v_proj"),
        ("lora_unet_llm_adapter_blocks_0_self_attn_k_proj", "llm_adapter.blocks.0.self_attn.k_proj"),
        ("lora_unet_llm_adapter_blocks_0_self_attn_q_proj", "llm_adapter.blocks.0.self_attn.q_proj"),
        ("lora_unet_llm_adapter_blocks_0_self_attn_v_proj", "llm_adapter.blocks.0.self_attn.v_proj"),
        ("lora_unet_llm_adapter_blocks_5_cross_attn_k_proj", "llm_adapter.blocks.5.cross_attn.k_proj"),
    ],
)
def test_convert_kohya_unet_key(kohya_key: str, expected: str):
    """Test that Kohya unet keys are correctly converted to model parameter paths."""
    assert _convert_kohya_unet_key(kohya_key) == expected


@pytest.mark.parametrize(
    ["kohya_key", "expected"],
    [
        ("lora_te_layers_0_self_attn_q_proj", "model.layers.0.self_attn.q_proj"),
        ("lora_te_layers_0_self_attn_k_proj", "model.layers.0.self_attn.k_proj"),
        ("lora_te_layers_0_self_attn_v_proj", "model.layers.0.self_attn.v_proj"),
        ("lora_te_layers_0_self_attn_o_proj", "model.layers.0.self_attn.o_proj"),
        ("lora_te_layers_0_mlp_gate_proj", "model.layers.0.mlp.gate_proj"),
        ("lora_te_layers_0_mlp_down_proj", "model.layers.0.mlp.down_proj"),
        ("lora_te_layers_0_mlp_up_proj", "model.layers.0.mlp.up_proj"),
        ("lora_te_layers_15_self_attn_q_proj", "model.layers.15.self_attn.q_proj"),
    ],
)
def test_convert_kohya_te_key(kohya_key: str, expected: str):
    """Test that Kohya TE keys are correctly converted to Qwen3 model parameter paths.

    The Qwen3 text encoder is loaded as Qwen3ForCausalLM which wraps the base model
    under a `model.` prefix, so all converted paths must include it.
    """
    assert _convert_kohya_te_key(kohya_key) == expected


# --- End-to-End Conversion Tests ---


@pytest.mark.parametrize(
    "sd_keys",
    [anima_kohya_keys, anima_kohya_te_keys, anima_peft_keys, anima_lokr_keys],
    ids=["kohya", "kohya_te", "peft", "lokr"],
)
def test_lora_model_from_anima_state_dict(sd_keys: dict[str, list[int]]):
    """Test that a ModelPatchRaw can be created from all supported Anima LoRA formats."""
    state_dict = keys_to_mock_state_dict(sd_keys)
    lora_model = lora_model_from_anima_state_dict(state_dict)
    assert len(lora_model.layers) > 0


def test_kohya_unet_keys_get_transformer_prefix():
    """Test that Kohya unet keys are prefixed with the transformer prefix."""
    state_dict = keys_to_mock_state_dict(anima_kohya_keys)
    lora_model = lora_model_from_anima_state_dict(state_dict)

    for key in lora_model.layers.keys():
        assert key.startswith(ANIMA_LORA_TRANSFORMER_PREFIX), (
            f"Expected transformer prefix '{ANIMA_LORA_TRANSFORMER_PREFIX}', got key: {key}"
        )


def test_kohya_te_keys_get_qwen3_prefix():
    """Test that Kohya TE keys are prefixed with the Qwen3 prefix."""
    state_dict = keys_to_mock_state_dict(anima_kohya_te_keys)
    lora_model = lora_model_from_anima_state_dict(state_dict)

    # Every converted key must land in exactly one of the two namespaces,
    # and a mixed (unet + te) LoRA must populate both.
    has_transformer_keys = False
    has_qwen3_keys = False
    for key in lora_model.layers.keys():
        if key.startswith(ANIMA_LORA_TRANSFORMER_PREFIX):
            has_transformer_keys = True
        elif key.startswith(ANIMA_LORA_QWEN3_PREFIX):
            has_qwen3_keys = True
        else:
            raise AssertionError(f"Key has unexpected prefix: {key}")

    assert has_transformer_keys, "Expected at least one transformer key"
    assert has_qwen3_keys, "Expected at least one Qwen3 key"
+def test_qwen3_keys_include_model_prefix(): + """Test that converted Qwen3 TE keys include 'model.' prefix for Qwen3ForCausalLM.""" + state_dict = keys_to_mock_state_dict(anima_kohya_te_keys) + lora_model = lora_model_from_anima_state_dict(state_dict) + + for key in lora_model.layers.keys(): + if key.startswith(ANIMA_LORA_QWEN3_PREFIX): + inner_key = key[len(ANIMA_LORA_QWEN3_PREFIX):] + assert inner_key.startswith("model."), ( + f"Qwen3 key should start with 'model.' after prefix, got: {inner_key}" + ) + + +def test_lokr_dora_keys_dont_crash(): + """Test that LoKR layers with dora_scale don't cause a KeyError. + + Some Anima LoRAs combine DoRA (dora_scale) with LoKR (lokr_w1/lokr_w2). + The dora_scale should be stripped from LoKR layers since shared code + doesn't support DoRA+LoKR combination. + """ + state_dict = keys_to_mock_state_dict(anima_lokr_keys) + lora_model = lora_model_from_anima_state_dict(state_dict) + assert len(lora_model.layers) > 0 + + +def test_peft_keys_get_transformer_prefix(): + """Test that diffusers PEFT keys are prefixed with the transformer prefix.""" + state_dict = keys_to_mock_state_dict(anima_peft_keys) + lora_model = lora_model_from_anima_state_dict(state_dict) + + for key in lora_model.layers.keys(): + assert key.startswith(ANIMA_LORA_TRANSFORMER_PREFIX), ( + f"Expected transformer prefix, got key: {key}" + ) + # Verify the diffusion_model. prefix is stripped + inner_key = key[len(ANIMA_LORA_TRANSFORMER_PREFIX):] + assert not inner_key.startswith("diffusion_model."), ( + f"diffusion_model. prefix should be stripped, got: {inner_key}" + )