diff --git a/comfy/bg_removal_model.py b/comfy/bg_removal_model.py index cb7c2ee53e50..7877afd7f1b5 100644 --- a/comfy/bg_removal_model.py +++ b/comfy/bg_removal_model.py @@ -47,7 +47,7 @@ def encode_image(self, image): out = self.model(pixel_values=pixel_values) out = torch.nn.functional.interpolate(out, size=(H, W), mode="bicubic", antialias=False) - mask = out.sigmoid() + mask = out.sigmoid().to(device=comfy.model_management.intermediate_device(), dtype=comfy.model_management.intermediate_dtype()) if mask.ndim == 3: mask = mask.unsqueeze(0) if mask.shape[1] != 1: diff --git a/comfy_extras/nodes_compositing.py b/comfy_extras/nodes_compositing.py index 5b44237348d8..720efc629f41 100644 --- a/comfy_extras/nodes_compositing.py +++ b/comfy_extras/nodes_compositing.py @@ -203,7 +203,7 @@ def define_schema(cls): @classmethod def execute(cls, image: torch.Tensor, alpha: torch.Tensor) -> io.NodeOutput: batch_size = max(len(image), len(alpha)) - alpha = 1.0 - resize_mask(alpha, image.shape[1:]) + alpha = 1.0 - resize_mask(alpha.to(image), image.shape[1:]) alpha = comfy.utils.repeat_to_batch_size(alpha, batch_size) image = comfy.utils.repeat_to_batch_size(image, batch_size) return io.NodeOutput(torch.cat((image[..., :3], alpha.unsqueeze(-1)), dim=-1)) diff --git a/comfy_extras/nodes_lt.py b/comfy_extras/nodes_lt.py index ab1359fdb543..a4c85db77927 100644 --- a/comfy_extras/nodes_lt.py +++ b/comfy_extras/nodes_lt.py @@ -106,12 +106,12 @@ def execute(cls, vae, image, latent, strength, bypass=False) -> io.NodeOutput: if bypass: return (latent,) - samples = latent["samples"] + samples = latent["samples"].clone() _, height_scale_factor, width_scale_factor = ( vae.downscale_index_formula ) - batch, _, latent_frames, latent_height, latent_width = samples.shape + _, _, _, latent_height, latent_width = samples.shape width = latent_width * width_scale_factor height = latent_height * height_scale_factor @@ -124,11 +124,7 @@ def execute(cls, vae, image, latent, strength, bypass=False) -> io.NodeOutput: samples[:, :, :t.shape[2]] = t - conditioning_latent_frames_mask = torch.ones( - (batch, 1, latent_frames, 1, 1), - dtype=torch.float32, - device=samples.device, - ) + conditioning_latent_frames_mask = get_noise_mask(latent) conditioning_latent_frames_mask[:, :, :t.shape[2]] = 1.0 - strength return io.NodeOutput({"samples": samples, "noise_mask": conditioning_latent_frames_mask}) @@ -236,7 +232,7 @@ def define_schema(cls): def encode(cls, vae, latent_width, latent_height, images, scale_factors): time_scale_factor, width_scale_factor, height_scale_factor = scale_factors images = images[:(images.shape[0] - 1) // time_scale_factor * time_scale_factor + 1] - pixels = comfy.utils.common_upscale(images.movedim(-1, 1), latent_width * width_scale_factor, latent_height * height_scale_factor, "bilinear", crop="disabled").movedim(1, -1) + pixels = comfy.utils.common_upscale(images.movedim(-1, 1), latent_width * width_scale_factor, latent_height * height_scale_factor, "bilinear", crop="center").movedim(1, -1) encode_pixels = pixels[:, :, :, :3] t = vae.encode(encode_pixels) return encode_pixels, t