Skip to content

Commit 6fbb6b6

Browse files
authored
Fix LTXV Reference Audio node (Comfy-Org#13531)
1 parent abf3d56 commit 6fbb6b6

1 file changed

Lines changed: 9 additions & 1 deletion

File tree

comfy_extras/nodes_lt.py

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
import nodes
22
import node_helpers
33
import torch
4+
import torchaudio
45
import comfy.model_management
56
import comfy.model_sampling
67
import comfy.samplers
@@ -711,7 +712,14 @@ def define_schema(cls) -> io.Schema:
711712
@classmethod
712713
def execute(cls, model, positive, negative, reference_audio, audio_vae, identity_guidance_scale, start_percent, end_percent) -> io.NodeOutput:
713714
# Encode reference audio to latents and patchify
714-
audio_latents = audio_vae.encode(reference_audio)
715+
sample_rate = reference_audio["sample_rate"]
716+
vae_sample_rate = getattr(audio_vae, "audio_sample_rate", 44100)
717+
if vae_sample_rate != sample_rate:
718+
waveform = torchaudio.functional.resample(reference_audio["waveform"], sample_rate, vae_sample_rate)
719+
else:
720+
waveform = reference_audio["waveform"]
721+
722+
audio_latents = audio_vae.encode(waveform.movedim(1, -1))
715723
b, c, t, f = audio_latents.shape
716724
ref_tokens = audio_latents.permute(0, 2, 1, 3).reshape(b, t, c * f)
717725
ref_audio = {"tokens": ref_tokens}

0 commit comments

Comments
 (0)