From f2f294fd96d703475efb10d01d10909e9c2e5dd4 Mon Sep 17 00:00:00 2001
From: Tai An
Date: Thu, 30 Apr 2026 00:26:40 -0700
Subject: [PATCH] fix(stable_audio): align batched initial audio with prompts in prepare_latents

---
 src/diffusers/pipelines/stable_audio/pipeline_stable_audio.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/diffusers/pipelines/stable_audio/pipeline_stable_audio.py b/src/diffusers/pipelines/stable_audio/pipeline_stable_audio.py
index 351c8b65de0e..5de0bf059987 100644
--- a/src/diffusers/pipelines/stable_audio/pipeline_stable_audio.py
+++ b/src/diffusers/pipelines/stable_audio/pipeline_stable_audio.py
@@ -483,7 +483,7 @@ def prepare_latents(
             audio[:, :, : min(audio_length, audio_vae_length)] = initial_audio_waveforms[:, :, :audio_vae_length]

             encoded_audio = self.vae.encode(audio).latent_dist.sample(generator)
-            encoded_audio = encoded_audio.repeat((num_waveforms_per_prompt, 1, 1))
+            encoded_audio = encoded_audio.repeat_interleave(num_waveforms_per_prompt, dim=0)
             latents = encoded_audio + latents
         return latents
