From 7bb13471fca9096f754dbce3ca4cd545b88e8ba7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20du=20Hamel?= Date: Tue, 12 May 2026 19:51:26 +0200 Subject: [PATCH 1/2] fix sd_audio stereo format --- src/stable-diffusion.cpp | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/stable-diffusion.cpp b/src/stable-diffusion.cpp index ef7548f5c..73ff37f31 100644 --- a/src/stable-diffusion.cpp +++ b/src/stable-diffusion.cpp @@ -2575,7 +2575,20 @@ static sd_audio_t* waveform_to_sd_audio(const StableDiffusionGGML* sd, free(audio); return nullptr; } - std::memcpy(audio->data, waveform.data(), sample_bytes); + + const float* src = waveform.data(); + float* dst = audio->data; + + if (channels == 1) { + std::memcpy(dst, src, sample_bytes); + } else { + for (int64_t t = 0; t < sample_count; ++t) { + for (int64_t c = 0; c < channels; ++c) { + dst[t * channels + c] = src[c * sample_count + t]; + } + } + } + return audio; } From 50618bbfbd320d1ca101bca5d78d375706ebfe69 Mon Sep 17 00:00:00 2001 From: leejet Date: Sun, 17 May 2026 16:50:39 +0800 Subject: [PATCH 2/2] simplify code --- src/stable-diffusion.cpp | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/src/stable-diffusion.cpp b/src/stable-diffusion.cpp index 73ff37f31..409761ddc 100644 --- a/src/stable-diffusion.cpp +++ b/src/stable-diffusion.cpp @@ -2576,18 +2576,8 @@ static sd_audio_t* waveform_to_sd_audio(const StableDiffusionGGML* sd, return nullptr; } - const float* src = waveform.data(); - float* dst = audio->data; - - if (channels == 1) { - std::memcpy(dst, src, sample_bytes); - } else { - for (int64_t t = 0; t < sample_count; ++t) { - for (int64_t c = 0; c < channels; ++c) { - dst[t * channels + c] = src[c * sample_count + t]; - } - } - } + auto wavaform_t = waveform.permute({1, 0, 2, 3}); + std::memcpy(audio->data, wavaform_t.data(), sample_bytes); return audio; }