From 28e702dd45cbbff4df97c9ef6ee1afd8453f3679 Mon Sep 17 00:00:00 2001 From: David Briand Date: Thu, 30 Apr 2026 08:07:20 +0000 Subject: [PATCH] [qwenimage] add image_area kwarg to QwenImageEditPlusPipeline --- .../qwenimage/pipeline_qwenimage_edit_plus.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/diffusers/pipelines/qwenimage/pipeline_qwenimage_edit_plus.py b/src/diffusers/pipelines/qwenimage/pipeline_qwenimage_edit_plus.py index 57749e6ce1c2..fdbbaa733c36 100644 --- a/src/diffusers/pipelines/qwenimage/pipeline_qwenimage_edit_plus.py +++ b/src/diffusers/pipelines/qwenimage/pipeline_qwenimage_edit_plus.py @@ -64,7 +64,6 @@ """ CONDITION_IMAGE_SIZE = 384 * 384 -VAE_IMAGE_SIZE = 1024 * 1024 # Copied from diffusers.pipelines.qwenimage.pipeline_qwenimage.calculate_shift @@ -534,6 +533,7 @@ def __call__( true_cfg_scale: float = 4.0, height: int | None = None, width: int | None = None, + image_area: int = 1024 * 1024, num_inference_steps: int = 50, sigmas: list[float] | None = None, guidance_scale: float | None = None, @@ -579,6 +579,12 @@ def __call__( The height in pixels of the generated image. This is set to 1024 by default for the best results. width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor): The width in pixels of the generated image. This is set to 1024 by default for the best results. + image_area (`int`, *optional*, defaults to `1024 * 1024`): + Target pixel area used to derive (a) the default output `height`/`width` from the input image's + aspect ratio when those are not explicitly provided, and (b) the resolution at which the input + image(s) are encoded by the VAE. When `height` and `width` are both passed explicitly they + override the default-derivation, but `image_area` still controls the VAE-encoding size of the + input image(s). num_inference_steps (`int`, *optional*, defaults to 50): The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference. @@ -640,7 +646,7 @@ def __call__( returning a tuple, the first element is a list with the generated images. """ image_size = image[-1].size if isinstance(image, list) else image.size - calculated_width, calculated_height = calculate_dimensions(1024 * 1024, image_size[0] / image_size[1]) + calculated_width, calculated_height = calculate_dimensions(image_area, image_size[0] / image_size[1]) height = height or calculated_height width = width or calculated_width @@ -696,7 +702,7 @@ def __call__( condition_width, condition_height = calculate_dimensions( CONDITION_IMAGE_SIZE, image_width / image_height ) - vae_width, vae_height = calculate_dimensions(VAE_IMAGE_SIZE, image_width / image_height) + vae_width, vae_height = calculate_dimensions(image_area, image_width / image_height) condition_image_sizes.append((condition_width, condition_height)) vae_image_sizes.append((vae_width, vae_height)) condition_images.append(self.image_processor.resize(img, condition_height, condition_width))