From 1e32cae5d8c14eee5b2722de13c38a00c0c5b241 Mon Sep 17 00:00:00 2001
From: MengAiDev <3463526515@qq.com>
Date: Mon, 18 Aug 2025 17:56:22 +0800
Subject: [PATCH] feat(autoencoders): add patchify support for AutoencoderKLWan

- Add patchify and unpatchify operations to support patch-based processing
- Enable patch_size configuration for the AutoencoderKLWan model

---
 src/diffusers/models/autoencoders/autoencoder_kl_wan.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/diffusers/models/autoencoders/autoencoder_kl_wan.py b/src/diffusers/models/autoencoders/autoencoder_kl_wan.py
index d84a0861e984..c450eaf8a79f 100644
--- a/src/diffusers/models/autoencoders/autoencoder_kl_wan.py
+++ b/src/diffusers/models/autoencoders/autoencoder_kl_wan.py
@@ -1289,6 +1289,10 @@ def tiled_encode(self, x: torch.Tensor) -> AutoencoderKLOutput:
         blend_height = tile_latent_min_height - tile_latent_stride_height
         blend_width = tile_latent_min_width - tile_latent_stride_width
 
+        # Apply patchify if patch_size is specified
+        if self.config.patch_size is not None:
+            x = patchify(x, patch_size=self.config.patch_size)
+
         # Split x into overlapping tiles and encode them separately.
         # The tiles have an overlap to avoid seams between tiles.
         rows = []
@@ -1392,6 +1396,10 @@ def tiled_decode(self, z: torch.Tensor, return_dict: bool = True) -> Union[Decod
 
         dec = torch.cat(result_rows, dim=3)[:, :, :, :sample_height, :sample_width]
 
+        # Apply unpatchify if patch_size is specified
+        if self.config.patch_size is not None:
+            dec = unpatchify(dec, patch_size=self.config.patch_size)
+
         if not return_dict:
             return (dec,)
         return DecoderOutput(sample=dec)
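
---

Reviewer note: for context on the two helpers this patch calls, here is a minimal, self-contained sketch of pixel-(un)shuffle-style `patchify`/`unpatchify`, assuming 5D video tensors of shape (B, C, T, H, W) and a square spatial patch. It is illustrative only, not the module's exact implementation; only the function names and the `patch_size` keyword mirror the calls in the diff above.

```python
# Illustrative sketch only: mirrors the patchify/unpatchify calls in the diff,
# not the library's exact code. Assumes (B, C, T, H, W) inputs and that
# patch_size evenly divides H and W.
import torch


def patchify(x: torch.Tensor, patch_size: int) -> torch.Tensor:
    # Fold each non-overlapping (patch_size x patch_size) spatial block into
    # channels: (B, C, T, H, W) -> (B, C * p * p, T, H // p, W // p).
    if patch_size == 1:
        return x
    b, c, t, h, w = x.shape
    if h % patch_size or w % patch_size:
        raise ValueError("height and width must be divisible by patch_size")
    x = x.reshape(b, c, t, h // patch_size, patch_size, w // patch_size, patch_size)
    # Move the two patch dims next to the channel dim, then merge them into it.
    x = x.permute(0, 1, 4, 6, 2, 3, 5)
    return x.reshape(b, c * patch_size**2, t, h // patch_size, w // patch_size)


def unpatchify(x: torch.Tensor, patch_size: int) -> torch.Tensor:
    # Inverse of patchify: spread channel groups back onto the spatial grid,
    # (B, C * p * p, T, H, W) -> (B, C, T, H * p, W * p).
    if patch_size == 1:
        return x
    b, c, t, h, w = x.shape
    c_out = c // patch_size**2
    x = x.reshape(b, c_out, patch_size, patch_size, t, h, w)
    x = x.permute(0, 1, 4, 5, 2, 6, 3)
    return x.reshape(b, c_out, t, h * patch_size, w * patch_size)


if __name__ == "__main__":
    # Round-trip check: unpatchify(patchify(x, p), p) reproduces x exactly.
    x = torch.randn(1, 3, 4, 8, 8)
    assert torch.equal(unpatchify(patchify(x, 2), 2), x)
```

Exact invertibility is the property that matters here: tiled_encode folds spatial blocks into channels before tiling, and tiled_decode must undo that fold on the assembled output without loss.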