From ab27da1d8c6bd2f0035a3260d144aec9f3d7c8d3 Mon Sep 17 00:00:00 2001 From: Mengni Wang Date: Thu, 6 Nov 2025 21:01:04 -0500 Subject: [PATCH 1/4] Fix diffusion multi-device ut issue --- auto_round/compressors/diffusion/compressor.py | 8 ++++++++ test/test_cuda/test_diffusion.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/auto_round/compressors/diffusion/compressor.py b/auto_round/compressors/diffusion/compressor.py index f8680774f..501209607 100644 --- a/auto_round/compressors/diffusion/compressor.py +++ b/auto_round/compressors/diffusion/compressor.py @@ -308,6 +308,14 @@ def calib(self, nsamples, bs): total = nsamples if not hasattr(self.dataloader, "len") else min(nsamples, len(self.dataloader)) if self.pipe.dtype != self.model.dtype: self.pipe.to(self.model.dtype) + + if hasattr(self.model, "hf_device_map") and len(self.model.hf_device_map) > 0 and self.pipe.device != self.model.device and torch.device(self.model.device).type in ["cuda", "xpu"]: + logger.error( + "Diffusion model is activated sequential model offloading, it will crash during moving to GPU/XPU. " + "Please use model path for quantization or move the pipeline object to GPU/XPU before passing them into API" + ) + exit(-1) + if self.pipe.device != self.model.device: self.pipe.to(self.model.device) with tqdm(range(1, total + 1), desc="cache block inputs") as pbar: diff --git a/test/test_cuda/test_diffusion.py b/test/test_cuda/test_diffusion.py index 41bb91a01..9a5a8bfd3 100644 --- a/test/test_cuda/test_diffusion.py +++ b/test/test_cuda/test_diffusion.py @@ -28,7 +28,7 @@ def tearDownClass(self): @require_optimum def test_diffusion_tune(self): ## load the model - pipe = AutoPipelineForText2Image.from_pretrained(self.model_name) + pipe = AutoPipelineForText2Image.from_pretrained(self.model_name).to("cuda") model = pipe.transformer layer_config = {} From 3cb933390bf02fb1b5a331529446a78176cabc3e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 7 Nov 2025 02:02:44 +0000 Subject: [PATCH 2/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- auto_round/compressors/diffusion/compressor.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/auto_round/compressors/diffusion/compressor.py b/auto_round/compressors/diffusion/compressor.py index 501209607..f78b48a9f 100644 --- a/auto_round/compressors/diffusion/compressor.py +++ b/auto_round/compressors/diffusion/compressor.py @@ -309,7 +309,12 @@ def calib(self, nsamples, bs): if self.pipe.dtype != self.model.dtype: self.pipe.to(self.model.dtype) - if hasattr(self.model, "hf_device_map") and len(self.model.hf_device_map) > 0 and self.pipe.device != self.model.device and torch.device(self.model.device).type in ["cuda", "xpu"]: + if ( + hasattr(self.model, "hf_device_map") + and len(self.model.hf_device_map) > 0 + and self.pipe.device != self.model.device + and torch.device(self.model.device).type in ["cuda", "xpu"] + ): logger.error( "Diffusion model is activated sequential model offloading, it will crash during moving to GPU/XPU. " "Please use model path for quantization or move the pipeline object to GPU/XPU before passing them into API" From b58424bf12215841d94670e28827503240a2c537 Mon Sep 17 00:00:00 2001 From: "Wang, Mengni" Date: Fri, 7 Nov 2025 10:04:01 +0800 Subject: [PATCH 3/4] Update compressor.py --- auto_round/compressors/diffusion/compressor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/auto_round/compressors/diffusion/compressor.py b/auto_round/compressors/diffusion/compressor.py index f78b48a9f..80d92afdd 100644 --- a/auto_round/compressors/diffusion/compressor.py +++ b/auto_round/compressors/diffusion/compressor.py @@ -317,7 +317,7 @@ def calib(self, nsamples, bs): ): logger.error( "Diffusion model is activated sequential model offloading, it will crash during moving to GPU/XPU. " - "Please use model path for quantization or move the pipeline object to GPU/XPU before passing them into API" + "Please use model path for quantization or move the pipeline object to GPU/XPU before passing them into API." ) exit(-1) From 8dd4a03fc07e143e16c994d1c6c20468b9d30be8 Mon Sep 17 00:00:00 2001 From: "Wang, Mengni" Date: Fri, 7 Nov 2025 10:20:33 +0800 Subject: [PATCH 4/4] Update compressor.py --- auto_round/compressors/diffusion/compressor.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/auto_round/compressors/diffusion/compressor.py b/auto_round/compressors/diffusion/compressor.py index 80d92afdd..b57e927b6 100644 --- a/auto_round/compressors/diffusion/compressor.py +++ b/auto_round/compressors/diffusion/compressor.py @@ -317,7 +317,8 @@ def calib(self, nsamples, bs): ): logger.error( "Diffusion model is activated sequential model offloading, it will crash during moving to GPU/XPU. " - "Please use model path for quantization or move the pipeline object to GPU/XPU before passing them into API." + "Please use model path for quantization or " + "move the pipeline object to GPU/XPU before passing them into API." ) exit(-1)