From ab27da1d8c6bd2f0035a3260d144aec9f3d7c8d3 Mon Sep 17 00:00:00 2001
From: Mengni Wang <mengni.wang@intel.com>
Date: Thu, 6 Nov 2025 21:01:04 -0500
Subject: [PATCH 1/4] Fix diffusion multi-device unit test (UT) issue

---
 auto_round/compressors/diffusion/compressor.py | 8 ++++++++
 test/test_cuda/test_diffusion.py               | 2 +-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/auto_round/compressors/diffusion/compressor.py b/auto_round/compressors/diffusion/compressor.py
index f8680774f..501209607 100644
--- a/auto_round/compressors/diffusion/compressor.py
+++ b/auto_round/compressors/diffusion/compressor.py
@@ -308,6 +308,14 @@ def calib(self, nsamples, bs):
         total = nsamples if not hasattr(self.dataloader, "len") else min(nsamples, len(self.dataloader))
         if self.pipe.dtype != self.model.dtype:
             self.pipe.to(self.model.dtype)
+
+        if hasattr(self.model, "hf_device_map") and len(self.model.hf_device_map) > 0 and self.pipe.device != self.model.device and torch.device(self.model.device).type in ["cuda", "xpu"]:
+            logger.error(
+                "Diffusion model is activated sequential model offloading, it will crash during moving to GPU/XPU. "
+                "Please use model path for quantization or move the pipeline object to GPU/XPU before passing them into API"
+            )
+            exit(-1)
+
         if self.pipe.device != self.model.device:
             self.pipe.to(self.model.device)
         with tqdm(range(1, total + 1), desc="cache block inputs") as pbar:
diff --git a/test/test_cuda/test_diffusion.py b/test/test_cuda/test_diffusion.py
index 41bb91a01..9a5a8bfd3 100644
--- a/test/test_cuda/test_diffusion.py
+++ b/test/test_cuda/test_diffusion.py
@@ -28,7 +28,7 @@ def tearDownClass(self):
     @require_optimum
     def test_diffusion_tune(self):
         ## load the model
-        pipe = AutoPipelineForText2Image.from_pretrained(self.model_name)
+        pipe = AutoPipelineForText2Image.from_pretrained(self.model_name).to("cuda")
         model = pipe.transformer
 
         layer_config = {}

From 3cb933390bf02fb1b5a331529446a78176cabc3e Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 7 Nov 2025 02:02:44 +0000
Subject: [PATCH 2/4] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 auto_round/compressors/diffusion/compressor.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/auto_round/compressors/diffusion/compressor.py b/auto_round/compressors/diffusion/compressor.py
index 501209607..f78b48a9f 100644
--- a/auto_round/compressors/diffusion/compressor.py
+++ b/auto_round/compressors/diffusion/compressor.py
@@ -309,7 +309,12 @@ def calib(self, nsamples, bs):
         if self.pipe.dtype != self.model.dtype:
             self.pipe.to(self.model.dtype)
 
-        if hasattr(self.model, "hf_device_map") and len(self.model.hf_device_map) > 0 and self.pipe.device != self.model.device and torch.device(self.model.device).type in ["cuda", "xpu"]:
+        if (
+            hasattr(self.model, "hf_device_map")
+            and len(self.model.hf_device_map) > 0
+            and self.pipe.device != self.model.device
+            and torch.device(self.model.device).type in ["cuda", "xpu"]
+        ):
             logger.error(
                 "Diffusion model is activated sequential model offloading, it will crash during moving to GPU/XPU. "
                 "Please use model path for quantization or move the pipeline object to GPU/XPU before passing them into API"

From b58424bf12215841d94670e28827503240a2c537 Mon Sep 17 00:00:00 2001
From: "Wang, Mengni" <mengni.wang@intel.com>
Date: Fri, 7 Nov 2025 10:04:01 +0800
Subject: [PATCH 3/4] Update compressor.py: end offloading error message with a period

---
 auto_round/compressors/diffusion/compressor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/auto_round/compressors/diffusion/compressor.py b/auto_round/compressors/diffusion/compressor.py
index f78b48a9f..80d92afdd 100644
--- a/auto_round/compressors/diffusion/compressor.py
+++ b/auto_round/compressors/diffusion/compressor.py
@@ -317,7 +317,7 @@ def calib(self, nsamples, bs):
         ):
             logger.error(
                 "Diffusion model is activated sequential model offloading, it will crash during moving to GPU/XPU. "
-                "Please use model path for quantization or move the pipeline object to GPU/XPU before passing them into API"
+                "Please use model path for quantization or move the pipeline object to GPU/XPU before passing them into API."
             )
             exit(-1)
 

From 8dd4a03fc07e143e16c994d1c6c20468b9d30be8 Mon Sep 17 00:00:00 2001
From: "Wang, Mengni" <mengni.wang@intel.com>
Date: Fri, 7 Nov 2025 10:20:33 +0800
Subject: [PATCH 4/4] Update compressor.py: wrap long offloading error message line

---
 auto_round/compressors/diffusion/compressor.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/auto_round/compressors/diffusion/compressor.py b/auto_round/compressors/diffusion/compressor.py
index 80d92afdd..b57e927b6 100644
--- a/auto_round/compressors/diffusion/compressor.py
+++ b/auto_round/compressors/diffusion/compressor.py
@@ -317,7 +317,8 @@ def calib(self, nsamples, bs):
         ):
             logger.error(
                 "Diffusion model is activated sequential model offloading, it will crash during moving to GPU/XPU. "
-                "Please use model path for quantization or move the pipeline object to GPU/XPU before passing them into API."
+                "Please use model path for quantization or "
+                "move the pipeline object to GPU/XPU before passing them into API."
             )
             exit(-1)