intel · wenhuach21 · Nov 7, 2025 · Nov 7, 2025 · Nov 7, 2025 · Nov 7, 2025
diff --git a/auto_round/compressors/diffusion/compressor.py b/auto_round/compressors/diffusion/compressor.py
@@ -308,6 +308,20 @@ def calib(self, nsamples, bs):
         total = nsamples if not hasattr(self.dataloader, "len") else min(nsamples, len(self.dataloader))
         if self.pipe.dtype != self.model.dtype:
             self.pipe.to(self.model.dtype)
+
+        if (
+            hasattr(self.model, "hf_device_map")
+            and len(self.model.hf_device_map) > 0
+            and self.pipe.device != self.model.device
+            and torch.device(self.model.device).type in ["cuda", "xpu"]
+        ):
+            logger.error(
+                "Diffusion model is activated sequential model offloading, it will crash during moving to GPU/XPU. "
+                "Please use model path for quantization or "
+                "move the pipeline object to GPU/XPU before passing them into API."
+            )
+            exit(-1)
+
         if self.pipe.device != self.model.device:
             self.pipe.to(self.model.device)
         with tqdm(range(1, total + 1), desc="cache block inputs") as pbar:

diff --git a/test/test_cuda/test_diffusion.py b/test/test_cuda/test_diffusion.py
@@ -28,7 +28,7 @@ def tearDownClass(self):
     @require_optimum
     def test_diffusion_tune(self):
         ## load the model
-        pipe = AutoPipelineForText2Image.from_pretrained(self.model_name)
+        pipe = AutoPipelineForText2Image.from_pretrained(self.model_name).to("cuda")
         model = pipe.transformer
 
         layer_config = {}