diff --git a/auto_round/compressors/diffusion/compressor.py b/auto_round/compressors/diffusion/compressor.py index f8680774f..b57e927b6 100644 --- a/auto_round/compressors/diffusion/compressor.py +++ b/auto_round/compressors/diffusion/compressor.py @@ -308,6 +308,20 @@ def calib(self, nsamples, bs): total = nsamples if not hasattr(self.dataloader, "len") else min(nsamples, len(self.dataloader)) if self.pipe.dtype != self.model.dtype: self.pipe.to(self.model.dtype) + + if ( + hasattr(self.model, "hf_device_map") + and len(self.model.hf_device_map) > 0 + and self.pipe.device != self.model.device + and torch.device(self.model.device).type in ["cuda", "xpu"] + ): + logger.error( + "Diffusion model is activated sequential model offloading, it will crash during moving to GPU/XPU. " + "Please use model path for quantization or " + "move the pipeline object to GPU/XPU before passing them into API." + ) + exit(-1) + if self.pipe.device != self.model.device: self.pipe.to(self.model.device) with tqdm(range(1, total + 1), desc="cache block inputs") as pbar: diff --git a/test/test_cuda/test_diffusion.py b/test/test_cuda/test_diffusion.py index 41bb91a01..9a5a8bfd3 100644 --- a/test/test_cuda/test_diffusion.py +++ b/test/test_cuda/test_diffusion.py @@ -28,7 +28,7 @@ def tearDownClass(self): @require_optimum def test_diffusion_tune(self): ## load the model - pipe = AutoPipelineForText2Image.from_pretrained(self.model_name) + pipe = AutoPipelineForText2Image.from_pretrained(self.model_name).to("cuda") model = pipe.transformer layer_config = {}