From 719e5abb36cab9ae1d6c184e66c45ae62fca7276 Mon Sep 17 00:00:00 2001
From: "Zhang, Weiwei1" <weiwei1.zhang@intel.com>
Date: Thu, 9 Oct 2025 15:31:54 +0800
Subject: [PATCH 1/4] Fix fp8 export when zp is not a tensor in pack_layer

Signed-off-by: Zhang, Weiwei1 <weiwei1.zhang@intel.com>
---
 auto_round/export/export_to_autoround/export_to_fp8.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/auto_round/export/export_to_autoround/export_to_fp8.py b/auto_round/export/export_to_autoround/export_to_fp8.py
index 7f069cb60..1f6cdbc65 100644
--- a/auto_round/export/export_to_autoround/export_to_fp8.py
+++ b/auto_round/export/export_to_autoround/export_to_fp8.py
@@ -109,11 +109,9 @@ def pack_layer(layer_name, model, data_type, device=None):
         torch_dtype = torch.float8_e5m2
     info = torch.finfo(torch_dtype)
     if zp is not None:
-        q_weight = (
-            weight.to(packing_device) / scale.to(packing_device).unsqueeze(-1) + zp.to(packing_device)
-            if isinstance(zp, torch.Tensor)
-            else zp
-        )
+        if isinstance(zp, torch.Tensor):
+            zp = zp.to(packing_device)
+        q_weight = weight.to(packing_device) / scale.to(packing_device).unsqueeze(-1) + zp
     else:
         q_weight = weight.to(packing_device) / scale.to(packing_device).unsqueeze(-1)
     q_weight = revert_tensor_by_pad(q_weight, orig_shape=orig_shape, pad_len=pad_len)
@@ -235,3 +233,4 @@ def wrapper(name):
     save_model(model, output_dir, safe_serialization=safe_serialization, dtype=dtype)
 
     return model
+

From c2daa79099713a7fcdd45f8a97f3fd5e7170c711 Mon Sep 17 00:00:00 2001
From: "Zhang, Weiwei1" <weiwei1.zhang@intel.com>
Date: Thu, 6 Nov 2025 16:52:46 +0800
Subject: [PATCH 2/4] Refine exllamav2 backend CUDA unit tests

Signed-off-by: Zhang, Weiwei1 <weiwei1.zhang@intel.com>
---
 test/test_cuda/test_exllamav2_backend.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/test/test_cuda/test_exllamav2_backend.py b/test/test_cuda/test_exllamav2_backend.py
index 5c12e0557..38905e9bd 100644
--- a/test/test_cuda/test_exllamav2_backend.py
+++ b/test/test_cuda/test_exllamav2_backend.py
@@ -12,7 +12,7 @@
 
 from auto_round import AutoRound, AutoRoundConfig
 from auto_round.eval.evaluation import simple_evaluate_user_model
-from auto_round.testing_utils import require_autogptq, require_gptqmodel
+from auto_round.testing_utils import require_autogptq, require_gptqmodel, require_package_version_ut
 
 
 class LLMDataLoader:
@@ -24,7 +24,7 @@ def __iter__(self):
             yield torch.ones([1, 10], dtype=torch.long)
 
 
-class TestAutoRoundMarlinBackend(unittest.TestCase):
+class TestAutoRoundexllamaBackend(unittest.TestCase):
 
     @classmethod
     def setUpClass(self):
@@ -99,6 +99,7 @@ def test_gptqmodel_exllmav2_4bits_asym(self):
         shutil.rmtree("./saved", ignore_errors=True)
 
     @require_autogptq
+    @require_package_version_ut("torch", "<2.6.0")
     def test_gptq_exllamav2_4bits_sym(self):
         model = AutoModelForCausalLM.from_pretrained(self.model_name, torch_dtype="auto", trust_remote_code=True)
         tokenizer = AutoTokenizer.from_pretrained(self.model_name, trust_remote_code=True)
@@ -130,6 +131,7 @@ def test_gptq_exllamav2_4bits_sym(self):
         shutil.rmtree(self.save_folder, ignore_errors=True)
 
     @require_autogptq
+    @require_package_version_ut("torch", "<2.6.0")
     def test_gptq_exllamav2_4bits_sym_group_size(self):
         for group_size in [-1, 32, 64, 128, 256, 1024]:  ## 384, 768 has accuracy issue
             print(f"!!!!!!!!!!!!!!!!!{group_size}!!!!!!!!!!!!!!!!!")
@@ -166,3 +168,4 @@ def test_gptq_exllamav2_4bits_sym_group_size(self):
 
 if __name__ == "__main__":
     unittest.main()
+

From 76226fd8bd098ef1fce66e3e7371e00b7089cb2b Mon Sep 17 00:00:00 2001
From: "Zhang, Weiwei1" <weiwei1.zhang@intel.com>
Date: Fri, 7 Nov 2025 11:49:27 +0800
Subject: [PATCH 3/4] Skip device_map entries without ':' in set_non_auto_device_map

Signed-off-by: Zhang, Weiwei1 <weiwei1.zhang@intel.com>
---
 auto_round/utils/device.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/auto_round/utils/device.py b/auto_round/utils/device.py
index b0ecf9019..9dee028f6 100644
--- a/auto_round/utils/device.py
+++ b/auto_round/utils/device.py
@@ -623,6 +623,8 @@ def set_non_auto_device_map(
         infos = device_map.split(",")
         device_map_dict = {}
         for info in infos:
+            if ":" not in info:
+                continue
             index = info.find(":")
             key = info[:index]
             value = info[index + 1 :]

From df3a10cedd0f5655a8c2b391203e549400b8e488 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 7 Nov 2025 03:51:13 +0000
Subject: [PATCH 4/4] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 auto_round/export/export_to_autoround/export_to_fp8.py | 1 -
 test/test_cuda/test_exllamav2_backend.py               | 1 -
 2 files changed, 2 deletions(-)

diff --git a/auto_round/export/export_to_autoround/export_to_fp8.py b/auto_round/export/export_to_autoround/export_to_fp8.py
index 88f6d750c..8b8a618e2 100644
--- a/auto_round/export/export_to_autoround/export_to_fp8.py
+++ b/auto_round/export/export_to_autoround/export_to_fp8.py
@@ -228,4 +228,3 @@ def wrapper(name):
     save_model(model, output_dir, safe_serialization=safe_serialization, dtype=dtype)
 
     return model
-
diff --git a/test/test_cuda/test_exllamav2_backend.py b/test/test_cuda/test_exllamav2_backend.py
index 38905e9bd..c489b37b2 100644
--- a/test/test_cuda/test_exllamav2_backend.py
+++ b/test/test_cuda/test_exllamav2_backend.py
@@ -168,4 +168,3 @@ def test_gptq_exllamav2_4bits_sym_group_size(self):
 
 if __name__ == "__main__":
     unittest.main()
-