From 7d34e5706386e3f581517ad17be62284a4b41112 Mon Sep 17 00:00:00 2001
From: "Zhang, Weiwei1" <weiwei1.zhang@intel.com>
Date: Thu, 6 Nov 2025 14:12:30 +0800
Subject: [PATCH 1/4] disable itrex format first

Signed-off-by: Zhang, Weiwei1 <weiwei1.zhang@intel.com>
---
 auto_round/utils/common.py                    |  3 +--
 auto_round_extension/qbits/qlinear_qbits.py   |  9 +-------
 .../qbits/qlinear_qbits_gptq.py               |  9 +-------
 test/test_cpu/test_autoround.py               | 23 +------------------
 test/test_cpu/test_autoround_acc.py           |  2 +-
 5 files changed, 5 insertions(+), 41 deletions(-)

diff --git a/auto_round/utils/common.py b/auto_round/utils/common.py
index dea3f8c81..2c655d77d 100644
--- a/auto_round/utils/common.py
+++ b/auto_round/utils/common.py
@@ -88,8 +88,6 @@ def __init__(self):
             "auto_round:gptqmodel",
             "auto_round:auto_awq",
             "auto_round:llm_compressor",
-            "itrex",
-            "itrex_xpu",
             "fake",
             "llm_compressor",
         )
@@ -297,3 +295,4 @@ def get_reciprocal(tensor):
     else:
         tensor = torch.where(torch.abs(tensor) < 1e-30, 0, tensor)
     return torch.where(tensor != 0, 1 / tensor, torch.zeros_like(tensor))
+
diff --git a/auto_round_extension/qbits/qlinear_qbits.py b/auto_round_extension/qbits/qlinear_qbits.py
index 202f1eb19..05a5500e9 100644
--- a/auto_round_extension/qbits/qlinear_qbits.py
+++ b/auto_round_extension/qbits/qlinear_qbits.py
@@ -92,14 +92,6 @@ def req_check(self):
         torch_version = str(torch.__version__)
         if QBITS_AVAILABLE:
             pass
-            # import intel_extension_for_transformers
-            # itrex_version = str(intel_extension_for_transformers.__version__)
-            # version_match_map = {"1.4": "2.2.0+cpu",
-            #                      "1.4.1": "2.2.0+cpu", "1.4.2": "2.3.0+cpu"}
-            # if itrex_version in version_match_map:
-            #     if torch_version != version_match_map[itrex_version]:
-            #         logger.warning(
-            #             f"Please install torch {version_match_map[itrex_version]} by command 'pip install torch=={version_match_map[itrex_version]} --extra-index-url https://download.pytorch.org/whl/cpu' as Intel Extension for Transformers {itrex_version} is not compatible with current torch.")
         else:
             logger.error(
                 "Please install Intel Extension for Transformers by running 'pip install intel-extension-for-transformers' as qbits linear requirements checking fail. "
@@ -228,3 +220,4 @@ def dequantize_weight(qweight, qzeros, scales, bits):
 
 
 __all__ = ["QuantLinear"]
+
diff --git a/auto_round_extension/qbits/qlinear_qbits_gptq.py b/auto_round_extension/qbits/qlinear_qbits_gptq.py
index c9c9e1fea..3c012d693 100644
--- a/auto_round_extension/qbits/qlinear_qbits_gptq.py
+++ b/auto_round_extension/qbits/qlinear_qbits_gptq.py
@@ -92,14 +92,6 @@ def req_check(self):
         torch_version = str(torch.__version__)
         if QBITS_AVAILABLE:
             pass
-            # import intel_extension_for_transformers
-            # itrex_version = str(intel_extension_for_transformers.__version__)
-            # version_match_map = {"1.4": "2.2.0+cpu",
-            #                      "1.4.1": "2.2.0+cpu", "1.4.2": "2.3.0+cpu"}
-            # if itrex_version in version_match_map:
-            #     if torch_version != version_match_map[itrex_version]:
-            #         logger.warning(
-            #             f"Please install torch {version_match_map[itrex_version]} by command 'pip install torch=={version_match_map[itrex_version]} --extra-index-url https://download.pytorch.org/whl/cpu' as Intel Extension for Transformers {itrex_version} is not compatible with current torch.")
         else:
             logger.error(
                 "Please install Intel Extension for Transformers by running 'pip install intel-extension-for-transformers' as qbits linear requirements checking fail. "
@@ -229,3 +221,4 @@ def dequantize_weight(qweight, qzeros, scales, bits):
 
 
 __all__ = ["QuantLinear"]
+
diff --git a/test/test_cpu/test_autoround.py b/test/test_cpu/test_autoround.py
index 7dfd4b479..9a9960b89 100644
--- a/test/test_cpu/test_autoround.py
+++ b/test/test_cpu/test_autoround.py
@@ -162,28 +162,6 @@ def test_nv_fp4(self):
         print(result["results"]["lambada_openai"]["acc,none"])
         self.assertGreater(result["results"]["lambada_openai"]["acc,none"], 0.35)
 
-    def test_default(self):
-        bits, group_size, sym = 4, 128, False
-        autoround = AutoRound(
-            self.model,
-            self.tokenizer,
-            bits=bits,
-            group_size=group_size,
-            sym=sym,
-            iters=2,
-            seqlen=2,
-            dataset=self.llm_dataloader,
-        )
-        autoround.quantize()
-
-        autoround.save_quantized(output_dir="./saved", inplace=False, format="itrex")
-        try:
-            import auto_gptq
-        except:
-            return
-        if torch.cuda.is_available():
-            autoround.save_quantized(output_dir="./saved", inplace=False)
-
     def test_w4g1(self):
         model_name = "/tf_dataset/auto_round/models/facebook/opt-125m"
         bits, group_size, sym = 4, -1, True
@@ -817,3 +795,4 @@ def test_create_adam(self):
 
 if __name__ == "__main__":
     unittest.main()
+
diff --git a/test/test_cpu/test_autoround_acc.py b/test/test_cpu/test_autoround_acc.py
index 97211ade4..3f77198a6 100644
--- a/test/test_cpu/test_autoround_acc.py
+++ b/test/test_cpu/test_autoround_acc.py
@@ -13,7 +13,6 @@
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
 from auto_round import AutoRound  # pylint: disable=E0401
-from auto_round.export.export_to_itrex.export import pack_model  # pylint: disable=E0401
 
 
 class LLMDataLoader:
@@ -92,3 +91,4 @@ def test_3bits_asym_autoround(self):
 
 if __name__ == "__main__":
     unittest.main()
+

From 8dc5c90dd56262d5583f2767736d46d689ba89d7 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 6 Nov 2025 06:13:14 +0000
Subject: [PATCH 2/4] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 auto_round/utils/common.py                       | 1 -
 auto_round_extension/qbits/qlinear_qbits.py      | 1 -
 auto_round_extension/qbits/qlinear_qbits_gptq.py | 1 -
 test/test_cpu/test_autoround.py                  | 1 -
 test/test_cpu/test_autoround_acc.py              | 1 -
 5 files changed, 5 deletions(-)

diff --git a/auto_round/utils/common.py b/auto_round/utils/common.py
index 2c655d77d..bdac9725c 100644
--- a/auto_round/utils/common.py
+++ b/auto_round/utils/common.py
@@ -295,4 +295,3 @@ def get_reciprocal(tensor):
     else:
         tensor = torch.where(torch.abs(tensor) < 1e-30, 0, tensor)
     return torch.where(tensor != 0, 1 / tensor, torch.zeros_like(tensor))
-
diff --git a/auto_round_extension/qbits/qlinear_qbits.py b/auto_round_extension/qbits/qlinear_qbits.py
index 05a5500e9..476b08aaa 100644
--- a/auto_round_extension/qbits/qlinear_qbits.py
+++ b/auto_round_extension/qbits/qlinear_qbits.py
@@ -220,4 +220,3 @@ def dequantize_weight(qweight, qzeros, scales, bits):
 
 
 __all__ = ["QuantLinear"]
-
diff --git a/auto_round_extension/qbits/qlinear_qbits_gptq.py b/auto_round_extension/qbits/qlinear_qbits_gptq.py
index 3c012d693..ba83d0641 100644
--- a/auto_round_extension/qbits/qlinear_qbits_gptq.py
+++ b/auto_round_extension/qbits/qlinear_qbits_gptq.py
@@ -221,4 +221,3 @@ def dequantize_weight(qweight, qzeros, scales, bits):
 
 
 __all__ = ["QuantLinear"]
-
diff --git a/test/test_cpu/test_autoround.py b/test/test_cpu/test_autoround.py
index 9a9960b89..19e7f2fa1 100644
--- a/test/test_cpu/test_autoround.py
+++ b/test/test_cpu/test_autoround.py
@@ -795,4 +795,3 @@ def test_create_adam(self):
 
 if __name__ == "__main__":
     unittest.main()
-
diff --git a/test/test_cpu/test_autoround_acc.py b/test/test_cpu/test_autoround_acc.py
index 3f77198a6..41b28e663 100644
--- a/test/test_cpu/test_autoround_acc.py
+++ b/test/test_cpu/test_autoround_acc.py
@@ -91,4 +91,3 @@ def test_3bits_asym_autoround(self):
 
 if __name__ == "__main__":
     unittest.main()
-

From cc13a65bd47a3c2ca2282817a81dc2f0fc599b2d Mon Sep 17 00:00:00 2001
From: Weiwei <weiwei1.zhang@intel.com>
Date: Tue, 11 Nov 2025 14:21:23 +0800
Subject: [PATCH 3/4] revert typo: restore commented-out itrex version check in qlinear_qbits.py

---
 auto_round_extension/qbits/qlinear_qbits.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/auto_round_extension/qbits/qlinear_qbits.py b/auto_round_extension/qbits/qlinear_qbits.py
index 476b08aaa..202f1eb19 100644
--- a/auto_round_extension/qbits/qlinear_qbits.py
+++ b/auto_round_extension/qbits/qlinear_qbits.py
@@ -92,6 +92,14 @@ def req_check(self):
         torch_version = str(torch.__version__)
         if QBITS_AVAILABLE:
             pass
+            # import intel_extension_for_transformers
+            # itrex_version = str(intel_extension_for_transformers.__version__)
+            # version_match_map = {"1.4": "2.2.0+cpu",
+            #                      "1.4.1": "2.2.0+cpu", "1.4.2": "2.3.0+cpu"}
+            # if itrex_version in version_match_map:
+            #     if torch_version != version_match_map[itrex_version]:
+            #         logger.warning(
+            #             f"Please install torch {version_match_map[itrex_version]} by command 'pip install torch=={version_match_map[itrex_version]} --extra-index-url https://download.pytorch.org/whl/cpu' as Intel Extension for Transformers {itrex_version} is not compatible with current torch.")
         else:
             logger.error(
                 "Please install Intel Extension for Transformers by running 'pip install intel-extension-for-transformers' as qbits linear requirements checking fail. "

From d41eac325283686aa459d069cfb1714358d727f6 Mon Sep 17 00:00:00 2001
From: Weiwei <weiwei1.zhang@intel.com>
Date: Tue, 11 Nov 2025 14:21:56 +0800
Subject: [PATCH 4/4] revert typo: restore commented-out itrex version check in qlinear_qbits_gptq.py

---
 auto_round_extension/qbits/qlinear_qbits_gptq.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/auto_round_extension/qbits/qlinear_qbits_gptq.py b/auto_round_extension/qbits/qlinear_qbits_gptq.py
index ba83d0641..c9c9e1fea 100644
--- a/auto_round_extension/qbits/qlinear_qbits_gptq.py
+++ b/auto_round_extension/qbits/qlinear_qbits_gptq.py
@@ -92,6 +92,14 @@ def req_check(self):
         torch_version = str(torch.__version__)
         if QBITS_AVAILABLE:
             pass
+            # import intel_extension_for_transformers
+            # itrex_version = str(intel_extension_for_transformers.__version__)
+            # version_match_map = {"1.4": "2.2.0+cpu",
+            #                      "1.4.1": "2.2.0+cpu", "1.4.2": "2.3.0+cpu"}
+            # if itrex_version in version_match_map:
+            #     if torch_version != version_match_map[itrex_version]:
+            #         logger.warning(
+            #             f"Please install torch {version_match_map[itrex_version]} by command 'pip install torch=={version_match_map[itrex_version]} --extra-index-url https://download.pytorch.org/whl/cpu' as Intel Extension for Transformers {itrex_version} is not compatible with current torch.")
         else:
             logger.error(
                 "Please install Intel Extension for Transformers by running 'pip install intel-extension-for-transformers' as qbits linear requirements checking fail. "