From 7d34e5706386e3f581517ad17be62284a4b41112 Mon Sep 17 00:00:00 2001 From: "Zhang, Weiwei1" Date: Thu, 6 Nov 2025 14:12:30 +0800 Subject: [PATCH 1/4] disable itrex format first Signed-off-by: Zhang, Weiwei1 --- auto_round/utils/common.py | 3 +-- auto_round_extension/qbits/qlinear_qbits.py | 9 +------- .../qbits/qlinear_qbits_gptq.py | 9 +------- test/test_cpu/test_autoround.py | 23 +------------------ test/test_cpu/test_autoround_acc.py | 2 +- 5 files changed, 5 insertions(+), 41 deletions(-) diff --git a/auto_round/utils/common.py b/auto_round/utils/common.py index dea3f8c81..2c655d77d 100644 --- a/auto_round/utils/common.py +++ b/auto_round/utils/common.py @@ -88,8 +88,6 @@ def __init__(self): "auto_round:gptqmodel", "auto_round:auto_awq", "auto_round:llm_compressor", - "itrex", - "itrex_xpu", "fake", "llm_compressor", ) @@ -297,3 +295,4 @@ def get_reciprocal(tensor): else: tensor = torch.where(torch.abs(tensor) < 1e-30, 0, tensor) return torch.where(tensor != 0, 1 / tensor, torch.zeros_like(tensor)) + diff --git a/auto_round_extension/qbits/qlinear_qbits.py b/auto_round_extension/qbits/qlinear_qbits.py index 202f1eb19..05a5500e9 100644 --- a/auto_round_extension/qbits/qlinear_qbits.py +++ b/auto_round_extension/qbits/qlinear_qbits.py @@ -92,14 +92,6 @@ def req_check(self): torch_version = str(torch.__version__) if QBITS_AVAILABLE: pass - # import intel_extension_for_transformers - # itrex_version = str(intel_extension_for_transformers.__version__) - # version_match_map = {"1.4": "2.2.0+cpu", - # "1.4.1": "2.2.0+cpu", "1.4.2": "2.3.0+cpu"} - # if itrex_version in version_match_map: - # if torch_version != version_match_map[itrex_version]: - # logger.warning( - # f"Please install torch {version_match_map[itrex_version]} by command 'pip install torch=={version_match_map[itrex_version]} --extra-index-url https://download.pytorch.org/whl/cpu' as Intel Extension for Transformers {itrex_version} is not compatible with current torch.") else: logger.error( "Please install Intel Extension for Transformers by running 'pip install intel-extension-for-transformers' as qbits linear requirements checking fail. " @@ -228,3 +220,4 @@ def dequantize_weight(qweight, qzeros, scales, bits): __all__ = ["QuantLinear"] + diff --git a/auto_round_extension/qbits/qlinear_qbits_gptq.py b/auto_round_extension/qbits/qlinear_qbits_gptq.py index c9c9e1fea..3c012d693 100644 --- a/auto_round_extension/qbits/qlinear_qbits_gptq.py +++ b/auto_round_extension/qbits/qlinear_qbits_gptq.py @@ -92,14 +92,6 @@ def req_check(self): torch_version = str(torch.__version__) if QBITS_AVAILABLE: pass - # import intel_extension_for_transformers - # itrex_version = str(intel_extension_for_transformers.__version__) - # version_match_map = {"1.4": "2.2.0+cpu", - # "1.4.1": "2.2.0+cpu", "1.4.2": "2.3.0+cpu"} - # if itrex_version in version_match_map: - # if torch_version != version_match_map[itrex_version]: - # logger.warning( - # f"Please install torch {version_match_map[itrex_version]} by command 'pip install torch=={version_match_map[itrex_version]} --extra-index-url https://download.pytorch.org/whl/cpu' as Intel Extension for Transformers {itrex_version} is not compatible with current torch.") else: logger.error( "Please install Intel Extension for Transformers by running 'pip install intel-extension-for-transformers' as qbits linear requirements checking fail. " @@ -229,3 +221,4 @@ def dequantize_weight(qweight, qzeros, scales, bits): __all__ = ["QuantLinear"] + diff --git a/test/test_cpu/test_autoround.py b/test/test_cpu/test_autoround.py index 7dfd4b479..9a9960b89 100644 --- a/test/test_cpu/test_autoround.py +++ b/test/test_cpu/test_autoround.py @@ -162,28 +162,6 @@ def test_nv_fp4(self): print(result["results"]["lambada_openai"]["acc,none"]) self.assertGreater(result["results"]["lambada_openai"]["acc,none"], 0.35) - def test_default(self): - bits, group_size, sym = 4, 128, False - autoround = AutoRound( - self.model, - self.tokenizer, - bits=bits, - group_size=group_size, - sym=sym, - iters=2, - seqlen=2, - dataset=self.llm_dataloader, - ) - autoround.quantize() - - autoround.save_quantized(output_dir="./saved", inplace=False, format="itrex") - try: - import auto_gptq - except: - return - if torch.cuda.is_available(): - autoround.save_quantized(output_dir="./saved", inplace=False) - def test_w4g1(self): model_name = "/tf_dataset/auto_round/models/facebook/opt-125m" bits, group_size, sym = 4, -1, True @@ -817,3 +795,4 @@ def test_create_adam(self): if __name__ == "__main__": unittest.main() + diff --git a/test/test_cpu/test_autoround_acc.py b/test/test_cpu/test_autoround_acc.py index 97211ade4..3f77198a6 100644 --- a/test/test_cpu/test_autoround_acc.py +++ b/test/test_cpu/test_autoround_acc.py @@ -13,7 +13,6 @@ from transformers import AutoModelForCausalLM, AutoTokenizer from auto_round import AutoRound # pylint: disable=E0401 -from auto_round.export.export_to_itrex.export import pack_model # pylint: disable=E0401 class LLMDataLoader: @@ -92,3 +91,4 @@ def test_3bits_asym_autoround(self): if __name__ == "__main__": unittest.main() + From 8dc5c90dd56262d5583f2767736d46d689ba89d7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 6 Nov 2025 06:13:14 +0000 Subject: [PATCH 2/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- auto_round/utils/common.py | 1 - auto_round_extension/qbits/qlinear_qbits.py | 1 - auto_round_extension/qbits/qlinear_qbits_gptq.py | 1 - test/test_cpu/test_autoround.py | 1 - test/test_cpu/test_autoround_acc.py | 1 - 5 files changed, 5 deletions(-) diff --git a/auto_round/utils/common.py b/auto_round/utils/common.py index 2c655d77d..bdac9725c 100644 --- a/auto_round/utils/common.py +++ b/auto_round/utils/common.py @@ -295,4 +295,3 @@ def get_reciprocal(tensor): else: tensor = torch.where(torch.abs(tensor) < 1e-30, 0, tensor) return torch.where(tensor != 0, 1 / tensor, torch.zeros_like(tensor)) - diff --git a/auto_round_extension/qbits/qlinear_qbits.py b/auto_round_extension/qbits/qlinear_qbits.py index 05a5500e9..476b08aaa 100644 --- a/auto_round_extension/qbits/qlinear_qbits.py +++ b/auto_round_extension/qbits/qlinear_qbits.py @@ -220,4 +220,3 @@ def dequantize_weight(qweight, qzeros, scales, bits): __all__ = ["QuantLinear"] - diff --git a/auto_round_extension/qbits/qlinear_qbits_gptq.py b/auto_round_extension/qbits/qlinear_qbits_gptq.py index 3c012d693..ba83d0641 100644 --- a/auto_round_extension/qbits/qlinear_qbits_gptq.py +++ b/auto_round_extension/qbits/qlinear_qbits_gptq.py @@ -221,4 +221,3 @@ def dequantize_weight(qweight, qzeros, scales, bits): __all__ = ["QuantLinear"] - diff --git a/test/test_cpu/test_autoround.py b/test/test_cpu/test_autoround.py index 9a9960b89..19e7f2fa1 100644 --- a/test/test_cpu/test_autoround.py +++ b/test/test_cpu/test_autoround.py @@ -795,4 +795,3 @@ def test_create_adam(self): if __name__ == "__main__": unittest.main() - diff --git a/test/test_cpu/test_autoround_acc.py b/test/test_cpu/test_autoround_acc.py index 3f77198a6..41b28e663 100644 --- a/test/test_cpu/test_autoround_acc.py +++ b/test/test_cpu/test_autoround_acc.py @@ -91,4 +91,3 @@ def test_3bits_asym_autoround(self): if __name__ == "__main__": unittest.main() - From cc13a65bd47a3c2ca2282817a81dc2f0fc599b2d Mon Sep 17 00:00:00 2001 From: Weiwei Date: Tue, 11 Nov 2025 14:21:23 +0800 Subject: [PATCH 3/4] revert typo --- auto_round_extension/qbits/qlinear_qbits.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/auto_round_extension/qbits/qlinear_qbits.py b/auto_round_extension/qbits/qlinear_qbits.py index 476b08aaa..202f1eb19 100644 --- a/auto_round_extension/qbits/qlinear_qbits.py +++ b/auto_round_extension/qbits/qlinear_qbits.py @@ -92,6 +92,14 @@ def req_check(self): torch_version = str(torch.__version__) if QBITS_AVAILABLE: pass + # import intel_extension_for_transformers + # itrex_version = str(intel_extension_for_transformers.__version__) + # version_match_map = {"1.4": "2.2.0+cpu", + # "1.4.1": "2.2.0+cpu", "1.4.2": "2.3.0+cpu"} + # if itrex_version in version_match_map: + # if torch_version != version_match_map[itrex_version]: + # logger.warning( + # f"Please install torch {version_match_map[itrex_version]} by command 'pip install torch=={version_match_map[itrex_version]} --extra-index-url https://download.pytorch.org/whl/cpu' as Intel Extension for Transformers {itrex_version} is not compatible with current torch.") else: logger.error( "Please install Intel Extension for Transformers by running 'pip install intel-extension-for-transformers' as qbits linear requirements checking fail. " From d41eac325283686aa459d069cfb1714358d727f6 Mon Sep 17 00:00:00 2001 From: Weiwei Date: Tue, 11 Nov 2025 14:21:56 +0800 Subject: [PATCH 4/4] revert typo --- auto_round_extension/qbits/qlinear_qbits_gptq.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/auto_round_extension/qbits/qlinear_qbits_gptq.py b/auto_round_extension/qbits/qlinear_qbits_gptq.py index ba83d0641..c9c9e1fea 100644 --- a/auto_round_extension/qbits/qlinear_qbits_gptq.py +++ b/auto_round_extension/qbits/qlinear_qbits_gptq.py @@ -92,6 +92,14 @@ def req_check(self): torch_version = str(torch.__version__) if QBITS_AVAILABLE: pass + # import intel_extension_for_transformers + # itrex_version = str(intel_extension_for_transformers.__version__) + # version_match_map = {"1.4": "2.2.0+cpu", + # "1.4.1": "2.2.0+cpu", "1.4.2": "2.3.0+cpu"} + # if itrex_version in version_match_map: + # if torch_version != version_match_map[itrex_version]: + # logger.warning( + # f"Please install torch {version_match_map[itrex_version]} by command 'pip install torch=={version_match_map[itrex_version]} --extra-index-url https://download.pytorch.org/whl/cpu' as Intel Extension for Transformers {itrex_version} is not compatible with current torch.") else: logger.error( "Please install Intel Extension for Transformers by running 'pip install intel-extension-for-transformers' as qbits linear requirements checking fail. "