Fix LLM model accuracy regression with IPEX 2.1.100 (#1499)

changwangss · web-flow · commit 3cb6d38f3e5a · 2023-12-27T20:35:46.000+08:00
Signed-off-by: Wang, Chang1 <chang1.wang@intel.com>
diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py
@@ -3365,7 +3365,6 @@ def qdq_quantize(self, model, q_model, tune_cfg, dataloader, q_func):
         self._cfg_to_qconfig(tune_cfg, smooth_quant=True)
         update_sq_scale(self.ipex_config_path, smoothquant_scale_info)
         model._model.load_qconf_summary(qconf_summary=self.ipex_config_path)
-
         # real calibration for other operators
         try:
             # IPEX may raise an error on the second iteration.
@@ -3383,8 +3382,10 @@ def qdq_quantize(self, model, q_model, tune_cfg, dataloader, q_func):
                 + "using scale info from SmoothQuant for Linear and "
                 + "one iter calibration for other ops."
             )
-
         model._model.save_qconf_summary(qconf_summary=self.ipex_config_path)
+        if self.version.release > Version("2.1.0").release:
+            update_sq_scale(self.ipex_config_path, smoothquant_scale_info)
+            model._model.load_qconf_summary(qconf_summary=self.ipex_config_path)
         self._ipex_post_quant_process(model, q_model, dataloader, inplace=inplace)
 
         with open(self.ipex_config_path, "r") as f: