@@ -197,12 +197,6 @@ def test_sq_save_load_with_quantize_API(self):
         q_model = quantize(fp32_model, quant_config=quant_config, run_fn=run_fn, example_inputs=example_inputs)
         assert q_model is not None, "Quantization failed!"
         q_model.save("saved_results")
-
-        quant_config.folding = True
-        fp32_model = copy.deepcopy(model)
-        q_model = quantize(fp32_model, quant_config=quant_config, run_fn=run_fn, example_inputs=example_inputs)
-        assert q_model is not None, "Quantization failed!"
-        q_model.save("saved_results")
         inc_out = q_model(example_inputs)

         from neural_compressor.torch.algorithms.smooth_quant import recover_model_from_json
@@ -216,6 +210,7 @@ def test_sq_save_load_with_quantize_API(self):
 
         # compare saved json file
         fp32_model = copy.deepcopy(model)
+        # recover_model_from_json does not support quant_config.folding = True, because folding updates the model weights
         loaded_model = recover_model_from_json(fp32_model, "saved_results/qconfig.json", example_inputs=example_inputs)
         loaded_out = loaded_model(example_inputs)
         assert torch.allclose(inc_out, loaded_out, atol=1e-05), "Unexpected result. Please double check."
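
For context, here is a minimal sketch of the save/recover round-trip this test exercises. The `quantize`, `q_model.save`, and `recover_model_from_json` calls follow the API shown in the diff; the toy model, `run_fn`, and the use of `SmoothQuantConfig` with folding left at its default are assumptions standing in for the test's real fixtures.

```python
import copy

import torch

from neural_compressor.torch.algorithms.smooth_quant import recover_model_from_json
from neural_compressor.torch.quantization import SmoothQuantConfig, quantize

# Toy fp32 model and calibration input; the actual test uses its own fixtures.
model = torch.nn.Sequential(torch.nn.Linear(4, 4), torch.nn.ReLU(), torch.nn.Linear(4, 2))
example_inputs = torch.randn(1, 4)

def run_fn(m):
    # Single calibration pass; a real run_fn would iterate a calibration dataloader.
    m(example_inputs)

# Leave folding at its default: folded smooth quant rewrites the fp32 weights,
# so the saved JSON recipe alone could no longer reproduce the quantized model.
quant_config = SmoothQuantConfig()  # assumed config class for sq quantization

q_model = quantize(copy.deepcopy(model), quant_config=quant_config, run_fn=run_fn, example_inputs=example_inputs)
q_model.save("saved_results")  # writes saved_results/qconfig.json

# Rebuild the quantized model from the untouched fp32 weights plus the JSON recipe.
loaded_model = recover_model_from_json(copy.deepcopy(model), "saved_results/qconfig.json", example_inputs=example_inputs)
assert torch.allclose(q_model(example_inputs), loaded_model(example_inputs), atol=1e-05)
```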