@@ -219,7 +219,10 @@ def smooth_quant(
219219 self .sq = ORTSmoothQuant (self .pre_optimized_model , dataloader , self .reduce_range , self .backend )
220220 self .sq .record_max_info = record_max_info
221221 self .smooth_quant_model = self .sq .transform (** self .cur_sq_args )
222- logger .info ("Updated the pre-optimized model with smooth quant model." )
222+ if not record_max_info : # pragma: no cover
223+ logger .info ("Updated the pre-optimized model with smooth quant model." )
224+ else :
225+ logger .info ("Collected scale information for smooth quant." )
223226 # TODO double-check the smooth_quant_model and pre_optimized_model to make sure there are no two fp32 model replicas
224227 self .pre_optimized_model = self .smooth_quant_model
225228 return self .smooth_quant_model
@@ -305,6 +308,7 @@ def quantize(self, tune_cfg, model, data_loader, q_func=None):
305308 self .sq .model = tmp_model
306309 self .sq .record_max_info = False
307310 tmp_model = self .sq .transform (** self .cur_sq_args )
311+ logger .info ("Model is smooth quantized." )
308312
309313 iterations = tune_cfg .get ("calib_iteration" , 1 )
310314 calib_sampling_size = tune_cfg .get ("calib_sampling_size" , 1 )
@@ -1129,7 +1133,7 @@ def _replace_gemm_with_matmul(model):
11291133 from onnx import numpy_helper
11301134
11311135 if not isinstance (model , ONNXModel ):
1132- model = ONNXModel (model )
1136+ model = ONNXModel (model , ignore_warning = True )
11331137
11341138 for node in model .nodes ():
11351139 if node .op_type == "Gemm" :
0 commit comments