                     help="calibration iters.")
 parser.add_argument("--tasks", nargs='+', default=["lambada_openai",
                     "hellaswag", "winogrande", "piqa", "wikitext"],
-                    type=str, help="tasks list for accuracy validation")
+                    type=str, help="tasks list for accuracy validation; text-generation and code-generation tasks are different.")
 parser.add_argument("--peft_model_id", type=str, default=None, help="model_name_or_path of peft model")
 # ============SmoothQuant configs==============
 parser.add_argument("--sq", action="store_true")
                          this should align with your model config, \
                          and your dataset builder args: args.pad_max_length')
 parser.add_argument('--gptq_debug', action='store_true', help='Whether to use debug model')
-# =======================================
+# ============Code generation args==============
+parser.add_argument("--code_generation", action="store_true")
+parser.add_argument("--n_samples", default=200, type=int)
+parser.add_argument(
+    "--limit", default=None, type=int, help="Limit number of samples to eval"
+)
+parser.add_argument("--allow_code_execution", action="store_true")
+parser.add_argument("--prefix", default="")
+parser.add_argument("--generation_only", action="store_true")
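+# note: store_false means postprocessing is enabled by default; passing --postprocess disables it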
+parser.add_argument("--postprocess", action="store_false")
+parser.add_argument("--save_references", action="store_true")
+parser.add_argument("--save_generations", action="store_true")
+parser.add_argument("--instruction_tokens", default=None)
+parser.add_argument("--save_generations_path", default="generations.json")
+parser.add_argument("--load_generations_path", default=None)
+parser.add_argument("--metric_output_path", default="evaluation_results.json")
+parser.add_argument("--max_length_generation", default=512, type=int)
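+# sampling knobs: temperature/top_p/top_k only take effect when --do_sample is set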
+parser.add_argument("--temperature", default=0.8, type=float)
+parser.add_argument("--top_p", default=0.8, type=float)
+parser.add_argument("--top_k", default=0, type=int)
+parser.add_argument("--do_sample", action="store_true")
+parser.add_argument("--check_references", action="store_true")
+parser.add_argument("--max_memory_per_gpu", type=str, default=None)
+parser.add_argument(
+    "--modeltype",
+    default="causal",
+    help="AutoModel to use; it can be causal or seq2seq",
+)
+parser.add_argument(
+    "--limit_start",
+    type=int,
+    default=0,
+    help="Optional offset to start from when limiting the number of samples",
+)
 
 args = parser.parse_args()
 if args.ipex:
@@ -262,7 +295,7 @@ def calib_func(prepared_model):
     if args.gptq_debug:
         from neural_compressor.adaptor.torch_utils.weight_only import gptq_quantize
 
-        conf = {
+        gptq_conf = {
             ".*": {
                 'wbits': args.woq_bits,  # 1-8 bits
                 'group_size': args.woq_group_size,  # -1 (per-channel)
@@ -272,20 +305,16 @@ def calib_func(prepared_model):
         }
         q_model_gptq_debug, gptq_config = gptq_quantize(
             user_model,
-            weight_config=conf,
+            weight_config=gptq_conf,
             dataloader=calib_dataloader,
             nsamples=args.gptq_nsamples,
             use_max_length=args.gptq_use_max_length,
-            pad_max_length=args.gptq_pad_max_length
+            pad_max_length=args.gptq_pad_max_length,
         )
-        from intel_extension_for_transformers.llm.evaluation.lm_eval import evaluate
 
-        results = evaluate(
-            model="hf-causal",
-            model_args='pretrained=' + args.model + ',tokenizer=' + args.model + ',dtype=float32',
-            user_model=q_model_gptq_debug, tasks=["lambada_openai"],
-            batch_size=4
-        )
+        # save the fake quantized model
+        os.makedirs(args.output_dir, exist_ok=True)
+        torch.save(q_model_gptq_debug, os.path.join(args.output_dir, "gptq_best_model.pt"))
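+        # this checkpoint is reloaded in the accuracy branch below when --gptq_debug is set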
         exit(0)
 
     else:
@@ -317,7 +346,6 @@ def calib_func(prepared_model):
     eval_dataset = load_dataset('lambada', split='validation')
     evaluator = Evaluator(eval_dataset, tokenizer)
 
-
     def eval_func(model):
         acc = evaluator.evaluate(model)
         return acc
@@ -347,15 +375,29 @@ def eval_func(model):
 
 if args.accuracy:
     user_model.eval()
-    from intel_extension_for_transformers.llm.evaluation.lm_eval import evaluate
+    if args.gptq_debug:
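+        # reload the fake-quantized model saved by the GPTQ debug branch above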
+        user_model = torch.load(os.path.join(args.output_dir, "gptq_best_model.pt"))
+    if args.code_generation:
+        from intel_extension_for_transformers.llm.evaluation.lm_code_eval import evaluate
+        from transformers import AutoTokenizer
+        tokenizer = AutoTokenizer.from_pretrained(args.model)
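+        # the code evaluator takes tasks as one comma-separated string (e.g. "humaneval")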
+        results = evaluate(
+            model=user_model,
+            tokenizer=tokenizer,
+            tasks=",".join(args.tasks),
+            batch_size=args.batch_size,
+            args=args,
+        )
+    else:
+        from intel_extension_for_transformers.llm.evaluation.lm_eval import evaluate
+        results = evaluate(
+            model="hf-causal",
+            model_args='pretrained=' + args.model + ',tokenizer=' + args.model + ',dtype=float32',
+            user_model=user_model,
+            batch_size=args.batch_size,
+            tasks=args.tasks,
+        )
 
-    results = evaluate(
-        model="hf-causal",
-        model_args='pretrained=' + args.model + ',tokenizer=' + args.model + ',dtype=float32',
-        user_model=user_model,
-        batch_size=args.batch_size,
-        tasks=args.tasks,
-    )
     dumped = json.dumps(results, indent=2)
     if args.save_accuracy_path:
         with open(args.save_accuracy_path, "w") as f:
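
With the new flags in place, a code-generation accuracy run could be launched as in the sketch below; the script name (run_generation.py) and the model and task choices are illustrative assumptions, not taken from this diff:

    python run_generation.py \
        --model bigcode/starcoder \
        --accuracy \
        --code_generation \
        --tasks humaneval \
        --allow_code_execution \
        --do_sample --temperature 0.8 --top_p 0.8 \
        --n_samples 200 \
        --batch_size 4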