From 7aadacfe8edbe8172fb8e38178822e9d618aea73 Mon Sep 17 00:00:00 2001
From: Mandlin Sarah
Date: Fri, 30 Aug 2024 17:14:57 -0700
Subject: [PATCH] Refactored to handle empty 'pred' lists gracefully

---
 evaluation/evaluate.py | 29 +++++++++++++++++++----------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/evaluation/evaluate.py b/evaluation/evaluate.py
index cee60453..3e45c4b3 100755
--- a/evaluation/evaluate.py
+++ b/evaluation/evaluate.py
@@ -55,27 +55,33 @@ def evaluate(data_name, prompt_type, samples: list=None, file_path: str=None, ma
     idx = 0
     score_mat = []
     for sample in samples:
+        if not sample['pred']:  # Skip samples with an empty 'pred' list
+            continue
         sample['score'] = scores[idx: idx+len(sample['pred'])]
         assert len(sample['score']) == len(sample['pred'])
         score_mat.append(sample['score'])
         idx += len(sample['pred'])
 
-    max_len = max([len(s) for s in score_mat])
+    if not score_mat:  # Handle case where score_mat might be empty
+        col_means = np.array([])
+        mean_score = []
+    else:
+        max_len = max([len(s) for s in score_mat])
 
-    for i, s in enumerate(score_mat):
-        if len(s) < max_len:
-            score_mat[i] = s + [s[-1]] * (max_len - len(s)) # pad
+        for i, s in enumerate(score_mat):
+            if len(s) < max_len:
+                score_mat[i] = s + [s[-1]] * (max_len - len(s))  # pad
 
-    # output mean of each column of scores
-    col_means= np.array(score_mat).mean(axis=0)
-    mean_score = list(np.round(col_means * 100, decimals=1))
+        # output mean of each column of scores
+        col_means = np.array(score_mat).mean(axis=0)
+        mean_score = list(np.round(col_means * 100, decimals=1))
 
     result_json = {
         "num_samples": len(samples),
         "num_scores": len(scores),
         "timeout_samples": timeout_cnt,
-        "empty_samples": len([s for s in samples if not s['pred'][-1]]),
-        "acc": mean_score[0]
+        "empty_samples": len([s for s in samples if not s['pred']]),
+        "acc": mean_score[0] if mean_score else 0
     }
 
     # each type score
@@ -84,7 +90,8 @@ def evaluate(data_name, prompt_type, samples: list=None, file_path: str=None, ma
         for sample in samples:
             if sample['type'] not in type_scores:
                 type_scores[sample['type']] = []
-            type_scores[sample['type']].append(sample['score'][-1])
+            if sample['pred']:  # Add only if 'pred' is not empty
+                type_scores[sample['type']].append(sample['score'][-1])
         type_scores = {k: np.round(np.array(v).mean() * 100, decimals=1) for k, v in type_scores.items()}
         type_scores = {k: v for k, v in sorted(type_scores.items(), key=lambda item: item[0])}
         result_json['type_acc'] = type_scores
@@ -107,3 +114,5 @@ def parse_args():
 
 args = parse_args()
 evaluate(data_name=args.data_name, prompt_type=args.prompt_type, file_path=args.file_path, max_num_samples=args.max_num_samples, execute=args.execute)
+
+
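
For reviewers who want to poke at the new control flow outside the harness, below is a minimal, self-contained sketch of the patched aggregation path. The aggregate_scores helper name and the toy data are illustrative only, not part of the repo; the sketch assumes samples shaped like {'pred': [...], 'type': ...} and a flat scores list with one entry per prediction, which is the shape the diff implies.

import numpy as np

def aggregate_scores(samples, scores):
    """Mirrors the patched scoring loop: builds a per-sample score
    matrix while skipping samples whose 'pred' list is empty."""
    idx = 0
    score_mat = []
    for sample in samples:
        if not sample['pred']:  # skipped, exactly as in the patch
            continue
        sample['score'] = scores[idx: idx + len(sample['pred'])]
        assert len(sample['score']) == len(sample['pred'])
        score_mat.append(sample['score'])
        idx += len(sample['pred'])

    if not score_mat:  # every sample had an empty 'pred' list
        return []

    # Pad shorter rows with their last score so the matrix is rectangular,
    # then average each column, as the existing code does.
    max_len = max(len(s) for s in score_mat)
    score_mat = [s + [s[-1]] * (max_len - len(s)) for s in score_mat]
    col_means = np.array(score_mat).mean(axis=0)
    return np.round(col_means * 100, decimals=1).tolist()

samples = [
    {'pred': ['42', '42'], 'type': 'algebra'},  # two attempts, both scored
    {'pred': [], 'type': 'geometry'},           # raised IndexError before this patch
    {'pred': ['7'], 'type': 'algebra'},         # one attempt, padded to two
]
scores = [True, False, True]  # one score per prediction, flattened
print(aggregate_scores(samples, scores))  # -> [100.0, 50.0]

One trade-off worth noting: samples with an empty 'pred' list are now dropped from the score matrix and from 'type_acc', so they no longer pull 'acc' down; they remain visible in 'num_samples' and in the corrected 'empty_samples' count.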