From e1da2ea9da076c590179740efcd7d813ff319fc5 Mon Sep 17 00:00:00 2001 From: Ross Cutler <46252169+rosscutler@users.noreply.github.com> Date: Wed, 4 Jun 2025 14:35:09 -0700 Subject: [PATCH] fix: drop raters with low acceptance --- src/result_parser.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/result_parser.py b/src/result_parser.py index e9e0112..0e2e2b9 100644 --- a/src/result_parser.py +++ b/src/result_parser.py @@ -890,11 +890,13 @@ def evaluate_rater_performance(data, use_sessions, reject_on_failure=False): # rater_min_accepted_hits_current_test - grouped = df.groupby(['worker_id', 'accept_and_use']).size().unstack(fill_value=0).reset_index() - grouped = grouped.rename(columns={0: 'not_used_count', 1: 'used_count'}) - # check if not_used_count is in grouped - if 'not_used_count' in grouped.columns: - grouped['acceptance_rate'] = (grouped['used_count'] * 100)/(grouped['used_count'] + grouped['not_used_count']) + grouped = df.groupby(['worker_id', 'accept']).size().unstack(fill_value=0).reset_index() + grouped = grouped.rename(columns={0: 'rejected_count', 1: 'accepted_count'}) + # check if rejected_count is in grouped + if 'rejected_count' in grouped.columns: + grouped['acceptance_rate'] = (grouped['accepted_count'] * 100)/( + grouped['accepted_count'] + grouped['rejected_count'] + ) else: grouped['acceptance_rate'] = 100 #grouped.to_csv('tmp.csv') @@ -910,8 +912,8 @@ def evaluate_rater_performance(data, use_sessions, reject_on_failure=False): rater_min_accepted_hits_current_test = 0 grouped_rej = grouped[(grouped.acceptance_rate < rater_min_acceptance_rate_current_test) - | (grouped.used_count < rater_min_accepted_hits_current_test)] - n_submission_removed_only_for_performance = grouped_rej['used_count'].sum() + | (grouped.accepted_count < rater_min_accepted_hits_current_test)] + n_submission_removed_only_for_performance = grouped_rej['accepted_count'].sum() print(f'{n_submission_removed_only_for_performance} sessions are removed only becuase of performance criteria ({section}).') workers_list_to_remove = list(grouped_rej['worker_id'])