@@ -51,7 +51,7 @@ def __init__(
         self.unitxt_recipe = unitxt_recipe
 
     def assign_tasks_dir(self, task_name):
-        return f"{TEMP_DIR_PREFIX}_{task_name}"
+        return os.path.join("eval_output", f"{TEMP_DIR_PREFIX}_{task_name}")
 
     def assign_task_name(self):
         return str(uuid4())
@@ -90,28 +90,30 @@ def run(self, server_url: str | None = None) -> tuple:
         self.prepare_unitxt_files()
         logger.debug(locals())
         os.environ["TOKENIZERS_PARALLELISM"] = "true"
-        results = self._run_mmlu(server_url=server_url)
-        taskname = self.tasks[0]
-        global_scores = results["results"][taskname]
-        global_scores.pop("alias")
         try:
-            instances = results["samples"][taskname]
-            instance_scores = {}
-            metrics = [
-                metric.replace("metrics.", "")
-                for metric in instances[0]["doc"]["metrics"]
-            ]
-            for i, instance in enumerate(instances):
-                scores = {}
-                for metric in metrics:
-                    scores[metric] = instance[metric][0]
-                instance_scores[i] = scores
-        except KeyError as e:
-            logger.error("Error in extracting single instance scores")
-            logger.error(e)
-            logger.error(e.__traceback__)
-            instance_scores = None
-        self.remove_unitxt_files()
+            results = self._run_mmlu(server_url=server_url)
+            taskname = self.tasks[0]
+            global_scores = results["results"][taskname]
+            global_scores.pop("alias")
+            try:
+                instances = results["samples"][taskname]
+                instance_scores = {}
+                metrics = [
+                    metric.replace("metrics.", "")
+                    for metric in instances[0]["doc"]["metrics"]
+                ]
+                for i, instance in enumerate(instances):
+                    scores = {}
+                    for metric in metrics:
+                        scores[metric] = instance[metric][0]
+                    instance_scores[i] = scores
+            except KeyError as e:
+                logger.error("Error in extracting single instance scores")
+                logger.error(e)
+                logger.error(e.__traceback__)
+                instance_scores = None
+        finally:
+            self.remove_unitxt_files()
         return global_scores, instance_scores
 
 
0 commit comments