Skip to content

Commit eb2b20b

Browse files
committed
initial unitxt evaluator
Signed-off-by: Roni Friedman-Melamed <Roni.friedman-melamed@il.ibm.com>
1 parent 61274a6 commit eb2b20b

File tree

4 files changed

+91
-0
lines changed

4 files changed

+91
-0
lines changed

my_tasks/my_task.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
task: my_task
include: unitxt
recipe: card=cards.wnli,template=templates.classification.multi_class.relation.default,max_train_instances=5,loader_limit=20,num_demos=3,demos_pool_size=10

my_tasks/unitxt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
# NOTE(review): absolute, user-specific site-packages path — this will break on
# any other machine/environment; should resolve the lm_eval unitxt task module
# portably. Confirm against lm-eval's task include mechanism.
class: !function /Users/ronches/miniforge3/envs/lmeval/lib/python3.10/site-packages/lm_eval/tasks/unitxt/task.Unitxt

src/instructlab/eval/unitxt.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
"""
2+
Unitxt - Flexible, Shareable and Reusable Data Preparation and Evaluation for Generative AI
3+
https://github.com/IBM/unitxt
4+
https://arxiv.org/abs/2401.14019
5+
"""
6+
7+
# Standard
8+
import os
9+
10+
# First Party
11+
from instructlab.eval.mmlu import MMLUBranchEvaluator
12+
13+
# Local
14+
from .logger_config import setup_logger
15+
16+
logger = setup_logger(__name__)
17+
18+
class UnitxtEvaluator(MMLUBranchEvaluator):
    """Evaluator for Unitxt-defined tasks, run through the lm-eval harness.

    Unitxt - Flexible, Shareable and Reusable Data Preparation and Evaluation
    for Generative AI (https://github.com/IBM/unitxt,
    https://arxiv.org/abs/2401.14019).

    Reuses MMLUBranchEvaluator's custom-task-directory machinery to execute a
    unitxt recipe packaged as an lm-eval task.
    """

    # Evaluator identifier used for registration/lookup.
    name = "unitxt"

    def __init__(
        self,
        model_path,
        tasks_dir: str,
        tasks: list[str],
    ):
        """Initialize the evaluator.

        Args:
            model_path: Path or HF identifier of the model to evaluate.
            tasks_dir:  Directory containing the lm-eval task definitions
                        (the unitxt recipe yaml and its include file).
            tasks:      Names of the tasks in tasks_dir to run.
        """
        # Unitxt recipes carry their own demo/few-shot configuration, so the
        # harness-level few_shots is pinned to 0.
        super().__init__(
            model_path=model_path,
            tasks_dir=tasks_dir,
            tasks=tasks,
            few_shots=0,
        )

    def prepare_files(self, unitxt_recipe) -> tuple:
        """Materialize lm-eval task files for a unitxt recipe.

        Placeholder: currently returns empty values.
        TODO: generate the task yaml / include files from unitxt_recipe and
        return their names and directory.

        Returns:
            (tasks, tasks_dir) for the generated task files.
        """
        tasks = ""
        tasks_dir = ""
        return tasks, tasks_dir

    def run(self, server_url: str | None = None) -> tuple:
        """Run the evaluation.

        Args:
            server_url: Optional endpoint of a serving instance to evaluate
                        against; forwarded to the MMLU runner.

        Returns:
            overall_scores:    Average scores for the task group.
            individual_scores: Individual scores for each task in the task
                               group (currently always None).
        """
        logger.debug(locals())
        os.environ["TOKENIZERS_PARALLELISM"] = "true"
        results = self._run_mmlu(server_url=server_url)
        # Fix: dropped the debug dump of raw results to the hardcoded
        # relative path 'my_tasks/output.txt'; raw results go to the debug
        # log instead.
        logger.debug("raw unitxt results: %s", results)
        taskname = self.tasks[0]
        global_scores = results[taskname]
        # 'alias' is lm-eval bookkeeping, not a metric; drop it if present
        # rather than raising KeyError when it is absent.
        global_scores.pop("alias", None)
        # Per-instance scoring is not wired up yet.
        instance_scores = None
        return global_scores, instance_scores

tests/test_unitxt.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# First Party
2+
from instructlab.eval.unitxt import UnitxtEvaluator
3+
4+
5+
def test_unitxt():
    """Smoke-test UnitxtEvaluator end to end.

    Returns:
        True when the evaluator constructs and runs without raising,
        False otherwise (the exception is printed, not propagated).
    """
    print("===> Executing 'test_unitxt'...")
    try:
        model_path = "instructlab/granite-7b-lab"
        tasks = ["my_task"]
        unitxt = UnitxtEvaluator(
            model_path=model_path, tasks_dir='./my_tasks/', tasks=tasks
        )
        overall_score, _ = unitxt.run()
        print(overall_score)
    except Exception as exc:
        # Fix: message previously said 'test_unitxt_branch' — a copy-paste
        # from another test; report this test's actual name.
        print(f"'test_unitxt' failed: {exc}")
        return False
    return True
19+
20+
21+
if __name__ == "__main__":
    # Fix: `assert test_unitxt() == True` is stripped under `python -O` and
    # `== True` is unidiomatic; exit non-zero on failure so CI notices.
    if not test_unitxt():
        raise SystemExit(1)

0 commit comments

Comments
 (0)