# MIT License

# Copyright (c) 2024 The HuggingFace Team
# Copyright (c) 2024 Philip May, Deutsche Telekom AG

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# ruff: noqa: F405, F403, F401
"""
Custom evaluation tasks for lighteval: the German "gerbench" benchmark.

This file creates a TASKS_TABLE, which is then imported by LightEval.
It defines two tasks, both scored with the ``loglikelihood_acc`` metric:

* ``gerbench:sentence_errors`` (dataset ``ellamind/gerbench_sentence_errors``)
* ``gerbench:next_word`` (dataset ``ellamind/gerbench_next_word``)

Example task specifications, one per line of a lighteval tasks file
(``examples/tasks/all_gerbench_tasks.txt``):

    community|gerbench:sentence_errors|0|0
    community|gerbench:next_word|0|0

NOTE(review): an earlier version of this docstring pointed to
https://huggingface.co/datasets/deutsche-telekom/Ger-RAG-eval; this module
does not load that dataset. Presumably the file was adapted from the
Ger-RAG-eval community tasks (see the copyright notice) - confirm.
"""

from typing import Optional

from lighteval.tasks.lighteval_task import LightevalTaskConfig
from lighteval.tasks.requests import Doc


# Dataset columns holding the four candidate answers, in presentation order.
# The "golden" column of a row names one of these keys.
_ANSWER_KEYS = ("answer1", "answer2", "answer3", "answer4")


# Task 1: Sentence errors
# Detect the correct German sentence.
# Four German sentences are given; the task is to decide which one is the
# correct (error-free) sentence.
task1 = LightevalTaskConfig(
    name="gerbench:sentence_errors",
    prompt_function="prompt_fn_sentence_errors",
    suite=["community"],
    hf_repo="ellamind/gerbench_sentence_errors",
    hf_subset=None,
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    metric=["loglikelihood_acc"],
    version=0,
)

# Task 2: Next word
# Given is a German sentence up to its last word and four candidate continuations.
# The task is to decide which candidate continues the sentence correctly.
task2 = LightevalTaskConfig(
    name="gerbench:next_word",
    prompt_function="prompt_fn_next_word",
    suite=["community"],
    hf_repo="ellamind/gerbench_next_word",
    hf_subset=None,
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    metric=["loglikelihood_acc"],
    version=1,
)


def prompt_fn_sentence_errors(line, task_name: Optional[str] = None):
    """Build the ``Doc`` for one row of the sentence-errors task.

    Args:
        line: Dataset row. The keys ``answer1`` .. ``answer4`` hold the four
            candidate sentences; ``golden`` holds the name of the key whose
            sentence is the correct one (e.g. ``"answer2"``).
        task_name: Passed through unchanged to the returned ``Doc``.

    Returns:
        A ``Doc`` whose query lists the four sentences under the letters A-D,
        whose choices are those letters, and whose ``gold_index`` is the
        position of the letter that belongs to the golden key.

    Raises:
        KeyError: If an answer key or ``golden`` is missing from ``line``, or
            if ``golden`` does not name one of the four answer keys.
    """
    instruction = """\
Es sind vier deutschsprachige Sätze unter A, B, C und D gegeben. Drei enthalten einen kleinen Fehler und einer ist der Originalsatz. Bitte antworte mit dem Buchstaben (A, B, C oder D) des Satzes, der KEINEN Fehler enthält!"""

    query_template = """
A: {choice_a}
B: {choice_b}
C: {choice_c}
D: {choice_d}

Antwort:"""
    query = instruction + query_template.format(
        choice_a=line["answer1"],
        choice_b=line["answer2"],
        choice_c=line["answer3"],
        choice_d=line["answer4"],
    )
    choices = ["A", "B", "C", "D"]
    # answer1 -> A, answer2 -> B, ...: translate the golden column name into its letter.
    answer_mapping = dict(zip(_ANSWER_KEYS, choices))
    return Doc(
        task_name=task_name,
        instruction=instruction,
        query=query,
        choices=choices,
        gold_index=choices.index(answer_mapping[line["golden"]]),
    )


def prompt_fn_next_word(line, task_name: Optional[str] = None):
    """Build the ``Doc`` for one row of the next-word task.

    Args:
        line: Dataset row. ``satz_bis_zum_letzten_wort`` holds the sentence
            prefix shown to the model; ``answer1`` .. ``answer4`` hold the four
            candidate continuations; ``golden`` holds the name of the key whose
            value is the correct continuation.
        task_name: Passed through unchanged to the returned ``Doc``.

    Returns:
        A ``Doc`` whose query is the instruction followed by the sentence
        prefix (ending in a single space), whose choices are the four
        candidate texts, and whose ``gold_index`` is the position of the first
        choice equal to the golden answer text.

    Raises:
        KeyError: If a required key is missing from ``line`` or ``golden``
            names a key that is not present.
    """
    instruction = """\
Bitte setze den folgenden deutschsprachigen Satz korrekt fort!"""

    query_template = """
{satz_bis_zum_letzten_wort} """
    query = instruction + query_template.format(
        satz_bis_zum_letzten_wort=line["satz_bis_zum_letzten_wort"],
    )
    choices = [line[key] for key in _ANSWER_KEYS]
    return Doc(
        task_name=task_name,
        instruction=instruction,
        query=query,
        choices=choices,
        # Look the gold answer up by text; with duplicate candidate texts this
        # deliberately resolves to the first occurrence.
        gold_index=choices.index(line[line["golden"]]),
    )


# STORE YOUR EVALS
_TASKS = [task1, task2]


# MODULE LOGIC
# You should not need to touch this
# Convert to dict for lighteval
TASKS_TABLE = [task.as_dict() for task in _TASKS]

if __name__ == "__main__":
    # Build a list so the task names are printed, not a generator object's repr.
    print([t["name"] for t in TASKS_TABLE])
    print(len(TASKS_TABLE))