From 9f3b7ac100d3451c155e70aec22a7d515345c0e4 Mon Sep 17 00:00:00 2001
From: Jan Philipp Harries <jan@jph.me>
Date: Sun, 2 Jun 2024 01:21:24 +0200
Subject: [PATCH 1/3] first version german benchmark

---
 community_tasks/gerbench.py | 102 ++++++++++++++++++++++++++++++++++++
 1 file changed, 102 insertions(+)
 create mode 100644 community_tasks/gerbench.py

diff --git a/community_tasks/gerbench.py b/community_tasks/gerbench.py
new file mode 100644
index 000000000..47d40ae8b
--- /dev/null
+++ b/community_tasks/gerbench.py
@@ -0,0 +1,102 @@
+"""
+Custom evaluation tasks for lighteval.
+
+This file generally creates just a TASKS_TABLE and TASKS_GROUPS which are then imported by LightEval.
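+This module implements the two gerbench tasks (sentence_errors and next_word) backed by the ellamind datasets; the file layout appears to be derived from the deutsche-telekom/Ger-RAG-eval tasks.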
+See: https://huggingface.co/datasets/deutsche-telekom/Ger-RAG-eval
+"""
+
+from lighteval.tasks.lighteval_task import LightevalTaskConfig
+from lighteval.tasks.requests import Doc
+
+
+# Task 1: Sentence Errors
+# Detect the error-free sentence in German.
+# Given are four sentences; the task is to decide which one is the correct German sentence.
+task1 = LightevalTaskConfig(
+    name="gerbench:sentence_errors",
+    prompt_function="prompt_fn_sentence_errors",
+    suite=["community"],
+    hf_repo="ellamind/gerbench_sentence_errors",
+    hf_subset=None,
+    hf_avail_splits=["test"],
+    evaluation_splits=["test"],
+    metric=["loglikelihood_acc"],
+    version=0,
+)
+
+# Task 2: Next Word.
+# Given is a German sentence without its last word and 4 candidate continuations.
+# The task is to decide which candidate correctly continues the sentence.
+task2 = LightevalTaskConfig(
+    name="gerbench:next_word",
+    prompt_function="prompt_fn_next_word",
+    suite=["community"],
+    hf_repo="ellamind/gerbench_next_words",
+    hf_subset=None,
+    hf_avail_splits=["test"],
+    evaluation_splits=["test"],
+    metric=["loglikelihood_acc"],
+    version=1,
+)
+
+
+def prompt_fn_sentence_errors(line, task_name: str = None):
+    instruction = """\
+Es sind vier deutschsprachige Sätze unter A, B, C und D gegeben. Drei enthalten einen kleinen Fehler und einer ist der Orirignalsatz. Bitte antworte mit dem Buchstaben (A, B, C oder D) des Satzes, der KEINEN Fehler enthält!"""
+
+    query_template = """
+A: {choice_a}
+B: {choice_b}
+C: {choice_c}
+D: {choice_d}
+
+Antwort:"""
+    query = instruction + query_template.format(
+        choice_a=line["answer1"],
+        choice_b=line["answer2"],
+        choice_c=line["answer3"],
+        choice_d=line["answer4"],
+    )
+    choices = ["A", "B", "C", "D"]
+    answer_mapping = {"answer1": "A", "answer2": "B", "answer3": "C", "answer4": "D"}
+    return Doc(
+        task_name=task_name,
+        instruction=instruction,
+        query=query,
+        choices=choices,
+        gold_index=choices.index(answer_mapping[line["golden"]]),
+    )
+
+
+def prompt_fn_next_word(line, task_name: str = None):
+    instruction = """\
+Bitte setze den folgenden deutschsprachiger Satz korrekt fort!"""
+
+    query_template = """
+{satz_bis_zum_letzten_wort} """
+    query = instruction + query_template.format(
+        satz_bis_zum_letzten_wort=line["satz_bis_zum_letzten_wort"],
+    )
+    choices = [line["answer1"], line["answer2"], line["answer3"], line["answer4"]]
+    return Doc(
+        task_name=task_name,
+        instruction=instruction,
+        query=query,
+        choices=choices,
+        gold_index=choices.index(line[line["golden"]]),
+    )
+
+
+# STORE YOUR EVALS
+_TASKS = [task1, task2]
+
+
+# MODULE LOGIC
+# You should not need to touch this
+# Convert to dict for lighteval
+TASKS_TABLE = [task.as_dict() for task in _TASKS]
+
+if __name__ == "__main__":
+    print([t["name"] for t in TASKS_TABLE])  # a list, not a bare generator, so the task names are actually shown
+    print(len(TASKS_TABLE))

From 7d3037db1478709179a86d84530e3d563684d0f8 Mon Sep 17 00:00:00 2001
From: Jan Philipp Harries <jan@jph.me>
Date: Sun, 2 Jun 2024 01:25:24 +0200
Subject: [PATCH 2/3] update tasklist

---
 examples/tasks/all_gerbench_tasks.txt | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 examples/tasks/all_gerbench_tasks.txt

diff --git a/examples/tasks/all_gerbench_tasks.txt b/examples/tasks/all_gerbench_tasks.txt
new file mode 100644
index 000000000..4c460d65a
--- /dev/null
+++ b/examples/tasks/all_gerbench_tasks.txt
@@ -0,0 +1,2 @@
+community|gerbench:sentence_errors|0|0
+community|gerbench:next_word|0|0
\ No newline at end of file

From 47a68f254af1234900c318c2ee0dc3e4bae10569 Mon Sep 17 00:00:00 2001
From: Jan Philipp Harries <jan@jph.me>
Date: Sun, 2 Jun 2024 01:49:23 +0200
Subject: [PATCH 3/3] fix small error and add copyright notice

---
 community_tasks/gerbench.py | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/community_tasks/gerbench.py b/community_tasks/gerbench.py
index 47d40ae8b..22306f73b 100644
--- a/community_tasks/gerbench.py
+++ b/community_tasks/gerbench.py
@@ -1,3 +1,27 @@
+# MIT License
+
+# Copyright (c) 2024 The HuggingFace Team
+# Copyright (c) 2024 Philip May, Deutsche Telekom AG
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# ruff: noqa: F405, F403, F401
 """
 Custom evaluation tasks for lighteval.
 
@@ -32,7 +56,7 @@
     name="gerbench:next_word",
     prompt_function="prompt_fn_next_word",
     suite=["community"],
-    hf_repo="ellamind/gerbench_next_words",
+    hf_repo="ellamind/gerbench_next_word",
     hf_subset=None,
     hf_avail_splits=["test"],
     evaluation_splits=["test"],