From ac20949b549d02c34f35499ac2f652d737f6a6d8 Mon Sep 17 00:00:00 2001 From: dafnapension Date: Wed, 20 Aug 2025 11:37:15 +0300 Subject: [PATCH 1/2] fixed mmmu by necessary filters Signed-off-by: dafnapension --- prepare/cards/mmmu.py | 13 +++++++++++-- src/unitxt/catalog/cards/mmmu/accounting.json | 18 ++++++++++++++++++ src/unitxt/catalog/cards/mmmu/agriculture.json | 18 ++++++++++++++++++ .../mmmu/architecture_and_engineering.json | 18 ++++++++++++++++++ src/unitxt/catalog/cards/mmmu/art.json | 18 ++++++++++++++++++ src/unitxt/catalog/cards/mmmu/art_theory.json | 18 ++++++++++++++++++ .../cards/mmmu/basic_medical_science.json | 18 ++++++++++++++++++ src/unitxt/catalog/cards/mmmu/biology.json | 18 ++++++++++++++++++ src/unitxt/catalog/cards/mmmu/chemistry.json | 18 ++++++++++++++++++ .../catalog/cards/mmmu/clinical_medicine.json | 18 ++++++++++++++++++ .../catalog/cards/mmmu/computer_science.json | 18 ++++++++++++++++++ src/unitxt/catalog/cards/mmmu/design.json | 18 ++++++++++++++++++ .../diagnostics_and_laboratory_medicine.json | 18 ++++++++++++++++++ src/unitxt/catalog/cards/mmmu/economics.json | 18 ++++++++++++++++++ src/unitxt/catalog/cards/mmmu/electronics.json | 18 ++++++++++++++++++ .../catalog/cards/mmmu/energy_and_power.json | 18 ++++++++++++++++++ src/unitxt/catalog/cards/mmmu/finance.json | 18 ++++++++++++++++++ src/unitxt/catalog/cards/mmmu/geography.json | 18 ++++++++++++++++++ src/unitxt/catalog/cards/mmmu/history.json | 18 ++++++++++++++++++ src/unitxt/catalog/cards/mmmu/literature.json | 18 ++++++++++++++++++ src/unitxt/catalog/cards/mmmu/manage.json | 18 ++++++++++++++++++ src/unitxt/catalog/cards/mmmu/marketing.json | 18 ++++++++++++++++++ src/unitxt/catalog/cards/mmmu/materials.json | 18 ++++++++++++++++++ src/unitxt/catalog/cards/mmmu/math.json | 18 ++++++++++++++++++ .../cards/mmmu/mechanical_engineering.json | 18 ++++++++++++++++++ src/unitxt/catalog/cards/mmmu/music.json | 18 ++++++++++++++++++ src/unitxt/catalog/cards/mmmu/pharmacy.json | 18 ++++++++++++++++++ src/unitxt/catalog/cards/mmmu/physics.json | 18 ++++++++++++++++++ src/unitxt/catalog/cards/mmmu/psychology.json | 18 ++++++++++++++++++ .../catalog/cards/mmmu/public_health.json | 18 ++++++++++++++++++ src/unitxt/catalog/cards/mmmu/sociology.json | 18 ++++++++++++++++++ 31 files changed, 551 insertions(+), 2 deletions(-) diff --git a/prepare/cards/mmmu.py b/prepare/cards/mmmu.py index 11005459f4..405a83af5f 100644 --- a/prepare/cards/mmmu.py +++ b/prepare/cards/mmmu.py @@ -1,7 +1,12 @@ from unitxt.blocks import LoadHF, TaskCard from unitxt.catalog import add_to_catalog from unitxt.collections_operators import Filter -from unitxt.operators import ListFieldValues, MapValues +from unitxt.operators import ( + FilterByCondition, + FilterByExpression, + ListFieldValues, + MapValues, +) from unitxt.processors import LiteralEval, Lower from unitxt.splitters import RenameSplits from unitxt.string_operators import MapReplace @@ -40,6 +45,8 @@ "Sociology", ] +mapping = {"A": 0, "B": 1, "C": 2, "D": 3, "E": 4, "?": None} + for name in config_names: card = TaskCard( loader=LoadHF( @@ -60,10 +67,12 @@ ), LiteralEval(field="choices"), Lower(field="subfield", to_field="topic"), + FilterByCondition(values={"answer": list(mapping.keys())}, condition="in"), MapValues( field="answer", - mapping={"A": 0, "B": 1, "C": 2, "D": 3, "E": 4, "?": None}, + mapping=mapping, ), + FilterByExpression(expression="answer < len(choices)"), ], task="tasks.qa.multiple_choice.with_topic", templates="templates.qa.multiple_choice.with_topic.all", diff --git a/src/unitxt/catalog/cards/mmmu/accounting.json b/src/unitxt/catalog/cards/mmmu/accounting.json index abe0958bb8..c376230581 100644 --- a/src/unitxt/catalog/cards/mmmu/accounting.json +++ b/src/unitxt/catalog/cards/mmmu/accounting.json @@ -61,6 +61,20 @@ "field": "subfield", "to_field": "topic" }, + { + "__type__": "filter_by_condition", + "values": { + "answer": [ + "A", + "B", + "C", + "D", + "E", + "?" + ] + }, + "condition": "in" + }, { "__type__": "map_values", "field": "answer", @@ -72,6 +86,10 @@ "E": 4, "?": null } + }, + { + "__type__": "filter_by_expression", + "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/agriculture.json b/src/unitxt/catalog/cards/mmmu/agriculture.json index 1a78a73478..367ee1a370 100644 --- a/src/unitxt/catalog/cards/mmmu/agriculture.json +++ b/src/unitxt/catalog/cards/mmmu/agriculture.json @@ -61,6 +61,20 @@ "field": "subfield", "to_field": "topic" }, + { + "__type__": "filter_by_condition", + "values": { + "answer": [ + "A", + "B", + "C", + "D", + "E", + "?" + ] + }, + "condition": "in" + }, { "__type__": "map_values", "field": "answer", @@ -72,6 +86,10 @@ "E": 4, "?": null } + }, + { + "__type__": "filter_by_expression", + "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/architecture_and_engineering.json b/src/unitxt/catalog/cards/mmmu/architecture_and_engineering.json index a1cc754ee9..459dd40b4c 100644 --- a/src/unitxt/catalog/cards/mmmu/architecture_and_engineering.json +++ b/src/unitxt/catalog/cards/mmmu/architecture_and_engineering.json @@ -61,6 +61,20 @@ "field": "subfield", "to_field": "topic" }, + { + "__type__": "filter_by_condition", + "values": { + "answer": [ + "A", + "B", + "C", + "D", + "E", + "?" + ] + }, + "condition": "in" + }, { "__type__": "map_values", "field": "answer", @@ -72,6 +86,10 @@ "E": 4, "?": null } + }, + { + "__type__": "filter_by_expression", + "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/art.json b/src/unitxt/catalog/cards/mmmu/art.json index b02abeb36c..d54e598abf 100644 --- a/src/unitxt/catalog/cards/mmmu/art.json +++ b/src/unitxt/catalog/cards/mmmu/art.json @@ -61,6 +61,20 @@ "field": "subfield", "to_field": "topic" }, + { + "__type__": "filter_by_condition", + "values": { + "answer": [ + "A", + "B", + "C", + "D", + "E", + "?" + ] + }, + "condition": "in" + }, { "__type__": "map_values", "field": "answer", @@ -72,6 +86,10 @@ "E": 4, "?": null } + }, + { + "__type__": "filter_by_expression", + "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/art_theory.json b/src/unitxt/catalog/cards/mmmu/art_theory.json index f4dac2cadf..ed34997237 100644 --- a/src/unitxt/catalog/cards/mmmu/art_theory.json +++ b/src/unitxt/catalog/cards/mmmu/art_theory.json @@ -61,6 +61,20 @@ "field": "subfield", "to_field": "topic" }, + { + "__type__": "filter_by_condition", + "values": { + "answer": [ + "A", + "B", + "C", + "D", + "E", + "?" + ] + }, + "condition": "in" + }, { "__type__": "map_values", "field": "answer", @@ -72,6 +86,10 @@ "E": 4, "?": null } + }, + { + "__type__": "filter_by_expression", + "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/basic_medical_science.json b/src/unitxt/catalog/cards/mmmu/basic_medical_science.json index 9af96071e4..c9a5e49fa3 100644 --- a/src/unitxt/catalog/cards/mmmu/basic_medical_science.json +++ b/src/unitxt/catalog/cards/mmmu/basic_medical_science.json @@ -61,6 +61,20 @@ "field": "subfield", "to_field": "topic" }, + { + "__type__": "filter_by_condition", + "values": { + "answer": [ + "A", + "B", + "C", + "D", + "E", + "?" + ] + }, + "condition": "in" + }, { "__type__": "map_values", "field": "answer", @@ -72,6 +86,10 @@ "E": 4, "?": null } + }, + { + "__type__": "filter_by_expression", + "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/biology.json b/src/unitxt/catalog/cards/mmmu/biology.json index a1735098fb..eb64e107ac 100644 --- a/src/unitxt/catalog/cards/mmmu/biology.json +++ b/src/unitxt/catalog/cards/mmmu/biology.json @@ -61,6 +61,20 @@ "field": "subfield", "to_field": "topic" }, + { + "__type__": "filter_by_condition", + "values": { + "answer": [ + "A", + "B", + "C", + "D", + "E", + "?" + ] + }, + "condition": "in" + }, { "__type__": "map_values", "field": "answer", @@ -72,6 +86,10 @@ "E": 4, "?": null } + }, + { + "__type__": "filter_by_expression", + "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/chemistry.json b/src/unitxt/catalog/cards/mmmu/chemistry.json index 50b26e6ea0..bcff30e891 100644 --- a/src/unitxt/catalog/cards/mmmu/chemistry.json +++ b/src/unitxt/catalog/cards/mmmu/chemistry.json @@ -61,6 +61,20 @@ "field": "subfield", "to_field": "topic" }, + { + "__type__": "filter_by_condition", + "values": { + "answer": [ + "A", + "B", + "C", + "D", + "E", + "?" + ] + }, + "condition": "in" + }, { "__type__": "map_values", "field": "answer", @@ -72,6 +86,10 @@ "E": 4, "?": null } + }, + { + "__type__": "filter_by_expression", + "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/clinical_medicine.json b/src/unitxt/catalog/cards/mmmu/clinical_medicine.json index 329c2dcf59..3424c4689f 100644 --- a/src/unitxt/catalog/cards/mmmu/clinical_medicine.json +++ b/src/unitxt/catalog/cards/mmmu/clinical_medicine.json @@ -61,6 +61,20 @@ "field": "subfield", "to_field": "topic" }, + { + "__type__": "filter_by_condition", + "values": { + "answer": [ + "A", + "B", + "C", + "D", + "E", + "?" + ] + }, + "condition": "in" + }, { "__type__": "map_values", "field": "answer", @@ -72,6 +86,10 @@ "E": 4, "?": null } + }, + { + "__type__": "filter_by_expression", + "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/computer_science.json b/src/unitxt/catalog/cards/mmmu/computer_science.json index 396ea95c62..068ffe3253 100644 --- a/src/unitxt/catalog/cards/mmmu/computer_science.json +++ b/src/unitxt/catalog/cards/mmmu/computer_science.json @@ -61,6 +61,20 @@ "field": "subfield", "to_field": "topic" }, + { + "__type__": "filter_by_condition", + "values": { + "answer": [ + "A", + "B", + "C", + "D", + "E", + "?" + ] + }, + "condition": "in" + }, { "__type__": "map_values", "field": "answer", @@ -72,6 +86,10 @@ "E": 4, "?": null } + }, + { + "__type__": "filter_by_expression", + "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/design.json b/src/unitxt/catalog/cards/mmmu/design.json index 52b8f2b022..15ed8b4a2d 100644 --- a/src/unitxt/catalog/cards/mmmu/design.json +++ b/src/unitxt/catalog/cards/mmmu/design.json @@ -61,6 +61,20 @@ "field": "subfield", "to_field": "topic" }, + { + "__type__": "filter_by_condition", + "values": { + "answer": [ + "A", + "B", + "C", + "D", + "E", + "?" + ] + }, + "condition": "in" + }, { "__type__": "map_values", "field": "answer", @@ -72,6 +86,10 @@ "E": 4, "?": null } + }, + { + "__type__": "filter_by_expression", + "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/diagnostics_and_laboratory_medicine.json b/src/unitxt/catalog/cards/mmmu/diagnostics_and_laboratory_medicine.json index 84225d6a0e..31d0a8397a 100644 --- a/src/unitxt/catalog/cards/mmmu/diagnostics_and_laboratory_medicine.json +++ b/src/unitxt/catalog/cards/mmmu/diagnostics_and_laboratory_medicine.json @@ -61,6 +61,20 @@ "field": "subfield", "to_field": "topic" }, + { + "__type__": "filter_by_condition", + "values": { + "answer": [ + "A", + "B", + "C", + "D", + "E", + "?" + ] + }, + "condition": "in" + }, { "__type__": "map_values", "field": "answer", @@ -72,6 +86,10 @@ "E": 4, "?": null } + }, + { + "__type__": "filter_by_expression", + "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/economics.json b/src/unitxt/catalog/cards/mmmu/economics.json index 8f7a690feb..5482d00ec1 100644 --- a/src/unitxt/catalog/cards/mmmu/economics.json +++ b/src/unitxt/catalog/cards/mmmu/economics.json @@ -61,6 +61,20 @@ "field": "subfield", "to_field": "topic" }, + { + "__type__": "filter_by_condition", + "values": { + "answer": [ + "A", + "B", + "C", + "D", + "E", + "?" + ] + }, + "condition": "in" + }, { "__type__": "map_values", "field": "answer", @@ -72,6 +86,10 @@ "E": 4, "?": null } + }, + { + "__type__": "filter_by_expression", + "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/electronics.json b/src/unitxt/catalog/cards/mmmu/electronics.json index 8c709f00b1..87fb6f2faf 100644 --- a/src/unitxt/catalog/cards/mmmu/electronics.json +++ b/src/unitxt/catalog/cards/mmmu/electronics.json @@ -61,6 +61,20 @@ "field": "subfield", "to_field": "topic" }, + { + "__type__": "filter_by_condition", + "values": { + "answer": [ + "A", + "B", + "C", + "D", + "E", + "?" + ] + }, + "condition": "in" + }, { "__type__": "map_values", "field": "answer", @@ -72,6 +86,10 @@ "E": 4, "?": null } + }, + { + "__type__": "filter_by_expression", + "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/energy_and_power.json b/src/unitxt/catalog/cards/mmmu/energy_and_power.json index 81c2bdfcf4..80d4ef22a2 100644 --- a/src/unitxt/catalog/cards/mmmu/energy_and_power.json +++ b/src/unitxt/catalog/cards/mmmu/energy_and_power.json @@ -61,6 +61,20 @@ "field": "subfield", "to_field": "topic" }, + { + "__type__": "filter_by_condition", + "values": { + "answer": [ + "A", + "B", + "C", + "D", + "E", + "?" + ] + }, + "condition": "in" + }, { "__type__": "map_values", "field": "answer", @@ -72,6 +86,10 @@ "E": 4, "?": null } + }, + { + "__type__": "filter_by_expression", + "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/finance.json b/src/unitxt/catalog/cards/mmmu/finance.json index 52a916d0d0..e8174bb508 100644 --- a/src/unitxt/catalog/cards/mmmu/finance.json +++ b/src/unitxt/catalog/cards/mmmu/finance.json @@ -61,6 +61,20 @@ "field": "subfield", "to_field": "topic" }, + { + "__type__": "filter_by_condition", + "values": { + "answer": [ + "A", + "B", + "C", + "D", + "E", + "?" + ] + }, + "condition": "in" + }, { "__type__": "map_values", "field": "answer", @@ -72,6 +86,10 @@ "E": 4, "?": null } + }, + { + "__type__": "filter_by_expression", + "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/geography.json b/src/unitxt/catalog/cards/mmmu/geography.json index 2abc9048f4..31ca21e3cb 100644 --- a/src/unitxt/catalog/cards/mmmu/geography.json +++ b/src/unitxt/catalog/cards/mmmu/geography.json @@ -61,6 +61,20 @@ "field": "subfield", "to_field": "topic" }, + { + "__type__": "filter_by_condition", + "values": { + "answer": [ + "A", + "B", + "C", + "D", + "E", + "?" + ] + }, + "condition": "in" + }, { "__type__": "map_values", "field": "answer", @@ -72,6 +86,10 @@ "E": 4, "?": null } + }, + { + "__type__": "filter_by_expression", + "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/history.json b/src/unitxt/catalog/cards/mmmu/history.json index a54f9a7896..790e9ccae7 100644 --- a/src/unitxt/catalog/cards/mmmu/history.json +++ b/src/unitxt/catalog/cards/mmmu/history.json @@ -61,6 +61,20 @@ "field": "subfield", "to_field": "topic" }, + { + "__type__": "filter_by_condition", + "values": { + "answer": [ + "A", + "B", + "C", + "D", + "E", + "?" + ] + }, + "condition": "in" + }, { "__type__": "map_values", "field": "answer", @@ -72,6 +86,10 @@ "E": 4, "?": null } + }, + { + "__type__": "filter_by_expression", + "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/literature.json b/src/unitxt/catalog/cards/mmmu/literature.json index c3c4c9507e..64829e4689 100644 --- a/src/unitxt/catalog/cards/mmmu/literature.json +++ b/src/unitxt/catalog/cards/mmmu/literature.json @@ -61,6 +61,20 @@ "field": "subfield", "to_field": "topic" }, + { + "__type__": "filter_by_condition", + "values": { + "answer": [ + "A", + "B", + "C", + "D", + "E", + "?" + ] + }, + "condition": "in" + }, { "__type__": "map_values", "field": "answer", @@ -72,6 +86,10 @@ "E": 4, "?": null } + }, + { + "__type__": "filter_by_expression", + "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/manage.json b/src/unitxt/catalog/cards/mmmu/manage.json index 9941f90f45..08c61e9c4d 100644 --- a/src/unitxt/catalog/cards/mmmu/manage.json +++ b/src/unitxt/catalog/cards/mmmu/manage.json @@ -61,6 +61,20 @@ "field": "subfield", "to_field": "topic" }, + { + "__type__": "filter_by_condition", + "values": { + "answer": [ + "A", + "B", + "C", + "D", + "E", + "?" + ] + }, + "condition": "in" + }, { "__type__": "map_values", "field": "answer", @@ -72,6 +86,10 @@ "E": 4, "?": null } + }, + { + "__type__": "filter_by_expression", + "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/marketing.json b/src/unitxt/catalog/cards/mmmu/marketing.json index b917e8826d..21cf0da6f7 100644 --- a/src/unitxt/catalog/cards/mmmu/marketing.json +++ b/src/unitxt/catalog/cards/mmmu/marketing.json @@ -61,6 +61,20 @@ "field": "subfield", "to_field": "topic" }, + { + "__type__": "filter_by_condition", + "values": { + "answer": [ + "A", + "B", + "C", + "D", + "E", + "?" + ] + }, + "condition": "in" + }, { "__type__": "map_values", "field": "answer", @@ -72,6 +86,10 @@ "E": 4, "?": null } + }, + { + "__type__": "filter_by_expression", + "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/materials.json b/src/unitxt/catalog/cards/mmmu/materials.json index 5c589dbd60..a04fe30baa 100644 --- a/src/unitxt/catalog/cards/mmmu/materials.json +++ b/src/unitxt/catalog/cards/mmmu/materials.json @@ -61,6 +61,20 @@ "field": "subfield", "to_field": "topic" }, + { + "__type__": "filter_by_condition", + "values": { + "answer": [ + "A", + "B", + "C", + "D", + "E", + "?" + ] + }, + "condition": "in" + }, { "__type__": "map_values", "field": "answer", @@ -72,6 +86,10 @@ "E": 4, "?": null } + }, + { + "__type__": "filter_by_expression", + "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/math.json b/src/unitxt/catalog/cards/mmmu/math.json index 2f1b6a8d7c..9e60201fdd 100644 --- a/src/unitxt/catalog/cards/mmmu/math.json +++ b/src/unitxt/catalog/cards/mmmu/math.json @@ -61,6 +61,20 @@ "field": "subfield", "to_field": "topic" }, + { + "__type__": "filter_by_condition", + "values": { + "answer": [ + "A", + "B", + "C", + "D", + "E", + "?" + ] + }, + "condition": "in" + }, { "__type__": "map_values", "field": "answer", @@ -72,6 +86,10 @@ "E": 4, "?": null } + }, + { + "__type__": "filter_by_expression", + "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/mechanical_engineering.json b/src/unitxt/catalog/cards/mmmu/mechanical_engineering.json index 72539fcbcb..795ac6c8e9 100644 --- a/src/unitxt/catalog/cards/mmmu/mechanical_engineering.json +++ b/src/unitxt/catalog/cards/mmmu/mechanical_engineering.json @@ -61,6 +61,20 @@ "field": "subfield", "to_field": "topic" }, + { + "__type__": "filter_by_condition", + "values": { + "answer": [ + "A", + "B", + "C", + "D", + "E", + "?" + ] + }, + "condition": "in" + }, { "__type__": "map_values", "field": "answer", @@ -72,6 +86,10 @@ "E": 4, "?": null } + }, + { + "__type__": "filter_by_expression", + "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/music.json b/src/unitxt/catalog/cards/mmmu/music.json index 7a3ac95c92..2df9a81a5d 100644 --- a/src/unitxt/catalog/cards/mmmu/music.json +++ b/src/unitxt/catalog/cards/mmmu/music.json @@ -61,6 +61,20 @@ "field": "subfield", "to_field": "topic" }, + { + "__type__": "filter_by_condition", + "values": { + "answer": [ + "A", + "B", + "C", + "D", + "E", + "?" + ] + }, + "condition": "in" + }, { "__type__": "map_values", "field": "answer", @@ -72,6 +86,10 @@ "E": 4, "?": null } + }, + { + "__type__": "filter_by_expression", + "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/pharmacy.json b/src/unitxt/catalog/cards/mmmu/pharmacy.json index 143f323f99..e13cfa2e97 100644 --- a/src/unitxt/catalog/cards/mmmu/pharmacy.json +++ b/src/unitxt/catalog/cards/mmmu/pharmacy.json @@ -61,6 +61,20 @@ "field": "subfield", "to_field": "topic" }, + { + "__type__": "filter_by_condition", + "values": { + "answer": [ + "A", + "B", + "C", + "D", + "E", + "?" + ] + }, + "condition": "in" + }, { "__type__": "map_values", "field": "answer", @@ -72,6 +86,10 @@ "E": 4, "?": null } + }, + { + "__type__": "filter_by_expression", + "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/physics.json b/src/unitxt/catalog/cards/mmmu/physics.json index 3b433b6437..987215dbc0 100644 --- a/src/unitxt/catalog/cards/mmmu/physics.json +++ b/src/unitxt/catalog/cards/mmmu/physics.json @@ -61,6 +61,20 @@ "field": "subfield", "to_field": "topic" }, + { + "__type__": "filter_by_condition", + "values": { + "answer": [ + "A", + "B", + "C", + "D", + "E", + "?" + ] + }, + "condition": "in" + }, { "__type__": "map_values", "field": "answer", @@ -72,6 +86,10 @@ "E": 4, "?": null } + }, + { + "__type__": "filter_by_expression", + "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/psychology.json b/src/unitxt/catalog/cards/mmmu/psychology.json index 1bdb6bb679..763e3ce482 100644 --- a/src/unitxt/catalog/cards/mmmu/psychology.json +++ b/src/unitxt/catalog/cards/mmmu/psychology.json @@ -61,6 +61,20 @@ "field": "subfield", "to_field": "topic" }, + { + "__type__": "filter_by_condition", + "values": { + "answer": [ + "A", + "B", + "C", + "D", + "E", + "?" + ] + }, + "condition": "in" + }, { "__type__": "map_values", "field": "answer", @@ -72,6 +86,10 @@ "E": 4, "?": null } + }, + { + "__type__": "filter_by_expression", + "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/public_health.json b/src/unitxt/catalog/cards/mmmu/public_health.json index 87ad264852..94c04b3dbe 100644 --- a/src/unitxt/catalog/cards/mmmu/public_health.json +++ b/src/unitxt/catalog/cards/mmmu/public_health.json @@ -61,6 +61,20 @@ "field": "subfield", "to_field": "topic" }, + { + "__type__": "filter_by_condition", + "values": { + "answer": [ + "A", + "B", + "C", + "D", + "E", + "?" + ] + }, + "condition": "in" + }, { "__type__": "map_values", "field": "answer", @@ -72,6 +86,10 @@ "E": 4, "?": null } + }, + { + "__type__": "filter_by_expression", + "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/sociology.json b/src/unitxt/catalog/cards/mmmu/sociology.json index 27f654f6ca..d58cf2bc7b 100644 --- a/src/unitxt/catalog/cards/mmmu/sociology.json +++ b/src/unitxt/catalog/cards/mmmu/sociology.json @@ -61,6 +61,20 @@ "field": "subfield", "to_field": "topic" }, + { + "__type__": "filter_by_condition", + "values": { + "answer": [ + "A", + "B", + "C", + "D", + "E", + "?" + ] + }, + "condition": "in" + }, { "__type__": "map_values", "field": "answer", @@ -72,6 +86,10 @@ "E": 4, "?": null } + }, + { + "__type__": "filter_by_expression", + "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", From 2701950d1921953dd618508e7233455573245de6 Mon Sep 17 00:00:00 2001 From: dafnapension Date: Mon, 25 Aug 2025 12:10:37 +0300 Subject: [PATCH 2/2] cook options when empty rather than filter out Signed-off-by: dafnapension --- prepare/cards/mmmu.py | 15 ++++++--- src/unitxt/catalog/cards/mmmu/accounting.json | 33 ++++++++----------- .../catalog/cards/mmmu/agriculture.json | 33 ++++++++----------- .../mmmu/architecture_and_engineering.json | 33 ++++++++----------- src/unitxt/catalog/cards/mmmu/art.json | 33 ++++++++----------- src/unitxt/catalog/cards/mmmu/art_theory.json | 33 ++++++++----------- .../cards/mmmu/basic_medical_science.json | 33 ++++++++----------- src/unitxt/catalog/cards/mmmu/biology.json | 33 ++++++++----------- src/unitxt/catalog/cards/mmmu/chemistry.json | 33 ++++++++----------- .../catalog/cards/mmmu/clinical_medicine.json | 33 ++++++++----------- .../catalog/cards/mmmu/computer_science.json | 33 ++++++++----------- src/unitxt/catalog/cards/mmmu/design.json | 33 ++++++++----------- .../diagnostics_and_laboratory_medicine.json | 33 ++++++++----------- src/unitxt/catalog/cards/mmmu/economics.json | 33 ++++++++----------- .../catalog/cards/mmmu/electronics.json | 33 ++++++++----------- .../catalog/cards/mmmu/energy_and_power.json | 33 ++++++++----------- src/unitxt/catalog/cards/mmmu/finance.json | 33 ++++++++----------- src/unitxt/catalog/cards/mmmu/geography.json | 33 ++++++++----------- src/unitxt/catalog/cards/mmmu/history.json | 33 ++++++++----------- src/unitxt/catalog/cards/mmmu/literature.json | 33 ++++++++----------- src/unitxt/catalog/cards/mmmu/manage.json | 33 ++++++++----------- src/unitxt/catalog/cards/mmmu/marketing.json | 33 ++++++++----------- src/unitxt/catalog/cards/mmmu/materials.json | 33 ++++++++----------- src/unitxt/catalog/cards/mmmu/math.json | 33 ++++++++----------- .../cards/mmmu/mechanical_engineering.json | 33 ++++++++----------- src/unitxt/catalog/cards/mmmu/music.json | 33 ++++++++----------- src/unitxt/catalog/cards/mmmu/pharmacy.json | 33 ++++++++----------- src/unitxt/catalog/cards/mmmu/physics.json | 33 ++++++++----------- src/unitxt/catalog/cards/mmmu/psychology.json | 33 ++++++++----------- .../catalog/cards/mmmu/public_health.json | 33 ++++++++----------- src/unitxt/catalog/cards/mmmu/sociology.json | 33 ++++++++----------- 31 files changed, 430 insertions(+), 575 deletions(-) diff --git a/prepare/cards/mmmu.py b/prepare/cards/mmmu.py index 405a83af5f..52ce5b5ca6 100644 --- a/prepare/cards/mmmu.py +++ b/prepare/cards/mmmu.py @@ -2,8 +2,7 @@ from unitxt.catalog import add_to_catalog from unitxt.collections_operators import Filter from unitxt.operators import ( - FilterByCondition, - FilterByExpression, + ExecuteExpression, ListFieldValues, MapValues, ) @@ -45,7 +44,7 @@ "Sociology", ] -mapping = {"A": 0, "B": 1, "C": 2, "D": 3, "E": 4, "?": None} +mapping = {"A": 0, "B": 1, "C": 2, "D": 3, "E": 4, "F": 5, "G": 6, "H": 7, "I": 8} for name in config_names: card = TaskCard( @@ -58,6 +57,14 @@ fields=[f"image_{i}" for i in range(1, 8)], to_field="media/images" ), Filter(field="media/images", values=[None]), + ExecuteExpression( + expression="options if options != '[]' else '[\"'+answer+'\"]'", + to_field="options", + ), + ExecuteExpression( + expression="'A' if options == '[\"'+answer+'\"]' else answer", + to_field="answer", + ), MapReplace( field_to_field={"question": "question", "options": "choices"}, mapping={ @@ -67,12 +74,10 @@ ), LiteralEval(field="choices"), Lower(field="subfield", to_field="topic"), - FilterByCondition(values={"answer": list(mapping.keys())}, condition="in"), MapValues( field="answer", mapping=mapping, ), - FilterByExpression(expression="answer < len(choices)"), ], task="tasks.qa.multiple_choice.with_topic", templates="templates.qa.multiple_choice.with_topic.all", diff --git a/src/unitxt/catalog/cards/mmmu/accounting.json b/src/unitxt/catalog/cards/mmmu/accounting.json index c376230581..5bc4b67c41 100644 --- a/src/unitxt/catalog/cards/mmmu/accounting.json +++ b/src/unitxt/catalog/cards/mmmu/accounting.json @@ -36,6 +36,16 @@ null ] }, + { + "__type__": "execute_expression", + "expression": "options if options != '[]' else '[\"'+answer+'\"]'", + "to_field": "options" + }, + { + "__type__": "execute_expression", + "expression": "'A' if options == '[\"'+answer+'\"]' else answer", + "to_field": "answer" + }, { "__type__": "map_replace", "field_to_field": { @@ -61,20 +71,6 @@ "field": "subfield", "to_field": "topic" }, - { - "__type__": "filter_by_condition", - "values": { - "answer": [ - "A", - "B", - "C", - "D", - "E", - "?" - ] - }, - "condition": "in" - }, { "__type__": "map_values", "field": "answer", @@ -84,12 +80,11 @@ "C": 2, "D": 3, "E": 4, - "?": null + "F": 5, + "G": 6, + "H": 7, + "I": 8 } - }, - { - "__type__": "filter_by_expression", - "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/agriculture.json b/src/unitxt/catalog/cards/mmmu/agriculture.json index 367ee1a370..f99726a2c2 100644 --- a/src/unitxt/catalog/cards/mmmu/agriculture.json +++ b/src/unitxt/catalog/cards/mmmu/agriculture.json @@ -36,6 +36,16 @@ null ] }, + { + "__type__": "execute_expression", + "expression": "options if options != '[]' else '[\"'+answer+'\"]'", + "to_field": "options" + }, + { + "__type__": "execute_expression", + "expression": "'A' if options == '[\"'+answer+'\"]' else answer", + "to_field": "answer" + }, { "__type__": "map_replace", "field_to_field": { @@ -61,20 +71,6 @@ "field": "subfield", "to_field": "topic" }, - { - "__type__": "filter_by_condition", - "values": { - "answer": [ - "A", - "B", - "C", - "D", - "E", - "?" - ] - }, - "condition": "in" - }, { "__type__": "map_values", "field": "answer", @@ -84,12 +80,11 @@ "C": 2, "D": 3, "E": 4, - "?": null + "F": 5, + "G": 6, + "H": 7, + "I": 8 } - }, - { - "__type__": "filter_by_expression", - "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/architecture_and_engineering.json b/src/unitxt/catalog/cards/mmmu/architecture_and_engineering.json index 459dd40b4c..ca69ddfa4b 100644 --- a/src/unitxt/catalog/cards/mmmu/architecture_and_engineering.json +++ b/src/unitxt/catalog/cards/mmmu/architecture_and_engineering.json @@ -36,6 +36,16 @@ null ] }, + { + "__type__": "execute_expression", + "expression": "options if options != '[]' else '[\"'+answer+'\"]'", + "to_field": "options" + }, + { + "__type__": "execute_expression", + "expression": "'A' if options == '[\"'+answer+'\"]' else answer", + "to_field": "answer" + }, { "__type__": "map_replace", "field_to_field": { @@ -61,20 +71,6 @@ "field": "subfield", "to_field": "topic" }, - { - "__type__": "filter_by_condition", - "values": { - "answer": [ - "A", - "B", - "C", - "D", - "E", - "?" - ] - }, - "condition": "in" - }, { "__type__": "map_values", "field": "answer", @@ -84,12 +80,11 @@ "C": 2, "D": 3, "E": 4, - "?": null + "F": 5, + "G": 6, + "H": 7, + "I": 8 } - }, - { - "__type__": "filter_by_expression", - "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/art.json b/src/unitxt/catalog/cards/mmmu/art.json index d54e598abf..c057c23ab5 100644 --- a/src/unitxt/catalog/cards/mmmu/art.json +++ b/src/unitxt/catalog/cards/mmmu/art.json @@ -36,6 +36,16 @@ null ] }, + { + "__type__": "execute_expression", + "expression": "options if options != '[]' else '[\"'+answer+'\"]'", + "to_field": "options" + }, + { + "__type__": "execute_expression", + "expression": "'A' if options == '[\"'+answer+'\"]' else answer", + "to_field": "answer" + }, { "__type__": "map_replace", "field_to_field": { @@ -61,20 +71,6 @@ "field": "subfield", "to_field": "topic" }, - { - "__type__": "filter_by_condition", - "values": { - "answer": [ - "A", - "B", - "C", - "D", - "E", - "?" - ] - }, - "condition": "in" - }, { "__type__": "map_values", "field": "answer", @@ -84,12 +80,11 @@ "C": 2, "D": 3, "E": 4, - "?": null + "F": 5, + "G": 6, + "H": 7, + "I": 8 } - }, - { - "__type__": "filter_by_expression", - "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/art_theory.json b/src/unitxt/catalog/cards/mmmu/art_theory.json index ed34997237..bcf253c3e5 100644 --- a/src/unitxt/catalog/cards/mmmu/art_theory.json +++ b/src/unitxt/catalog/cards/mmmu/art_theory.json @@ -36,6 +36,16 @@ null ] }, + { + "__type__": "execute_expression", + "expression": "options if options != '[]' else '[\"'+answer+'\"]'", + "to_field": "options" + }, + { + "__type__": "execute_expression", + "expression": "'A' if options == '[\"'+answer+'\"]' else answer", + "to_field": "answer" + }, { "__type__": "map_replace", "field_to_field": { @@ -61,20 +71,6 @@ "field": "subfield", "to_field": "topic" }, - { - "__type__": "filter_by_condition", - "values": { - "answer": [ - "A", - "B", - "C", - "D", - "E", - "?" - ] - }, - "condition": "in" - }, { "__type__": "map_values", "field": "answer", @@ -84,12 +80,11 @@ "C": 2, "D": 3, "E": 4, - "?": null + "F": 5, + "G": 6, + "H": 7, + "I": 8 } - }, - { - "__type__": "filter_by_expression", - "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/basic_medical_science.json b/src/unitxt/catalog/cards/mmmu/basic_medical_science.json index c9a5e49fa3..06e16c746c 100644 --- a/src/unitxt/catalog/cards/mmmu/basic_medical_science.json +++ b/src/unitxt/catalog/cards/mmmu/basic_medical_science.json @@ -36,6 +36,16 @@ null ] }, + { + "__type__": "execute_expression", + "expression": "options if options != '[]' else '[\"'+answer+'\"]'", + "to_field": "options" + }, + { + "__type__": "execute_expression", + "expression": "'A' if options == '[\"'+answer+'\"]' else answer", + "to_field": "answer" + }, { "__type__": "map_replace", "field_to_field": { @@ -61,20 +71,6 @@ "field": "subfield", "to_field": "topic" }, - { - "__type__": "filter_by_condition", - "values": { - "answer": [ - "A", - "B", - "C", - "D", - "E", - "?" - ] - }, - "condition": "in" - }, { "__type__": "map_values", "field": "answer", @@ -84,12 +80,11 @@ "C": 2, "D": 3, "E": 4, - "?": null + "F": 5, + "G": 6, + "H": 7, + "I": 8 } - }, - { - "__type__": "filter_by_expression", - "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/biology.json b/src/unitxt/catalog/cards/mmmu/biology.json index eb64e107ac..e9fdfcb7c0 100644 --- a/src/unitxt/catalog/cards/mmmu/biology.json +++ b/src/unitxt/catalog/cards/mmmu/biology.json @@ -36,6 +36,16 @@ null ] }, + { + "__type__": "execute_expression", + "expression": "options if options != '[]' else '[\"'+answer+'\"]'", + "to_field": "options" + }, + { + "__type__": "execute_expression", + "expression": "'A' if options == '[\"'+answer+'\"]' else answer", + "to_field": "answer" + }, { "__type__": "map_replace", "field_to_field": { @@ -61,20 +71,6 @@ "field": "subfield", "to_field": "topic" }, - { - "__type__": "filter_by_condition", - "values": { - "answer": [ - "A", - "B", - "C", - "D", - "E", - "?" - ] - }, - "condition": "in" - }, { "__type__": "map_values", "field": "answer", @@ -84,12 +80,11 @@ "C": 2, "D": 3, "E": 4, - "?": null + "F": 5, + "G": 6, + "H": 7, + "I": 8 } - }, - { - "__type__": "filter_by_expression", - "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/chemistry.json b/src/unitxt/catalog/cards/mmmu/chemistry.json index bcff30e891..5f196528fc 100644 --- a/src/unitxt/catalog/cards/mmmu/chemistry.json +++ b/src/unitxt/catalog/cards/mmmu/chemistry.json @@ -36,6 +36,16 @@ null ] }, + { + "__type__": "execute_expression", + "expression": "options if options != '[]' else '[\"'+answer+'\"]'", + "to_field": "options" + }, + { + "__type__": "execute_expression", + "expression": "'A' if options == '[\"'+answer+'\"]' else answer", + "to_field": "answer" + }, { "__type__": "map_replace", "field_to_field": { @@ -61,20 +71,6 @@ "field": "subfield", "to_field": "topic" }, - { - "__type__": "filter_by_condition", - "values": { - "answer": [ - "A", - "B", - "C", - "D", - "E", - "?" - ] - }, - "condition": "in" - }, { "__type__": "map_values", "field": "answer", @@ -84,12 +80,11 @@ "C": 2, "D": 3, "E": 4, - "?": null + "F": 5, + "G": 6, + "H": 7, + "I": 8 } - }, - { - "__type__": "filter_by_expression", - "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/clinical_medicine.json b/src/unitxt/catalog/cards/mmmu/clinical_medicine.json index 3424c4689f..236195f773 100644 --- a/src/unitxt/catalog/cards/mmmu/clinical_medicine.json +++ b/src/unitxt/catalog/cards/mmmu/clinical_medicine.json @@ -36,6 +36,16 @@ null ] }, + { + "__type__": "execute_expression", + "expression": "options if options != '[]' else '[\"'+answer+'\"]'", + "to_field": "options" + }, + { + "__type__": "execute_expression", + "expression": "'A' if options == '[\"'+answer+'\"]' else answer", + "to_field": "answer" + }, { "__type__": "map_replace", "field_to_field": { @@ -61,20 +71,6 @@ "field": "subfield", "to_field": "topic" }, - { - "__type__": "filter_by_condition", - "values": { - "answer": [ - "A", - "B", - "C", - "D", - "E", - "?" - ] - }, - "condition": "in" - }, { "__type__": "map_values", "field": "answer", @@ -84,12 +80,11 @@ "C": 2, "D": 3, "E": 4, - "?": null + "F": 5, + "G": 6, + "H": 7, + "I": 8 } - }, - { - "__type__": "filter_by_expression", - "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/computer_science.json b/src/unitxt/catalog/cards/mmmu/computer_science.json index 068ffe3253..34991d8200 100644 --- a/src/unitxt/catalog/cards/mmmu/computer_science.json +++ b/src/unitxt/catalog/cards/mmmu/computer_science.json @@ -36,6 +36,16 @@ null ] }, + { + "__type__": "execute_expression", + "expression": "options if options != '[]' else '[\"'+answer+'\"]'", + "to_field": "options" + }, + { + "__type__": "execute_expression", + "expression": "'A' if options == '[\"'+answer+'\"]' else answer", + "to_field": "answer" + }, { "__type__": "map_replace", "field_to_field": { @@ -61,20 +71,6 @@ "field": "subfield", "to_field": "topic" }, - { - "__type__": "filter_by_condition", - "values": { - "answer": [ - "A", - "B", - "C", - "D", - "E", - "?" - ] - }, - "condition": "in" - }, { "__type__": "map_values", "field": "answer", @@ -84,12 +80,11 @@ "C": 2, "D": 3, "E": 4, - "?": null + "F": 5, + "G": 6, + "H": 7, + "I": 8 } - }, - { - "__type__": "filter_by_expression", - "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/design.json b/src/unitxt/catalog/cards/mmmu/design.json index 15ed8b4a2d..18cf7dcd74 100644 --- a/src/unitxt/catalog/cards/mmmu/design.json +++ b/src/unitxt/catalog/cards/mmmu/design.json @@ -36,6 +36,16 @@ null ] }, + { + "__type__": "execute_expression", + "expression": "options if options != '[]' else '[\"'+answer+'\"]'", + "to_field": "options" + }, + { + "__type__": "execute_expression", + "expression": "'A' if options == '[\"'+answer+'\"]' else answer", + "to_field": "answer" + }, { "__type__": "map_replace", "field_to_field": { @@ -61,20 +71,6 @@ "field": "subfield", "to_field": "topic" }, - { - "__type__": "filter_by_condition", - "values": { - "answer": [ - "A", - "B", - "C", - "D", - "E", - "?" - ] - }, - "condition": "in" - }, { "__type__": "map_values", "field": "answer", @@ -84,12 +80,11 @@ "C": 2, "D": 3, "E": 4, - "?": null + "F": 5, + "G": 6, + "H": 7, + "I": 8 } - }, - { - "__type__": "filter_by_expression", - "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/diagnostics_and_laboratory_medicine.json b/src/unitxt/catalog/cards/mmmu/diagnostics_and_laboratory_medicine.json index 31d0a8397a..91bcbb57eb 100644 --- a/src/unitxt/catalog/cards/mmmu/diagnostics_and_laboratory_medicine.json +++ b/src/unitxt/catalog/cards/mmmu/diagnostics_and_laboratory_medicine.json @@ -36,6 +36,16 @@ null ] }, + { + "__type__": "execute_expression", + "expression": "options if options != '[]' else '[\"'+answer+'\"]'", + "to_field": "options" + }, + { + "__type__": "execute_expression", + "expression": "'A' if options == '[\"'+answer+'\"]' else answer", + "to_field": "answer" + }, { "__type__": "map_replace", "field_to_field": { @@ -61,20 +71,6 @@ "field": "subfield", "to_field": "topic" }, - { - "__type__": "filter_by_condition", - "values": { - "answer": [ - "A", - "B", - "C", - "D", - "E", - "?" - ] - }, - "condition": "in" - }, { "__type__": "map_values", "field": "answer", @@ -84,12 +80,11 @@ "C": 2, "D": 3, "E": 4, - "?": null + "F": 5, + "G": 6, + "H": 7, + "I": 8 } - }, - { - "__type__": "filter_by_expression", - "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/economics.json b/src/unitxt/catalog/cards/mmmu/economics.json index 5482d00ec1..12b4e0e325 100644 --- a/src/unitxt/catalog/cards/mmmu/economics.json +++ b/src/unitxt/catalog/cards/mmmu/economics.json @@ -36,6 +36,16 @@ null ] }, + { + "__type__": "execute_expression", + "expression": "options if options != '[]' else '[\"'+answer+'\"]'", + "to_field": "options" + }, + { + "__type__": "execute_expression", + "expression": "'A' if options == '[\"'+answer+'\"]' else answer", + "to_field": "answer" + }, { "__type__": "map_replace", "field_to_field": { @@ -61,20 +71,6 @@ "field": "subfield", "to_field": "topic" }, - { - "__type__": "filter_by_condition", - "values": { - "answer": [ - "A", - "B", - "C", - "D", - "E", - "?" - ] - }, - "condition": "in" - }, { "__type__": "map_values", "field": "answer", @@ -84,12 +80,11 @@ "C": 2, "D": 3, "E": 4, - "?": null + "F": 5, + "G": 6, + "H": 7, + "I": 8 } - }, - { - "__type__": "filter_by_expression", - "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/electronics.json b/src/unitxt/catalog/cards/mmmu/electronics.json index 87fb6f2faf..3d21d43b68 100644 --- a/src/unitxt/catalog/cards/mmmu/electronics.json +++ b/src/unitxt/catalog/cards/mmmu/electronics.json @@ -36,6 +36,16 @@ null ] }, + { + "__type__": "execute_expression", + "expression": "options if options != '[]' else '[\"'+answer+'\"]'", + "to_field": "options" + }, + { + "__type__": "execute_expression", + "expression": "'A' if options == '[\"'+answer+'\"]' else answer", + "to_field": "answer" + }, { "__type__": "map_replace", "field_to_field": { @@ -61,20 +71,6 @@ "field": "subfield", "to_field": "topic" }, - { - "__type__": "filter_by_condition", - "values": { - "answer": [ - "A", - "B", - "C", - "D", - "E", - "?" - ] - }, - "condition": "in" - }, { "__type__": "map_values", "field": "answer", @@ -84,12 +80,11 @@ "C": 2, "D": 3, "E": 4, - "?": null + "F": 5, + "G": 6, + "H": 7, + "I": 8 } - }, - { - "__type__": "filter_by_expression", - "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/energy_and_power.json b/src/unitxt/catalog/cards/mmmu/energy_and_power.json index 80d4ef22a2..e788d796df 100644 --- a/src/unitxt/catalog/cards/mmmu/energy_and_power.json +++ b/src/unitxt/catalog/cards/mmmu/energy_and_power.json @@ -36,6 +36,16 @@ null ] }, + { + "__type__": "execute_expression", + "expression": "options if options != '[]' else '[\"'+answer+'\"]'", + "to_field": "options" + }, + { + "__type__": "execute_expression", + "expression": "'A' if options == '[\"'+answer+'\"]' else answer", + "to_field": "answer" + }, { "__type__": "map_replace", "field_to_field": { @@ -61,20 +71,6 @@ "field": "subfield", "to_field": "topic" }, - { - "__type__": "filter_by_condition", - "values": { - "answer": [ - "A", - "B", - "C", - "D", - "E", - "?" - ] - }, - "condition": "in" - }, { "__type__": "map_values", "field": "answer", @@ -84,12 +80,11 @@ "C": 2, "D": 3, "E": 4, - "?": null + "F": 5, + "G": 6, + "H": 7, + "I": 8 } - }, - { - "__type__": "filter_by_expression", - "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/finance.json b/src/unitxt/catalog/cards/mmmu/finance.json index e8174bb508..3d4369d212 100644 --- a/src/unitxt/catalog/cards/mmmu/finance.json +++ b/src/unitxt/catalog/cards/mmmu/finance.json @@ -36,6 +36,16 @@ null ] }, + { + "__type__": "execute_expression", + "expression": "options if options != '[]' else '[\"'+answer+'\"]'", + "to_field": "options" + }, + { + "__type__": "execute_expression", + "expression": "'A' if options == '[\"'+answer+'\"]' else answer", + "to_field": "answer" + }, { "__type__": "map_replace", "field_to_field": { @@ -61,20 +71,6 @@ "field": "subfield", "to_field": "topic" }, - { - "__type__": "filter_by_condition", - "values": { - "answer": [ - "A", - "B", - "C", - "D", - "E", - "?" - ] - }, - "condition": "in" - }, { "__type__": "map_values", "field": "answer", @@ -84,12 +80,11 @@ "C": 2, "D": 3, "E": 4, - "?": null + "F": 5, + "G": 6, + "H": 7, + "I": 8 } - }, - { - "__type__": "filter_by_expression", - "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/geography.json b/src/unitxt/catalog/cards/mmmu/geography.json index 31ca21e3cb..0ad9566e36 100644 --- a/src/unitxt/catalog/cards/mmmu/geography.json +++ b/src/unitxt/catalog/cards/mmmu/geography.json @@ -36,6 +36,16 @@ null ] }, + { + "__type__": "execute_expression", + "expression": "options if options != '[]' else '[\"'+answer+'\"]'", + "to_field": "options" + }, + { + "__type__": "execute_expression", + "expression": "'A' if options == '[\"'+answer+'\"]' else answer", + "to_field": "answer" + }, { "__type__": "map_replace", "field_to_field": { @@ -61,20 +71,6 @@ "field": "subfield", "to_field": "topic" }, - { - "__type__": "filter_by_condition", - "values": { - "answer": [ - "A", - "B", - "C", - "D", - "E", - "?" - ] - }, - "condition": "in" - }, { "__type__": "map_values", "field": "answer", @@ -84,12 +80,11 @@ "C": 2, "D": 3, "E": 4, - "?": null + "F": 5, + "G": 6, + "H": 7, + "I": 8 } - }, - { - "__type__": "filter_by_expression", - "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/history.json b/src/unitxt/catalog/cards/mmmu/history.json index 790e9ccae7..8a73a8a439 100644 --- a/src/unitxt/catalog/cards/mmmu/history.json +++ b/src/unitxt/catalog/cards/mmmu/history.json @@ -36,6 +36,16 @@ null ] }, + { + "__type__": "execute_expression", + "expression": "options if options != '[]' else '[\"'+answer+'\"]'", + "to_field": "options" + }, + { + "__type__": "execute_expression", + "expression": "'A' if options == '[\"'+answer+'\"]' else answer", + "to_field": "answer" + }, { "__type__": "map_replace", "field_to_field": { @@ -61,20 +71,6 @@ "field": "subfield", "to_field": "topic" }, - { - "__type__": "filter_by_condition", - "values": { - "answer": [ - "A", - "B", - "C", - "D", - "E", - "?" - ] - }, - "condition": "in" - }, { "__type__": "map_values", "field": "answer", @@ -84,12 +80,11 @@ "C": 2, "D": 3, "E": 4, - "?": null + "F": 5, + "G": 6, + "H": 7, + "I": 8 } - }, - { - "__type__": "filter_by_expression", - "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/literature.json b/src/unitxt/catalog/cards/mmmu/literature.json index 64829e4689..a40f26d922 100644 --- a/src/unitxt/catalog/cards/mmmu/literature.json +++ b/src/unitxt/catalog/cards/mmmu/literature.json @@ -36,6 +36,16 @@ null ] }, + { + "__type__": "execute_expression", + "expression": "options if options != '[]' else '[\"'+answer+'\"]'", + "to_field": "options" + }, + { + "__type__": "execute_expression", + "expression": "'A' if options == '[\"'+answer+'\"]' else answer", + "to_field": "answer" + }, { "__type__": "map_replace", "field_to_field": { @@ -61,20 +71,6 @@ "field": "subfield", "to_field": "topic" }, - { - "__type__": "filter_by_condition", - "values": { - "answer": [ - "A", - "B", - "C", - "D", - "E", - "?" - ] - }, - "condition": "in" - }, { "__type__": "map_values", "field": "answer", @@ -84,12 +80,11 @@ "C": 2, "D": 3, "E": 4, - "?": null + "F": 5, + "G": 6, + "H": 7, + "I": 8 } - }, - { - "__type__": "filter_by_expression", - "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/manage.json b/src/unitxt/catalog/cards/mmmu/manage.json index 08c61e9c4d..dc0290d9f6 100644 --- a/src/unitxt/catalog/cards/mmmu/manage.json +++ b/src/unitxt/catalog/cards/mmmu/manage.json @@ -36,6 +36,16 @@ null ] }, + { + "__type__": "execute_expression", + "expression": "options if options != '[]' else '[\"'+answer+'\"]'", + "to_field": "options" + }, + { + "__type__": "execute_expression", + "expression": "'A' if options == '[\"'+answer+'\"]' else answer", + "to_field": "answer" + }, { "__type__": "map_replace", "field_to_field": { @@ -61,20 +71,6 @@ "field": "subfield", "to_field": "topic" }, - { - "__type__": "filter_by_condition", - "values": { - "answer": [ - "A", - "B", - "C", - "D", - "E", - "?" - ] - }, - "condition": "in" - }, { "__type__": "map_values", "field": "answer", @@ -84,12 +80,11 @@ "C": 2, "D": 3, "E": 4, - "?": null + "F": 5, + "G": 6, + "H": 7, + "I": 8 } - }, - { - "__type__": "filter_by_expression", - "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/marketing.json b/src/unitxt/catalog/cards/mmmu/marketing.json index 21cf0da6f7..8467dc6cfe 100644 --- a/src/unitxt/catalog/cards/mmmu/marketing.json +++ b/src/unitxt/catalog/cards/mmmu/marketing.json @@ -36,6 +36,16 @@ null ] }, + { + "__type__": "execute_expression", + "expression": "options if options != '[]' else '[\"'+answer+'\"]'", + "to_field": "options" + }, + { + "__type__": "execute_expression", + "expression": "'A' if options == '[\"'+answer+'\"]' else answer", + "to_field": "answer" + }, { "__type__": "map_replace", "field_to_field": { @@ -61,20 +71,6 @@ "field": "subfield", "to_field": "topic" }, - { - "__type__": "filter_by_condition", - "values": { - "answer": [ - "A", - "B", - "C", - "D", - "E", - "?" - ] - }, - "condition": "in" - }, { "__type__": "map_values", "field": "answer", @@ -84,12 +80,11 @@ "C": 2, "D": 3, "E": 4, - "?": null + "F": 5, + "G": 6, + "H": 7, + "I": 8 } - }, - { - "__type__": "filter_by_expression", - "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/materials.json b/src/unitxt/catalog/cards/mmmu/materials.json index a04fe30baa..e82ce62c17 100644 --- a/src/unitxt/catalog/cards/mmmu/materials.json +++ b/src/unitxt/catalog/cards/mmmu/materials.json @@ -36,6 +36,16 @@ null ] }, + { + "__type__": "execute_expression", + "expression": "options if options != '[]' else '[\"'+answer+'\"]'", + "to_field": "options" + }, + { + "__type__": "execute_expression", + "expression": "'A' if options == '[\"'+answer+'\"]' else answer", + "to_field": "answer" + }, { "__type__": "map_replace", "field_to_field": { @@ -61,20 +71,6 @@ "field": "subfield", "to_field": "topic" }, - { - "__type__": "filter_by_condition", - "values": { - "answer": [ - "A", - "B", - "C", - "D", - "E", - "?" - ] - }, - "condition": "in" - }, { "__type__": "map_values", "field": "answer", @@ -84,12 +80,11 @@ "C": 2, "D": 3, "E": 4, - "?": null + "F": 5, + "G": 6, + "H": 7, + "I": 8 } - }, - { - "__type__": "filter_by_expression", - "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/math.json b/src/unitxt/catalog/cards/mmmu/math.json index 9e60201fdd..c526d70919 100644 --- a/src/unitxt/catalog/cards/mmmu/math.json +++ b/src/unitxt/catalog/cards/mmmu/math.json @@ -36,6 +36,16 @@ null ] }, + { + "__type__": "execute_expression", + "expression": "options if options != '[]' else '[\"'+answer+'\"]'", + "to_field": "options" + }, + { + "__type__": "execute_expression", + "expression": "'A' if options == '[\"'+answer+'\"]' else answer", + "to_field": "answer" + }, { "__type__": "map_replace", "field_to_field": { @@ -61,20 +71,6 @@ "field": "subfield", "to_field": "topic" }, - { - "__type__": "filter_by_condition", - "values": { - "answer": [ - "A", - "B", - "C", - "D", - "E", - "?" - ] - }, - "condition": "in" - }, { "__type__": "map_values", "field": "answer", @@ -84,12 +80,11 @@ "C": 2, "D": 3, "E": 4, - "?": null + "F": 5, + "G": 6, + "H": 7, + "I": 8 } - }, - { - "__type__": "filter_by_expression", - "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/mechanical_engineering.json b/src/unitxt/catalog/cards/mmmu/mechanical_engineering.json index 795ac6c8e9..ddfbb197ef 100644 --- a/src/unitxt/catalog/cards/mmmu/mechanical_engineering.json +++ b/src/unitxt/catalog/cards/mmmu/mechanical_engineering.json @@ -36,6 +36,16 @@ null ] }, + { + "__type__": "execute_expression", + "expression": "options if options != '[]' else '[\"'+answer+'\"]'", + "to_field": "options" + }, + { + "__type__": "execute_expression", + "expression": "'A' if options == '[\"'+answer+'\"]' else answer", + "to_field": "answer" + }, { "__type__": "map_replace", "field_to_field": { @@ -61,20 +71,6 @@ "field": "subfield", "to_field": "topic" }, - { - "__type__": "filter_by_condition", - "values": { - "answer": [ - "A", - "B", - "C", - "D", - "E", - "?" - ] - }, - "condition": "in" - }, { "__type__": "map_values", "field": "answer", @@ -84,12 +80,11 @@ "C": 2, "D": 3, "E": 4, - "?": null + "F": 5, + "G": 6, + "H": 7, + "I": 8 } - }, - { - "__type__": "filter_by_expression", - "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/music.json b/src/unitxt/catalog/cards/mmmu/music.json index 2df9a81a5d..1acf95c47d 100644 --- a/src/unitxt/catalog/cards/mmmu/music.json +++ b/src/unitxt/catalog/cards/mmmu/music.json @@ -36,6 +36,16 @@ null ] }, + { + "__type__": "execute_expression", + "expression": "options if options != '[]' else '[\"'+answer+'\"]'", + "to_field": "options" + }, + { + "__type__": "execute_expression", + "expression": "'A' if options == '[\"'+answer+'\"]' else answer", + "to_field": "answer" + }, { "__type__": "map_replace", "field_to_field": { @@ -61,20 +71,6 @@ "field": "subfield", "to_field": "topic" }, - { - "__type__": "filter_by_condition", - "values": { - "answer": [ - "A", - "B", - "C", - "D", - "E", - "?" - ] - }, - "condition": "in" - }, { "__type__": "map_values", "field": "answer", @@ -84,12 +80,11 @@ "C": 2, "D": 3, "E": 4, - "?": null + "F": 5, + "G": 6, + "H": 7, + "I": 8 } - }, - { - "__type__": "filter_by_expression", - "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/pharmacy.json b/src/unitxt/catalog/cards/mmmu/pharmacy.json index e13cfa2e97..93be2b79cb 100644 --- a/src/unitxt/catalog/cards/mmmu/pharmacy.json +++ b/src/unitxt/catalog/cards/mmmu/pharmacy.json @@ -36,6 +36,16 @@ null ] }, + { + "__type__": "execute_expression", + "expression": "options if options != '[]' else '[\"'+answer+'\"]'", + "to_field": "options" + }, + { + "__type__": "execute_expression", + "expression": "'A' if options == '[\"'+answer+'\"]' else answer", + "to_field": "answer" + }, { "__type__": "map_replace", "field_to_field": { @@ -61,20 +71,6 @@ "field": "subfield", "to_field": "topic" }, - { - "__type__": "filter_by_condition", - "values": { - "answer": [ - "A", - "B", - "C", - "D", - "E", - "?" - ] - }, - "condition": "in" - }, { "__type__": "map_values", "field": "answer", @@ -84,12 +80,11 @@ "C": 2, "D": 3, "E": 4, - "?": null + "F": 5, + "G": 6, + "H": 7, + "I": 8 } - }, - { - "__type__": "filter_by_expression", - "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/physics.json b/src/unitxt/catalog/cards/mmmu/physics.json index 987215dbc0..d990bbc90a 100644 --- a/src/unitxt/catalog/cards/mmmu/physics.json +++ b/src/unitxt/catalog/cards/mmmu/physics.json @@ -36,6 +36,16 @@ null ] }, + { + "__type__": "execute_expression", + "expression": "options if options != '[]' else '[\"'+answer+'\"]'", + "to_field": "options" + }, + { + "__type__": "execute_expression", + "expression": "'A' if options == '[\"'+answer+'\"]' else answer", + "to_field": "answer" + }, { "__type__": "map_replace", "field_to_field": { @@ -61,20 +71,6 @@ "field": "subfield", "to_field": "topic" }, - { - "__type__": "filter_by_condition", - "values": { - "answer": [ - "A", - "B", - "C", - "D", - "E", - "?" - ] - }, - "condition": "in" - }, { "__type__": "map_values", "field": "answer", @@ -84,12 +80,11 @@ "C": 2, "D": 3, "E": 4, - "?": null + "F": 5, + "G": 6, + "H": 7, + "I": 8 } - }, - { - "__type__": "filter_by_expression", - "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/psychology.json b/src/unitxt/catalog/cards/mmmu/psychology.json index 763e3ce482..bc116cbaaf 100644 --- a/src/unitxt/catalog/cards/mmmu/psychology.json +++ b/src/unitxt/catalog/cards/mmmu/psychology.json @@ -36,6 +36,16 @@ null ] }, + { + "__type__": "execute_expression", + "expression": "options if options != '[]' else '[\"'+answer+'\"]'", + "to_field": "options" + }, + { + "__type__": "execute_expression", + "expression": "'A' if options == '[\"'+answer+'\"]' else answer", + "to_field": "answer" + }, { "__type__": "map_replace", "field_to_field": { @@ -61,20 +71,6 @@ "field": "subfield", "to_field": "topic" }, - { - "__type__": "filter_by_condition", - "values": { - "answer": [ - "A", - "B", - "C", - "D", - "E", - "?" - ] - }, - "condition": "in" - }, { "__type__": "map_values", "field": "answer", @@ -84,12 +80,11 @@ "C": 2, "D": 3, "E": 4, - "?": null + "F": 5, + "G": 6, + "H": 7, + "I": 8 } - }, - { - "__type__": "filter_by_expression", - "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/public_health.json b/src/unitxt/catalog/cards/mmmu/public_health.json index 94c04b3dbe..24b476f09c 100644 --- a/src/unitxt/catalog/cards/mmmu/public_health.json +++ b/src/unitxt/catalog/cards/mmmu/public_health.json @@ -36,6 +36,16 @@ null ] }, + { + "__type__": "execute_expression", + "expression": "options if options != '[]' else '[\"'+answer+'\"]'", + "to_field": "options" + }, + { + "__type__": "execute_expression", + "expression": "'A' if options == '[\"'+answer+'\"]' else answer", + "to_field": "answer" + }, { "__type__": "map_replace", "field_to_field": { @@ -61,20 +71,6 @@ "field": "subfield", "to_field": "topic" }, - { - "__type__": "filter_by_condition", - "values": { - "answer": [ - "A", - "B", - "C", - "D", - "E", - "?" - ] - }, - "condition": "in" - }, { "__type__": "map_values", "field": "answer", @@ -84,12 +80,11 @@ "C": 2, "D": 3, "E": 4, - "?": null + "F": 5, + "G": 6, + "H": 7, + "I": 8 } - }, - { - "__type__": "filter_by_expression", - "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic", diff --git a/src/unitxt/catalog/cards/mmmu/sociology.json b/src/unitxt/catalog/cards/mmmu/sociology.json index d58cf2bc7b..399a8f90fd 100644 --- a/src/unitxt/catalog/cards/mmmu/sociology.json +++ b/src/unitxt/catalog/cards/mmmu/sociology.json @@ -36,6 +36,16 @@ null ] }, + { + "__type__": "execute_expression", + "expression": "options if options != '[]' else '[\"'+answer+'\"]'", + "to_field": "options" + }, + { + "__type__": "execute_expression", + "expression": "'A' if options == '[\"'+answer+'\"]' else answer", + "to_field": "answer" + }, { "__type__": "map_replace", "field_to_field": { @@ -61,20 +71,6 @@ "field": "subfield", "to_field": "topic" }, - { - "__type__": "filter_by_condition", - "values": { - "answer": [ - "A", - "B", - "C", - "D", - "E", - "?" - ] - }, - "condition": "in" - }, { "__type__": "map_values", "field": "answer", @@ -84,12 +80,11 @@ "C": 2, "D": 3, "E": 4, - "?": null + "F": 5, + "G": 6, + "H": 7, + "I": 8 } - }, - { - "__type__": "filter_by_expression", - "expression": "answer < len(choices)" } ], "task": "tasks.qa.multiple_choice.with_topic",