Revert bfcl to main (will be fixed in another PR); revert changes to xlam -- they were already committed to main, and now live with the general schema fixer operator, so the one tailored for tools was removed

dafnapension · dafnapension · commit c6a16c622713 · 2025-08-29T18:00:15.000+03:00
Signed-off-by: dafnapension <dafnashein@yahoo.com>
diff --git a/prepare/cards/bfcl.py b/prepare/cards/bfcl.py
@@ -5,7 +5,6 @@
 from unitxt.operators import (
     Copy,
     ExecuteExpression,
-    FixJsonSchemaOfToolParameterTypes,
     Set,
 )
 from unitxt.stream_operators import JoinStreams
@@ -34,7 +33,7 @@
                 ),
                 Copy(field="question/0/0/content", to_field="query"),
                 Copy(field="function", to_field="tools"),
-                FixJsonSchemaOfToolParameterTypes(),
+                "operators.fix_json_schema",
                 # Process ground truth data in this dataset, which is a provided as a list of options per field,
                 # and convert it into a list of explicit tool calls
                 #
@@ -75,12 +74,12 @@
     for subset in [
         "simple",
         "multiple",
-        "live_multiple",  # instances above 900 reach size of hundreds of MBs
+        "live_multiple",
         "live_simple",
         "java",
         "javascript",
         "parallel",
-        "parallel_multiple",  # error caused by instance 179
+        "parallel_multiple",
         "live_parallel",
         "live_parallel_multiple",
     ]:
@@ -103,7 +102,7 @@
                 ),
                 Copy(field="question/*/0", to_field="dialog"),
                 Copy(field="function", to_field="tools"),
-                FixJsonSchemaOfToolParameterTypes(),
+                "operators.fix_json_schema",
                 ExecuteExpression(
                     expression='[{"name": k, "arguments": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*v.values())]',
                     to_field="reference_calls",
@@ -139,52 +138,52 @@
         test_card(card, strict=False)
         add_to_catalog(card, f"cards.bfcl.multi_turn.{subset}_v3", overwrite=True)
 
-    for subset in [
-        "live_relevance",
-        "live_irrelevance",
-    ]:
-        card = TaskCard(
-            loader=LoadJsonFile(
-                files={
-                    "test": base_path + f"BFCL_v3_{subset}.json",
-                },
-                lines=True,
-                data_classification_policy=["public"],
-            ),
-            preprocess_steps=[
-                Copy(field="question/*/0", to_field="dialog"),
-                Copy(field="function", to_field="tools"),
-                FixJsonSchemaOfToolParameterTypes(),
-                Set(fields={"reference_calls": []}),
-            ],
-            task="tasks.tool_calling.multi_turn",
-            templates=["templates.tool_calling.multi_turn"],
-            __description__=(
-                """The Berkeley function calling leaderboard is a live leaderboard to evaluate the ability of different LLMs to call functions (also referred to as tools). We built this dataset from our learnings to be representative of most users' function calling use-cases, for example, in agents, as a part of enterprise workflows, etc. To this end, our evaluation dataset spans diverse categories, and across multiple languages."""
-            ),
-            __title__=f"""Berkeley Function Calling Leaderboard (Multi Turn Setup) - {subset.replace("_", " ").title()} V3""",
-            __tags__={
-                "annotations_creators": "expert-generated",
-                "language": ["en"],
-                "license": "apache-2.0",
-                "size_categories": ["10K<n<100K"],
-                "task_categories": [
-                    "question-answering",
-                    "reading-comprehension",
-                    "tool-calling",
-                    "multi-turn-tool-calling",
-                ],
-                "task_ids": [
-                    "tool-calling",
-                    "multi-turn-tool-calling",
-                    "reading-comprehension",
+        for subset in [
+            "live_relevance",
+            "live_irrelevance",
+        ]:
+            card = TaskCard(
+                loader=LoadJsonFile(
+                    files={
+                        "test": base_path + f"BFCL_v3_{subset}.json",
+                    },
+                    lines=True,
+                    data_classification_policy=["public"],
+                ),
+                preprocess_steps=[
+                    Copy(field="question/*/0", to_field="dialog"),
+                    Copy(field="function", to_field="tools"),
+                    "operators.fix_json_schema",
+                    Set(fields={"reference_calls": []}),
                 ],
-            },
-        )
+                task="tasks.tool_calling.multi_turn",
+                templates=["templates.tool_calling.multi_turn"],
+                __description__=(
+                    """The Berkeley function calling leaderboard is a live leaderboard to evaluate the ability of different LLMs to call functions (also referred to as tools). We built this dataset from our learnings to be representative of most users' function calling use-cases, for example, in agents, as a part of enterprise workflows, etc. To this end, our evaluation dataset spans diverse categories, and across multiple languages."""
+                ),
+                __title__=f"""Berkeley Function Calling Leaderboard (Multi Turn Setup) - {subset.replace("_", " ").title()} V3""",
+                __tags__={
+                    "annotations_creators": "expert-generated",
+                    "language": ["en"],
+                    "license": "apache-2.0",
+                    "size_categories": ["10K<n<100K"],
+                    "task_categories": [
+                        "question-answering",
+                        "reading-comprehension",
+                        "tool-calling",
+                        "multi-turn-tool-calling",
+                    ],
+                    "task_ids": [
+                        "tool-calling",
+                        "multi-turn-tool-calling",
+                        "reading-comprehension",
+                    ],
+                },
+            )
 
-        # Test and add the card to the catalog
-        test_card(card, strict=False)
-        add_to_catalog(card, f"cards.bfcl.multi_turn.{subset}_v3", overwrite=True)
+            # Test and add the card to the catalog
+            test_card(card, strict=False)
+            add_to_catalog(card, f"cards.bfcl.multi_turn.{subset}_v3", overwrite=True)
 
     # card = TaskCard(
     #     loader=LoadJsonFile(
diff --git a/prepare/cards/xlam_function_calling.py b/prepare/cards/xlam_function_calling.py
@@ -12,17 +12,6 @@
 from unitxt.struct_data_operators import LoadJson
 from unitxt.test_utils.card import test_card
 
-
-def extract_required_parameters(instance, stream_name=None):
-    for tool in instance["tools"]:
-        required_params = []
-        for param_name, param_info in tool["parameters"]["properties"].items():
-            if "optional" not in param_info["type"].lower():
-                required_params.append(param_name)
-        tool["parameters"]["required"] = required_params
-    return instance
-
-
 card = TaskCard(
     loader=LoadHF(
         path="Salesforce/xlam-function-calling-60k",
diff --git a/src/unitxt/catalog/cards/bfcl/multi_turn/java_v3.json b/src/unitxt/catalog/cards/bfcl/multi_turn/java_v3.json
@@ -30,9 +30,7 @@
             "field": "function",
             "to_field": "tools"
         },
-        {
-            "__type__": "fix_json_schema_of_tool_parameter_types"
-        },
+        "operators.fix_json_schema",
         {
             "__type__": "execute_expression",
             "expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*v.values())]",
diff --git a/src/unitxt/catalog/cards/bfcl/multi_turn/javascript_v3.json b/src/unitxt/catalog/cards/bfcl/multi_turn/javascript_v3.json
@@ -30,9 +30,7 @@
             "field": "function",
             "to_field": "tools"
         },
-        {
-            "__type__": "fix_json_schema_of_tool_parameter_types"
-        },
+        "operators.fix_json_schema",
         {
             "__type__": "execute_expression",
             "expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*v.values())]",
diff --git a/src/unitxt/catalog/cards/bfcl/multi_turn/live_irrelevance_v3.json b/src/unitxt/catalog/cards/bfcl/multi_turn/live_irrelevance_v3.json
@@ -21,9 +21,7 @@
             "field": "function",
             "to_field": "tools"
         },
-        {
-            "__type__": "fix_json_schema_of_tool_parameter_types"
-        },
+        "operators.fix_json_schema",
         {
             "__type__": "set",
             "fields": {
diff --git a/src/unitxt/catalog/cards/bfcl/multi_turn/live_multiple_v3.json b/src/unitxt/catalog/cards/bfcl/multi_turn/live_multiple_v3.json
@@ -30,9 +30,7 @@
             "field": "function",
             "to_field": "tools"
         },
-        {
-            "__type__": "fix_json_schema_of_tool_parameter_types"
-        },
+        "operators.fix_json_schema",
         {
             "__type__": "execute_expression",
             "expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*v.values())]",
diff --git a/src/unitxt/catalog/cards/bfcl/multi_turn/live_parallel_multiple_v3.json b/src/unitxt/catalog/cards/bfcl/multi_turn/live_parallel_multiple_v3.json
@@ -30,9 +30,7 @@
             "field": "function",
             "to_field": "tools"
         },
-        {
-            "__type__": "fix_json_schema_of_tool_parameter_types"
-        },
+        "operators.fix_json_schema",
         {
             "__type__": "execute_expression",
             "expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*v.values())]",
diff --git a/src/unitxt/catalog/cards/bfcl/multi_turn/live_parallel_v3.json b/src/unitxt/catalog/cards/bfcl/multi_turn/live_parallel_v3.json
@@ -30,9 +30,7 @@
             "field": "function",
             "to_field": "tools"
         },
-        {
-            "__type__": "fix_json_schema_of_tool_parameter_types"
-        },
+        "operators.fix_json_schema",
         {
             "__type__": "execute_expression",
             "expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*v.values())]",
diff --git a/src/unitxt/catalog/cards/bfcl/multi_turn/live_relevance_v3.json b/src/unitxt/catalog/cards/bfcl/multi_turn/live_relevance_v3.json
@@ -21,9 +21,7 @@
             "field": "function",
             "to_field": "tools"
         },
-        {
-            "__type__": "fix_json_schema_of_tool_parameter_types"
-        },
+        "operators.fix_json_schema",
         {
             "__type__": "set",
             "fields": {
diff --git a/src/unitxt/catalog/cards/bfcl/multi_turn/live_simple_v3.json b/src/unitxt/catalog/cards/bfcl/multi_turn/live_simple_v3.json
@@ -30,9 +30,7 @@
             "field": "function",
             "to_field": "tools"
         },
-        {
-            "__type__": "fix_json_schema_of_tool_parameter_types"
-        },
+        "operators.fix_json_schema",
         {
             "__type__": "execute_expression",
             "expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*v.values())]",
diff --git a/src/unitxt/catalog/cards/bfcl/multi_turn/multiple_v3.json b/src/unitxt/catalog/cards/bfcl/multi_turn/multiple_v3.json
@@ -30,9 +30,7 @@
             "field": "function",
             "to_field": "tools"
         },
-        {
-            "__type__": "fix_json_schema_of_tool_parameter_types"
-        },
+        "operators.fix_json_schema",
         {
             "__type__": "execute_expression",
             "expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*v.values())]",
diff --git a/src/unitxt/catalog/cards/bfcl/multi_turn/parallel_multiple_v3.json b/src/unitxt/catalog/cards/bfcl/multi_turn/parallel_multiple_v3.json
@@ -30,9 +30,7 @@
             "field": "function",
             "to_field": "tools"
         },
-        {
-            "__type__": "fix_json_schema_of_tool_parameter_types"
-        },
+        "operators.fix_json_schema",
         {
             "__type__": "execute_expression",
             "expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*v.values())]",
diff --git a/src/unitxt/catalog/cards/bfcl/multi_turn/parallel_v3.json b/src/unitxt/catalog/cards/bfcl/multi_turn/parallel_v3.json
@@ -30,9 +30,7 @@
             "field": "function",
             "to_field": "tools"
         },
-        {
-            "__type__": "fix_json_schema_of_tool_parameter_types"
-        },
+        "operators.fix_json_schema",
         {
             "__type__": "execute_expression",
             "expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*v.values())]",
diff --git a/src/unitxt/catalog/cards/bfcl/multi_turn/simple_v3.json b/src/unitxt/catalog/cards/bfcl/multi_turn/simple_v3.json
@@ -30,9 +30,7 @@
             "field": "function",
             "to_field": "tools"
         },
-        {
-            "__type__": "fix_json_schema_of_tool_parameter_types"
-        },
+        "operators.fix_json_schema",
         {
             "__type__": "execute_expression",
             "expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*v.values())]",
diff --git a/src/unitxt/catalog/cards/bfcl/simple_v3.json b/src/unitxt/catalog/cards/bfcl/simple_v3.json
@@ -30,9 +30,7 @@
             "field": "function",
             "to_field": "tools"
         },
-        {
-            "__type__": "fix_json_schema_of_tool_parameter_types"
-        },
+        "operators.fix_json_schema",
         {
             "__type__": "execute_expression",
             "expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*v.values())]",
diff --git a/src/unitxt/catalog/cards/xlam_function_calling_60k.json b/src/unitxt/catalog/cards/xlam_function_calling_60k.json
@@ -64,8 +64,10 @@
             "expression": "[[p for p, c in tool['parameters']['properties'].items() if 'optional' not in c['type'].lower()] for tool in tools]"
         },
         {
-            "__function__": "extract_required_parameters",
-            "source": "def extract_required_parameters(instance, stream_name=None):\n    for tool in instance[\"tools\"]:\n        required_params = []\n        for param_name, param_info in tool[\"parameters\"][\"properties\"].items():\n            if \"optional\" not in param_info[\"type\"].lower():\n                required_params.append(param_name)\n        tool[\"parameters\"][\"required\"] = required_params\n    return instance\n"
+            "__type__": "copy",
+            "field": "required",
+            "to_field": "tools/*/parameters/required",
+            "set_every_value": true
         },
         {
             "__type__": "fix_json_schema_of_parameter_types",
diff --git a/src/unitxt/operators.py b/src/unitxt/operators.py