Skip to content

Commit c6a16c6

Browse files
committed
revert bfcl to main, will be fixed in another PR, revert changes to xlam -- were already committed to main, and now live with the general schema fixer operator, so removed the one tailored for tools
Signed-off-by: dafnapension <dafnashein@yahoo.com>
1 parent 4aa1e22 commit c6a16c6

17 files changed

+67
-201
lines changed

prepare/cards/bfcl.py

Lines changed: 48 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
from unitxt.operators import (
66
Copy,
77
ExecuteExpression,
8-
FixJsonSchemaOfToolParameterTypes,
98
Set,
109
)
1110
from unitxt.stream_operators import JoinStreams
@@ -34,7 +33,7 @@
3433
),
3534
Copy(field="question/0/0/content", to_field="query"),
3635
Copy(field="function", to_field="tools"),
37-
FixJsonSchemaOfToolParameterTypes(),
36+
"operators.fix_json_schema",
3837
# Process ground truth data in this dataset, which is provided as a list of options per field,
3938
# and convert it into a list of explicit tool calls
4039
#
@@ -75,12 +74,12 @@
7574
for subset in [
7675
"simple",
7776
"multiple",
78-
"live_multiple", # instances above 900 reach size of hundreds of MBs
77+
"live_multiple",
7978
"live_simple",
8079
"java",
8180
"javascript",
8281
"parallel",
83-
"parallel_multiple", # error caused by instance 179
82+
"parallel_multiple",
8483
"live_parallel",
8584
"live_parallel_multiple",
8685
]:
@@ -103,7 +102,7 @@
103102
),
104103
Copy(field="question/*/0", to_field="dialog"),
105104
Copy(field="function", to_field="tools"),
106-
FixJsonSchemaOfToolParameterTypes(),
105+
"operators.fix_json_schema",
107106
ExecuteExpression(
108107
expression='[{"name": k, "arguments": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*v.values())]',
109108
to_field="reference_calls",
@@ -139,52 +138,52 @@
139138
test_card(card, strict=False)
140139
add_to_catalog(card, f"cards.bfcl.multi_turn.{subset}_v3", overwrite=True)
141140

142-
for subset in [
143-
"live_relevance",
144-
"live_irrelevance",
145-
]:
146-
card = TaskCard(
147-
loader=LoadJsonFile(
148-
files={
149-
"test": base_path + f"BFCL_v3_{subset}.json",
150-
},
151-
lines=True,
152-
data_classification_policy=["public"],
153-
),
154-
preprocess_steps=[
155-
Copy(field="question/*/0", to_field="dialog"),
156-
Copy(field="function", to_field="tools"),
157-
FixJsonSchemaOfToolParameterTypes(),
158-
Set(fields={"reference_calls": []}),
159-
],
160-
task="tasks.tool_calling.multi_turn",
161-
templates=["templates.tool_calling.multi_turn"],
162-
__description__=(
163-
"""The Berkeley function calling leaderboard is a live leaderboard to evaluate the ability of different LLMs to call functions (also referred to as tools). We built this dataset from our learnings to be representative of most users' function calling use-cases, for example, in agents, as a part of enterprise workflows, etc. To this end, our evaluation dataset spans diverse categories, and across multiple languages."""
164-
),
165-
__title__=f"""Berkeley Function Calling Leaderboard (Multi Turn Setup) - {subset.replace("_", " ").title()} V3""",
166-
__tags__={
167-
"annotations_creators": "expert-generated",
168-
"language": ["en"],
169-
"license": "apache-2.0",
170-
"size_categories": ["10K<n<100K"],
171-
"task_categories": [
172-
"question-answering",
173-
"reading-comprehension",
174-
"tool-calling",
175-
"multi-turn-tool-calling",
176-
],
177-
"task_ids": [
178-
"tool-calling",
179-
"multi-turn-tool-calling",
180-
"reading-comprehension",
141+
for subset in [
142+
"live_relevance",
143+
"live_irrelevance",
144+
]:
145+
card = TaskCard(
146+
loader=LoadJsonFile(
147+
files={
148+
"test": base_path + f"BFCL_v3_{subset}.json",
149+
},
150+
lines=True,
151+
data_classification_policy=["public"],
152+
),
153+
preprocess_steps=[
154+
Copy(field="question/*/0", to_field="dialog"),
155+
Copy(field="function", to_field="tools"),
156+
"operators.fix_json_schema",
157+
Set(fields={"reference_calls": []}),
181158
],
182-
},
183-
)
159+
task="tasks.tool_calling.multi_turn",
160+
templates=["templates.tool_calling.multi_turn"],
161+
__description__=(
162+
"""The Berkeley function calling leaderboard is a live leaderboard to evaluate the ability of different LLMs to call functions (also referred to as tools). We built this dataset from our learnings to be representative of most users' function calling use-cases, for example, in agents, as a part of enterprise workflows, etc. To this end, our evaluation dataset spans diverse categories, and across multiple languages."""
163+
),
164+
__title__=f"""Berkeley Function Calling Leaderboard (Multi Turn Setup) - {subset.replace("_", " ").title()} V3""",
165+
__tags__={
166+
"annotations_creators": "expert-generated",
167+
"language": ["en"],
168+
"license": "apache-2.0",
169+
"size_categories": ["10K<n<100K"],
170+
"task_categories": [
171+
"question-answering",
172+
"reading-comprehension",
173+
"tool-calling",
174+
"multi-turn-tool-calling",
175+
],
176+
"task_ids": [
177+
"tool-calling",
178+
"multi-turn-tool-calling",
179+
"reading-comprehension",
180+
],
181+
},
182+
)
184183

185-
# Test and add the card to the catalog
186-
test_card(card, strict=False)
187-
add_to_catalog(card, f"cards.bfcl.multi_turn.{subset}_v3", overwrite=True)
184+
# Test and add the card to the catalog
185+
test_card(card, strict=False)
186+
add_to_catalog(card, f"cards.bfcl.multi_turn.{subset}_v3", overwrite=True)
188187

189188
# card = TaskCard(
190189
# loader=LoadJsonFile(

prepare/cards/xlam_function_calling.py

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -12,17 +12,6 @@
1212
from unitxt.struct_data_operators import LoadJson
1313
from unitxt.test_utils.card import test_card
1414

15-
16-
def extract_required_parameters(instance, stream_name=None):
17-
for tool in instance["tools"]:
18-
required_params = []
19-
for param_name, param_info in tool["parameters"]["properties"].items():
20-
if "optional" not in param_info["type"].lower():
21-
required_params.append(param_name)
22-
tool["parameters"]["required"] = required_params
23-
return instance
24-
25-
2615
card = TaskCard(
2716
loader=LoadHF(
2817
path="Salesforce/xlam-function-calling-60k",

src/unitxt/catalog/cards/bfcl/multi_turn/java_v3.json

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,7 @@
3030
"field": "function",
3131
"to_field": "tools"
3232
},
33-
{
34-
"__type__": "fix_json_schema_of_tool_parameter_types"
35-
},
33+
"operators.fix_json_schema",
3634
{
3735
"__type__": "execute_expression",
3836
"expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*v.values())]",

src/unitxt/catalog/cards/bfcl/multi_turn/javascript_v3.json

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,7 @@
3030
"field": "function",
3131
"to_field": "tools"
3232
},
33-
{
34-
"__type__": "fix_json_schema_of_tool_parameter_types"
35-
},
33+
"operators.fix_json_schema",
3634
{
3735
"__type__": "execute_expression",
3836
"expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*v.values())]",

src/unitxt/catalog/cards/bfcl/multi_turn/live_irrelevance_v3.json

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,7 @@
2121
"field": "function",
2222
"to_field": "tools"
2323
},
24-
{
25-
"__type__": "fix_json_schema_of_tool_parameter_types"
26-
},
24+
"operators.fix_json_schema",
2725
{
2826
"__type__": "set",
2927
"fields": {

src/unitxt/catalog/cards/bfcl/multi_turn/live_multiple_v3.json

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,7 @@
3030
"field": "function",
3131
"to_field": "tools"
3232
},
33-
{
34-
"__type__": "fix_json_schema_of_tool_parameter_types"
35-
},
33+
"operators.fix_json_schema",
3634
{
3735
"__type__": "execute_expression",
3836
"expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*v.values())]",

src/unitxt/catalog/cards/bfcl/multi_turn/live_parallel_multiple_v3.json

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,7 @@
3030
"field": "function",
3131
"to_field": "tools"
3232
},
33-
{
34-
"__type__": "fix_json_schema_of_tool_parameter_types"
35-
},
33+
"operators.fix_json_schema",
3634
{
3735
"__type__": "execute_expression",
3836
"expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*v.values())]",

src/unitxt/catalog/cards/bfcl/multi_turn/live_parallel_v3.json

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,7 @@
3030
"field": "function",
3131
"to_field": "tools"
3232
},
33-
{
34-
"__type__": "fix_json_schema_of_tool_parameter_types"
35-
},
33+
"operators.fix_json_schema",
3634
{
3735
"__type__": "execute_expression",
3836
"expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*v.values())]",

src/unitxt/catalog/cards/bfcl/multi_turn/live_relevance_v3.json

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,7 @@
2121
"field": "function",
2222
"to_field": "tools"
2323
},
24-
{
25-
"__type__": "fix_json_schema_of_tool_parameter_types"
26-
},
24+
"operators.fix_json_schema",
2725
{
2826
"__type__": "set",
2927
"fields": {

src/unitxt/catalog/cards/bfcl/multi_turn/live_simple_v3.json

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,7 @@
3030
"field": "function",
3131
"to_field": "tools"
3232
},
33-
{
34-
"__type__": "fix_json_schema_of_tool_parameter_types"
35-
},
33+
"operators.fix_json_schema",
3634
{
3735
"__type__": "execute_expression",
3836
"expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*v.values())]",

0 commit comments

Comments
 (0)