diff --git a/results/abhishec-20260307-124051.json b/results/abhishec-20260307-124051.json new file mode 100644 index 0000000..c05b44d --- /dev/null +++ b/results/abhishec-20260307-124051.json @@ -0,0 +1,66 @@ +{ + "participants": { + "agent": "019cade3-a63f-7283-9991-ab5dbcd815e4" + }, + "results": [ + { + "domain": "airline", + "score": 36.0, + "max_score": 50, + "pass_rate": 72.0, + "task_rewards": { + "0": 1.0, + "1": 1.0, + "2": 1.0, + "3": 1.0, + "4": 1.0, + "5": 1.0, + "6": 1.0, + "7": 0.0, + "8": 1.0, + "9": 1.0, + "10": 1.0, + "11": 1.0, + "12": 0.0, + "13": 0.0, + "14": 0.0, + "15": 0.0, + "16": 1.0, + "17": 1.0, + "18": 1.0, + "19": 1.0, + "20": 1.0, + "21": 0.0, + "22": 1.0, + "23": 0.0, + "24": 1.0, + "25": 0.0, + "26": 1.0, + "27": 0.0, + "28": 1.0, + "29": 1.0, + "30": 1.0, + "31": 1.0, + "32": 0.0, + "33": 0.0, + "34": 1.0, + "35": 0.0, + "36": 1.0, + "37": 0.0, + "38": 1.0, + "39": 1.0, + "40": 1.0, + "41": 1.0, + "42": 1.0, + "43": 1.0, + "44": 0.0, + "45": 1.0, + "46": 1.0, + "47": 1.0, + "48": 1.0, + "49": 1.0 + }, + "time_used": 1965.3839840888977 + } + ] +} \ No newline at end of file diff --git a/submissions/abhishec-20260307-124051.provenance.json b/submissions/abhishec-20260307-124051.provenance.json new file mode 100644 index 0000000..64c64a6 --- /dev/null +++ b/submissions/abhishec-20260307-124051.provenance.json @@ -0,0 +1,8 @@ +{ + "image_digests": { + "green-agent": "ghcr.io/rdi-foundation/agentbeats-tutorial-tau2-evaluator@sha256:3b2f863a4717c9c0792228befd1d2f6acf8c9d78d902d1686564320161cc75b1", + "agent": "public.ecr.aws/d9m7h3k5/agentbench-purple@sha256:61be178231c8e2825b42eecdf9f59d7e524951d49fbd3e437c7d361e0d3fd886", + "agentbeats-client": "ghcr.io/agentbeats/agentbeats-client@sha256:13dfe3ef4e583a80e7ce2fe3becd0ce3b879841368a7f4fa40b6ebbabeeb014e" + }, + "timestamp": "2026-03-07T12:40:51Z" +} \ No newline at end of file diff --git a/submissions/abhishec-20260307-124051.toml b/submissions/abhishec-20260307-124051.toml new file mode 100644 index 0000000..c8ac286 --- /dev/null +++ b/submissions/abhishec-20260307-124051.toml @@ -0,0 +1,13 @@ +# Purple Business Process Agent — Tau2-Bench assessment (v53 — new OpenAI key + smart routing + valid API key) +[green_agent] +agentbeats_id = "019b4d06-1da3-7c22-88fa-f4660061a779" +env = { OPENAI_API_KEY = "${OPENAI_API_KEY}" } + +[[participants]] +agentbeats_id = "019cade3-a63f-7283-9991-ab5dbcd815e4" +name = "agent" +env = { ANTHROPIC_API_KEY = "${ANTHROPIC_API_KEY}", GREEN_AGENT_MCP_URL = "http://green-agent:9009" } + +[config] +domain = "airline" +num_tasks = 50