From 2e3b3b1b7bdccf5344eb1b032f1fbac9a05fd590 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 26 Feb 2026 09:33:16 +0000 Subject: [PATCH] chore(main): release 0.2.13 --- .release-please-manifest.json | 2 +- CHANGELOG.md | 24 ++++++++++++++++++++++++ pyproject.toml | 2 +- uv.lock | 2 +- 4 files changed, 27 insertions(+), 3 deletions(-) diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 7a1581b7..5ccb825b 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "0.2.12" + ".": "0.2.13" } diff --git a/CHANGELOG.md b/CHANGELOG.md index 38f07d88..e11fb634 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,30 @@ ### Bug Fixes +## [0.2.13](https://github.com/Aleph-Alpha-Research/eval-framework/compare/v0.2.12...v0.2.13) (2026-02-26) + + +### Features + +* add Global MMLU task ([#174](https://github.com/Aleph-Alpha-Research/eval-framework/issues/174)) ([0d0b227](https://github.com/Aleph-Alpha-Research/eval-framework/commit/0d0b22789b7817e120831cf688f0dd2aca84c1d8)) +* add GoldenSwag task ([#175](https://github.com/Aleph-Alpha-Research/eval-framework/issues/175)) ([a05e032](https://github.com/Aleph-Alpha-Research/eval-framework/commit/a05e0325e09c2ea0e5bf20284fff4428c7d126ab)) +* add tasks from the OLMES evaluation suite ([#180](https://github.com/Aleph-Alpha-Research/eval-framework/issues/180)) ([54f295d](https://github.com/Aleph-Alpha-Research/eval-framework/commit/54f295d7d82e71ba80d34b8f6758efc29bf27dd0)) +* adding aggregated results with errors, if error-free ratio is < 1.0 ([#181](https://github.com/Aleph-Alpha-Research/eval-framework/issues/181)) ([6f3e639](https://github.com/Aleph-Alpha-Research/eval-framework/commit/6f3e6397f65fa7be45bbcb6ff248cc2f8097f5fb)) +* BalancedCOPA dataset ([#177](https://github.com/Aleph-Alpha-Research/eval-framework/issues/177)) 
([25161aa](https://github.com/Aleph-Alpha-Research/eval-framework/commit/25161aaab9acbc549997227cefa181414a368799)) +* Change to more complete revision of ZeroScrolls dataset ([#171](https://github.com/Aleph-Alpha-Research/eval-framework/issues/171)) ([a4e117e](https://github.com/Aleph-Alpha-Research/eval-framework/commit/a4e117eaf4c4fc3ad8bfbffb9b5aaf737ed78dbe)) +* COPA uses appropriate dataset splits ([#176](https://github.com/Aleph-Alpha-Research/eval-framework/issues/176)) ([55ebe44](https://github.com/Aleph-Alpha-Research/eval-framework/commit/55ebe446789e47e834f03bb62d49a3095c692026)) + + +### Bug Fixes + +* Change to more complete revision of zeroscrolls ([#173](https://github.com/Aleph-Alpha-Research/eval-framework/issues/173)) ([a84286e](https://github.com/Aleph-Alpha-Research/eval-framework/commit/a84286ea0f1d446b548087eb306ffbaeb06bd0e6)) +* Flores200 data reading issue ([#179](https://github.com/Aleph-Alpha-Research/eval-framework/issues/179)) ([9bf3155](https://github.com/Aleph-Alpha-Research/eval-framework/commit/9bf31551cce821fccf229e936aa8beb79046fcc7)) + + +### Documentation + +* updated with info for release-please ([#162](https://github.com/Aleph-Alpha-Research/eval-framework/issues/162)) ([cf38766](https://github.com/Aleph-Alpha-Research/eval-framework/commit/cf3876635af004102badb935360efbf840087824)) + ## [0.2.12](https://github.com/Aleph-Alpha-Research/eval-framework/compare/v0.2.11...v0.2.12) (2026-02-04) diff --git a/pyproject.toml b/pyproject.toml index 2d7df4b8..e669f136 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "eval-framework" -version = "0.2.12" +version = "0.2.13" description = "Evalulation Framework" readme = "README.md" license = { file = "LICENSE" } diff --git a/uv.lock b/uv.lock index fb78b508..8ce0a91e 100644 --- a/uv.lock +++ b/uv.lock @@ -826,7 +826,7 @@ wheels = [ [[package]] name = "eval-framework" -version = "0.2.12" +version = "0.2.13" source = { editable = "." 
} dependencies = [ { name = "antlr4-python3-runtime" },