huggingface · NathanHB · Nov 17, 2025 · Nov 18, 2025 · Nov 18, 2025
diff --git a/src/lighteval/logging/evaluation_tracker.py b/src/lighteval/logging/evaluation_tracker.py
@@ -44,7 +44,7 @@
     VersionsLogger,
 )
 from lighteval.utils.imports import is_package_available, not_installed_error_message
-from lighteval.utils.utils import obj_to_markdown
+from lighteval.utils.utils import obj_to_markdown, sanitize_filename
 
 
 logger = logging.getLogger(__name__)
@@ -336,16 +336,21 @@ def load_details_datasets(self, date_id: str, task_names: list[str]) -> dict[str
         logger.info(f"Loading details from {output_dir_details_sub_folder}")
         date_id = output_dir_details_sub_folder.name  # Overwrite date_id in case of latest
         details_datasets = {}
+        sanitized_task_names = {sanitize_filename(tn): tn for tn in task_names}
         for file in self.fs.glob(str(output_dir_details_sub_folder / f"details_*_{date_id}.parquet")):
-            task_name = Path(file).stem.replace("details_", "").replace(f"_{date_id}", "")
-            if "|".join(task_name.split("|")[:-1]) not in task_names:
-                logger.info(f"Skipping {task_name} because it is not in the task_names list")
-                continue
+            sanitized_task_name = Path(file).stem.replace("details_", "").replace(f"_{date_id}", "")
+            if sanitized_task_name in sanitized_task_names:
+                task_name = sanitized_task_names[sanitized_task_name]
+            else:
+                task_name = sanitized_task_name.replace("__", "|")
+                if "|".join(task_name.split("|")[:-1]) not in task_names:
+                    logger.info(f"Skipping {task_name} because it is not in the task_names list")
+                    continue
             dataset = load_dataset("parquet", data_files=file, split="train")
             details_datasets[task_name] = dataset
 
         for task_name in task_names:
-            if not any(task_name.startswith(task_name) for task_name in details_datasets.keys()):
+            if not any(task_name.startswith(tn) for tn in details_datasets.keys()):
                 raise ValueError(
                     f"Task {task_name} not found in details datasets. Check the tasks to be evaluated or the date_id used to load the details ({date_id})."
                 )
@@ -356,7 +361,8 @@ def save_details(self, date_id: str, details_datasets: dict[str, Dataset]):
         self.fs.mkdirs(output_dir_details_sub_folder, exist_ok=True)
         logger.info(f"Saving details to {output_dir_details_sub_folder}")
         for task_name, dataset in details_datasets.items():
-            output_file_details = output_dir_details_sub_folder / f"details_{task_name}_{date_id}.parquet"
+            sanitized_task_name = sanitize_filename(task_name)
+            output_file_details = output_dir_details_sub_folder / f"details_{sanitized_task_name}_{date_id}.parquet"
             with self.fs.open(str(output_file_details), "wb") as f:
                 dataset.to_parquet(f)
 
@@ -421,7 +427,8 @@ def push_to_hub(
         results_dataset.to_parquet(f"{fsspec_repo_uri}/{result_file_base_name}.parquet")
 
         for task_name, dataset in details.items():
-            output_file_details = Path(date_id) / f"details_{task_name}_{date_id}.parquet"
+            sanitized_task_name = sanitize_filename(task_name)
+            output_file_details = Path(date_id) / f"details_{sanitized_task_name}_{date_id}.parquet"
             dataset.to_parquet(f"{fsspec_repo_uri}/{output_file_details}")
 
         self.recreate_metadata_card(repo_id)
@@ -474,11 +481,15 @@ def recreate_metadata_card(self, repo_id: str) -> None:  # noqa: C901
 
             # subfile have this general format:
             # `2023-09-03T10-57-04.203304/details_harness|hendrycksTest-us_foreign_policy|5_2023-09-03T10-57-04.203304.parquet`
+            # or with sanitized names: `2023-09-03T10-57-04.203304/details_harness__hendrycksTest-us_foreign_policy__5_2023-09-03T10-57-04.203304.parquet`
             # in the iso date, the `:` are replaced by `-` because windows does not allow `:` in their filenames
-            task_name = (
+            sanitized_task_name = (
                 details_file_regex.match(os.path.basename(sub_file)).group("task_name")  # type: ignore
             )
-            # task_name is then equal to `leaderboard|mmlu:us_foreign_policy|5`
+            # Reconstruct the original task name by replacing double underscores ("__") with pipes.
+            # Old-format names (pipes, no "__") pass through unchanged; a ":" that was sanitized to "__" comes back as "|", not ":".
+            task_name = sanitized_task_name.replace("__", "|")
+            # task_name is then equal to `leaderboard|mmlu:us_foreign_policy|5` (or sanitized equivalent)
 
             # to be able to parse the filename as iso dates, we need to re-replace the `-` with `:`
             # iso_date[13] = iso_date[16] = ':'
@@ -514,7 +525,9 @@ def recreate_metadata_card(self, repo_id: str) -> None:  # noqa: C901
                 task_name_match = details_file_regex.match(filename)  # type: ignore
                 if not task_name_match:
                     raise ValueError(f"Could not parse task name from filename: {filename}")
-                task_name = task_name_match.group("task_name")
+                sanitized_task_name = task_name_match.group("task_name")
+                # Reconstruct original task name by replacing double underscores ("__") with pipes
+                task_name = sanitized_task_name.replace("__", "|")
                 eval_date = task_name_match.group("date")
 
                 sanitized_task = re.sub(r"\W", "_", task_name)

diff --git a/src/lighteval/utils/cache_management.py b/src/lighteval/utils/cache_management.py
@@ -37,7 +37,7 @@
 from lighteval.tasks.lighteval_task import LightevalTaskConfig
 from lighteval.tasks.registry import Registry
 from lighteval.tasks.requests import Doc, SamplingMethod
-from lighteval.utils.utils import as_list
+from lighteval.utils.utils import as_list, sanitize_filename
 
 
 logger = logging.getLogger(__name__)
@@ -114,7 +114,10 @@ def _load_cached_indices(self) -> dict:
             try:
                 # cache_file.parts gives all the subfolders of the url, up to the file name
                 # last 3 are task_name/task_hash/file_name.parquet, so we take -3 and -2
-                task_name, task_hash = cache_file.parts[-3:-1]
+                sanitized_task_name, task_hash = cache_file.parts[-3:-1]
+                # Reconstruct the original task name by replacing double underscores ("__") with pipes
+                # Exact only when "|" was the sole sanitized character: ":" is also sanitized to "__" and would come back as "|"
+                task_name = sanitized_task_name.replace("__", "|")
                 sampling_method = SamplingMethod[cache_file.stem]  # removes the file extension
                 task_id = TaskID(task_name, task_hash, sampling_method)
 
@@ -191,7 +194,8 @@ def get_cache_path(self, task_id: TaskID) -> Path:
         Returns:
             Path: Path to the cache file for the given task and sample type
         """
-        return self.cache_dir / task_id.task_name / task_id.task_hash / f"{task_id.sampling_method.name}.parquet"
+        sanitized_task_name = sanitize_filename(task_id.task_name)
+        return self.cache_dir / sanitized_task_name / task_id.task_hash / f"{task_id.sampling_method.name}.parquet"
 
     def get_task_id(self, task_name: str, sampling_method: SamplingMethod) -> TaskID:
         """Returns a unique task indentifier. Depends on the task name,

diff --git a/src/lighteval/utils/utils.py b/src/lighteval/utils/utils.py
@@ -315,3 +315,17 @@ def remove_reasoning_tags(text: str, tag_pairs: list[tuple[str, str]]) -> str:
                 break
 
     return result
+
+
+def sanitize_filename(name: str) -> str:
+    r"""Return `name` with every filename-forbidden character replaced by "__".
+
+    Forbidden characters: < > : " / \ | ? * (Windows) and / plus NUL (Linux/Mac).
+
+    The mapping is lossy: "|" and ":" both become "__", so the original name
+    cannot be recovered reliably by replacing "__" with "|". Callers that need
+    the original name should look it up from the known, unsanitized names.
+    """
+    forbidden_chars = '|:/\\<>"?*\0'
+    # One translate pass; "__" has no forbidden character, so order is irrelevant.
+    return name.translate(str.maketrans(dict.fromkeys(forbidden_chars, "__")))
diff --git a/...rd-4|0_2025-11-05T15-23-34.026089.parquet → ...d-4__0_2025-11-05T15-23-34.026089.parquet b/...rd-4|0_2025-11-05T15-23-34.026089.parquet → ...d-4__0_2025-11-05T15-23-34.026089.parquet
diff --git a/...-rat|0_2025-11-05T14-43-47.148527.parquet → ...rat__0_2025-11-05T14-43-47.148527.parquet b/...-rat|0_2025-11-05T14-43-47.148527.parquet → ...rat__0_2025-11-05T14-43-47.148527.parquet
diff --git a/...a-en|0_2025-11-05T14-43-47.148527.parquet → ...-en__0_2025-11-05T14-43-47.148527.parquet b/...a-en|0_2025-11-05T14-43-47.148527.parquet → ...-en__0_2025-11-05T14-43-47.148527.parquet
diff --git a/...t-ar|0_2025-11-05T14-43-47.148527.parquet → ...-ar__0_2025-11-05T14-43-47.148527.parquet b/...t-ar|0_2025-11-05T14-43-47.148527.parquet → ...-ar__0_2025-11-05T14-43-47.148527.parquet
diff --git a/...t-lr|0_2025-11-05T14-43-47.148527.parquet → ...-lr__0_2025-11-05T14-43-47.148527.parquet b/...t-lr|0_2025-11-05T14-43-47.148527.parquet → ...-lr__0_2025-11-05T14-43-47.148527.parquet
diff --git a/...t-rc|0_2025-11-05T14-43-47.148527.parquet → ...-rc__0_2025-11-05T14-43-47.148527.parquet b/...t-rc|0_2025-11-05T14-43-47.148527.parquet → ...-rc__0_2025-11-05T14-43-47.148527.parquet
diff --git a/...sage|0_2025-11-05T14-43-47.148527.parquet → ...age__0_2025-11-05T14-43-47.148527.parquet b/...sage|0_2025-11-05T14-43-47.148527.parquet → ...age__0_2025-11-05T14-43-47.148527.parquet
diff --git a/...t-en|0_2025-11-05T14-43-47.148527.parquet → ...-en__0_2025-11-05T14-43-47.148527.parquet b/...t-en|0_2025-11-05T14-43-47.148527.parquet → ...-en__0_2025-11-05T14-43-47.148527.parquet
diff --git a/...nge|25_2025-11-05T14-43-47.148527.parquet → ...ge__25_2025-11-05T14-43-47.148527.parquet b/...nge|25_2025-11-05T14-43-47.148527.parquet → ...ge__25_2025-11-05T14-43-47.148527.parquet
diff --git a/...ment|3_2025-11-05T14-43-47.148527.parquet → ...ent__3_2025-11-05T14-43-47.148527.parquet b/...ment|3_2025-11-05T14-43-47.148527.parquet → ...ent__3_2025-11-05T14-43-47.148527.parquet
diff --git a/...ding|3_2025-11-05T14-43-47.148527.parquet → ...ing__3_2025-11-05T14-43-47.148527.parquet b/...ding|3_2025-11-05T14-43-47.148527.parquet → ...ing__3_2025-11-05T14-43-47.148527.parquet
diff --git a/...n_qa|3_2025-11-05T14-43-47.148527.parquet → ..._qa__3_2025-11-05T14-43-47.148527.parquet b/...n_qa|3_2025-11-05T14-43-47.148527.parquet → ..._qa__3_2025-11-05T14-43-47.148527.parquet
diff --git a/...apes|3_2025-11-05T14-43-47.148527.parquet → ...pes__3_2025-11-05T14-43-47.148527.parquet b/...apes|3_2025-11-05T14-43-47.148527.parquet → ...pes__3_2025-11-05T14-43-47.148527.parquet
diff --git a/...ects|3_2025-11-05T14-43-47.148527.parquet → ...cts__3_2025-11-05T14-43-47.148527.parquet b/...ects|3_2025-11-05T14-43-47.148527.parquet → ...cts__3_2025-11-05T14-43-47.148527.parquet
diff --git a/...ects|3_2025-11-05T14-43-47.148527.parquet → ...cts__3_2025-11-05T14-43-47.148527.parquet b/...ects|3_2025-11-05T14-43-47.148527.parquet → ...cts__3_2025-11-05T14-43-47.148527.parquet
diff --git a/...tion|3_2025-11-05T14-43-47.148527.parquet → ...ion__3_2025-11-05T14-43-47.148527.parquet b/...tion|3_2025-11-05T14-43-47.148527.parquet → ...ion__3_2025-11-05T14-43-47.148527.parquet
diff --git a/...gate|3_2025-11-05T14-43-47.148527.parquet → ...ate__3_2025-11-05T14-43-47.148527.parquet b/...gate|3_2025-11-05T14-43-47.148527.parquet → ...ate__3_2025-11-05T14-43-47.148527.parquet
diff --git a/...ames|3_2025-11-05T14-43-47.148527.parquet → ...mes__3_2025-11-05T14-43-47.148527.parquet b/...ames|3_2025-11-05T14-43-47.148527.parquet → ...mes__3_2025-11-05T14-43-47.148527.parquet
diff --git a/...tion|3_2025-11-05T14-43-47.148527.parquet → ...ion__3_2025-11-05T14-43-47.148527.parquet b/...tion|3_2025-11-05T14-43-47.148527.parquet → ...ion__3_2025-11-05T14-43-47.148527.parquet
diff --git a/...arks|3_2025-11-05T14-43-47.148527.parquet → ...rks__3_2025-11-05T14-43-47.148527.parquet b/...arks|3_2025-11-05T14-43-47.148527.parquet → ...rks__3_2025-11-05T14-43-47.148527.parquet
diff --git a/...nces|3_2025-11-05T14-43-47.148527.parquet → ...ces__3_2025-11-05T14-43-47.148527.parquet b/...nces|3_2025-11-05T14-43-47.148527.parquet → ...ces__3_2025-11-05T14-43-47.148527.parquet
diff --git a/...ects|3_2025-11-05T14-43-47.148527.parquet → ...cts__3_2025-11-05T14-43-47.148527.parquet b/...ects|3_2025-11-05T14-43-47.148527.parquet → ...cts__3_2025-11-05T14-43-47.148527.parquet
diff --git a/...ects|3_2025-11-05T14-43-47.148527.parquet → ...cts__3_2025-11-05T14-43-47.148527.parquet b/...ects|3_2025-11-05T14-43-47.148527.parquet → ...cts__3_2025-11-05T14-43-47.148527.parquet
diff --git a/...test|0_2025-11-05T14-43-47.148527.parquet → ...est__0_2025-11-05T14-43-47.148527.parquet b/...test|0_2025-11-05T14-43-47.148527.parquet → ...est__0_2025-11-05T14-43-47.148527.parquet
diff --git a/...wag|10_2025-11-05T14-43-47.148527.parquet → ...ag__10_2025-11-05T14-43-47.148527.parquet b/...wag|10_2025-11-05T14-43-47.148527.parquet → ...ag__10_2025-11-05T14-43-47.148527.parquet
diff --git a/...stry|5_2025-11-05T14-43-47.148527.parquet → ...try__5_2025-11-05T14-43-47.148527.parquet b/...stry|5_2025-11-05T14-43-47.148527.parquet → ...try__5_2025-11-05T14-43-47.148527.parquet
diff --git a/...licy|5_2025-11-05T14-43-47.148527.parquet → ...icy__5_2025-11-05T14-43-47.148527.parquet b/...licy|5_2025-11-05T14-43-47.148527.parquet → ...icy__5_2025-11-05T14-43-47.148527.parquet
diff --git a/...a:mc|0_2025-11-05T14-43-47.148527.parquet → ..._mc__0_2025-11-05T14-43-47.148527.parquet b/...a:mc|0_2025-11-05T14-43-47.148527.parquet → ..._mc__0_2025-11-05T14-43-47.148527.parquet
diff --git a/...-rat|0_2025-11-05T14-52-08.352779.parquet → ...rat__0_2025-11-05T14-52-08.352779.parquet b/...-rat|0_2025-11-05T14-52-08.352779.parquet → ...rat__0_2025-11-05T14-52-08.352779.parquet
diff --git a/...a-en|0_2025-11-05T14-52-08.352779.parquet → ...-en__0_2025-11-05T14-52-08.352779.parquet b/...a-en|0_2025-11-05T14-52-08.352779.parquet → ...-en__0_2025-11-05T14-52-08.352779.parquet
diff --git a/...t-ar|0_2025-11-05T14-52-08.352779.parquet → ...-ar__0_2025-11-05T14-52-08.352779.parquet b/...t-ar|0_2025-11-05T14-52-08.352779.parquet → ...-ar__0_2025-11-05T14-52-08.352779.parquet
diff --git a/...t-lr|0_2025-11-05T14-52-08.352779.parquet → ...-lr__0_2025-11-05T14-52-08.352779.parquet b/...t-lr|0_2025-11-05T14-52-08.352779.parquet → ...-lr__0_2025-11-05T14-52-08.352779.parquet
diff --git a/...t-rc|0_2025-11-05T14-52-08.352779.parquet → ...-rc__0_2025-11-05T14-52-08.352779.parquet b/...t-rc|0_2025-11-05T14-52-08.352779.parquet → ...-rc__0_2025-11-05T14-52-08.352779.parquet
diff --git a/...sage|0_2025-11-05T14-52-08.352779.parquet → ...age__0_2025-11-05T14-52-08.352779.parquet b/...sage|0_2025-11-05T14-52-08.352779.parquet → ...age__0_2025-11-05T14-52-08.352779.parquet
diff --git a/...t-en|0_2025-11-05T14-52-08.352779.parquet → ...-en__0_2025-11-05T14-52-08.352779.parquet b/...t-en|0_2025-11-05T14-52-08.352779.parquet → ...-en__0_2025-11-05T14-52-08.352779.parquet
diff --git a/...nge|25_2025-11-05T14-52-08.352779.parquet → ...ge__25_2025-11-05T14-52-08.352779.parquet b/...nge|25_2025-11-05T14-52-08.352779.parquet → ...ge__25_2025-11-05T14-52-08.352779.parquet
diff --git a/...ment|3_2025-11-05T14-52-08.352779.parquet → ...ent__3_2025-11-05T14-52-08.352779.parquet b/...ment|3_2025-11-05T14-52-08.352779.parquet → ...ent__3_2025-11-05T14-52-08.352779.parquet
diff --git a/...ding|3_2025-11-05T14-52-08.352779.parquet → ...ing__3_2025-11-05T14-52-08.352779.parquet b/...ding|3_2025-11-05T14-52-08.352779.parquet → ...ing__3_2025-11-05T14-52-08.352779.parquet
diff --git a/...n_qa|3_2025-11-05T14-52-08.352779.parquet → ..._qa__3_2025-11-05T14-52-08.352779.parquet b/...n_qa|3_2025-11-05T14-52-08.352779.parquet → ..._qa__3_2025-11-05T14-52-08.352779.parquet
diff --git a/...apes|3_2025-11-05T14-52-08.352779.parquet → ...pes__3_2025-11-05T14-52-08.352779.parquet b/...apes|3_2025-11-05T14-52-08.352779.parquet → ...pes__3_2025-11-05T14-52-08.352779.parquet
diff --git a/...ects|3_2025-11-05T14-52-08.352779.parquet → ...cts__3_2025-11-05T14-52-08.352779.parquet b/...ects|3_2025-11-05T14-52-08.352779.parquet → ...cts__3_2025-11-05T14-52-08.352779.parquet
diff --git a/...ects|3_2025-11-05T14-52-08.352779.parquet → ...cts__3_2025-11-05T14-52-08.352779.parquet b/...ects|3_2025-11-05T14-52-08.352779.parquet → ...cts__3_2025-11-05T14-52-08.352779.parquet
diff --git a/...tion|3_2025-11-05T14-52-08.352779.parquet → ...ion__3_2025-11-05T14-52-08.352779.parquet b/...tion|3_2025-11-05T14-52-08.352779.parquet → ...ion__3_2025-11-05T14-52-08.352779.parquet
diff --git a/...gate|3_2025-11-05T14-52-08.352779.parquet → ...ate__3_2025-11-05T14-52-08.352779.parquet b/...gate|3_2025-11-05T14-52-08.352779.parquet → ...ate__3_2025-11-05T14-52-08.352779.parquet
diff --git a/...ames|3_2025-11-05T14-52-08.352779.parquet → ...mes__3_2025-11-05T14-52-08.352779.parquet b/...ames|3_2025-11-05T14-52-08.352779.parquet → ...mes__3_2025-11-05T14-52-08.352779.parquet
diff --git a/...tion|3_2025-11-05T14-52-08.352779.parquet → ...ion__3_2025-11-05T14-52-08.352779.parquet b/...tion|3_2025-11-05T14-52-08.352779.parquet → ...ion__3_2025-11-05T14-52-08.352779.parquet
diff --git a/...arks|3_2025-11-05T14-52-08.352779.parquet → ...rks__3_2025-11-05T14-52-08.352779.parquet b/...arks|3_2025-11-05T14-52-08.352779.parquet → ...rks__3_2025-11-05T14-52-08.352779.parquet
diff --git a/...nces|3_2025-11-05T14-52-08.352779.parquet → ...ces__3_2025-11-05T14-52-08.352779.parquet b/...nces|3_2025-11-05T14-52-08.352779.parquet → ...ces__3_2025-11-05T14-52-08.352779.parquet
diff --git a/...ects|3_2025-11-05T14-52-08.352779.parquet → ...cts__3_2025-11-05T14-52-08.352779.parquet b/...ects|3_2025-11-05T14-52-08.352779.parquet → ...cts__3_2025-11-05T14-52-08.352779.parquet
diff --git a/...ects|3_2025-11-05T14-52-08.352779.parquet → ...cts__3_2025-11-05T14-52-08.352779.parquet b/...ects|3_2025-11-05T14-52-08.352779.parquet → ...cts__3_2025-11-05T14-52-08.352779.parquet
diff --git a/...test|0_2025-11-05T14-52-08.352779.parquet → ...est__0_2025-11-05T14-52-08.352779.parquet b/...test|0_2025-11-05T14-52-08.352779.parquet → ...est__0_2025-11-05T14-52-08.352779.parquet
diff --git a/...wag|10_2025-11-05T14-52-08.352779.parquet → ...ag__10_2025-11-05T14-52-08.352779.parquet b/...wag|10_2025-11-05T14-52-08.352779.parquet → ...ag__10_2025-11-05T14-52-08.352779.parquet
diff --git a/...stry|5_2025-11-05T14-52-08.352779.parquet → ...try__5_2025-11-05T14-52-08.352779.parquet b/...stry|5_2025-11-05T14-52-08.352779.parquet → ...try__5_2025-11-05T14-52-08.352779.parquet
diff --git a/...licy|5_2025-11-05T14-52-08.352779.parquet → ...icy__5_2025-11-05T14-52-08.352779.parquet b/...licy|5_2025-11-05T14-52-08.352779.parquet → ...icy__5_2025-11-05T14-52-08.352779.parquet
diff --git a/...a:mc|0_2025-11-05T14-52-08.352779.parquet → ..._mc__0_2025-11-05T14-52-08.352779.parquet b/...a:mc|0_2025-11-05T14-52-08.352779.parquet → ..._mc__0_2025-11-05T14-52-08.352779.parquet
diff --git a/tests/slow_tests/sample_comparison.py b/tests/slow_tests/sample_comparison.py
@@ -46,10 +46,13 @@ def load_sample_details(details_dir: str):
         return details
 
     for parquet_file in details_path.glob("details_*.parquet"):
-        # Extract task name from parquet filename, keeping the full task path with "|" separators
-        task_name = parquet_file.stem.replace("details_", "").rsplit("_", 1)[
+        # Extract task name from parquet filename
+        # Handle both sanitized format (with underscores) and old format (with pipes)
+        sanitized_task_name = parquet_file.stem.replace("details_", "").rsplit("_", 1)[
             0
-        ]  # Split from right to preserve task name with "|"
+        ]  # Split from right to get task name (before date_id)
+        # Reconstruct original task name by replacing double underscores ("__") with pipes
+        task_name = sanitized_task_name.replace("__", "|")
         dataset = Dataset.from_parquet(str(parquet_file))
         details[task_name] = list(dataset)