diff --git a/CHANGELOG.md b/CHANGELOG.md index 38d47b92..743b7100 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,7 @@ - Fix: W&B integration respects WANDB_ARTIFACT_DIR. In addition, new env var WANDB_CACHE_SKIP controls cache use. - Dropped support for S3 storages without proper SSL certificates. - Added support for W&B artifacts on local storage which don't need to be downloaded and may be earlier available. +- Hardcoded a date in template formatting application for consistency in eval results. ## 0.2.2 diff --git a/src/template_formatting/formatter.py b/src/template_formatting/formatter.py index ccf5a206..555c9ce2 100644 --- a/src/template_formatting/formatter.py +++ b/src/template_formatting/formatter.py @@ -323,6 +323,11 @@ def format( # type: ignore[override] "continue_final_message": is_prefill, } ) + if "date_string" not in template_kwargs: + # some templates are forcing a date_string, hardcoding one here for consistency in evaluations, e.g. + # https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama4/processing_llama4.py + # template_kwargs["date_string"] = "26 Jul 2024" + template_kwargs["date_string"] = "unknown" return self.tokenizer.apply_chat_template(hf_chat, **template_kwargs)