From fb858abc704f3dfe19a1faaa86df198534e8d462 Mon Sep 17 00:00:00 2001 From: Ilya Boytsov Date: Tue, 9 Sep 2025 12:12:30 +0200 Subject: [PATCH 1/2] Do not initialize ref model when PEFT is used When running the DPO script with a PEFT configuration, e.g.: ``` ACCELERATE_LOG_LEVEL=info accelerate launch --config_file recipes/accelerate_configs/ddp.yaml --num_processes=1 scripts/dpo.py --config recipes/zephyr-7b-beta/dpo/config_qlora.yaml ``` the following error is raised: ```ValueError: You passed both a ref_model and a peft_config. For training PEFT adapters with DPO there is no need to pass a reference model. Please pass `ref_model=None` in case you want to train PEFT adapters, or pass a ref_model with `force_use_ref_model=True` in DPOTrainer's init. if you want to use a different ref_model.``` This happens because the script tries to initialize a reference model even when LoRA/PEFT is enabled. This PR updates the logic to skip reference model initialization when PEFT adapters are used. 
This mirrors the logic used in the trl example dpo script: https://github.com/huggingface/trl/blob/main/trl/scripts/dpo.py#L109 --- scripts/dpo.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/dpo.py b/scripts/dpo.py index 07df54c7..4ca91086 100644 --- a/scripts/dpo.py +++ b/scripts/dpo.py @@ -98,7 +98,10 @@ def main(script_args, training_args, model_args): # Model & Tokenizer ################### model = get_model(model_args, training_args) - ref_model = get_model(model_args, training_args) + if model_args.use_peft: + ref_model = get_model(model_args, training_args) + else: + ref_model = None tokenizer = get_tokenizer(model_args, training_args) if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.eos_token From 3a4b5b73083b26c250fe7e860e1550cbdcfb538c Mon Sep 17 00:00:00 2001 From: Ilya Boytsov Date: Tue, 9 Sep 2025 12:15:48 +0200 Subject: [PATCH 2/2] Fix if/else logic --- scripts/dpo.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/dpo.py b/scripts/dpo.py index 4ca91086..a5c3168d 100644 --- a/scripts/dpo.py +++ b/scripts/dpo.py @@ -99,9 +99,9 @@ def main(script_args, training_args, model_args): ################### model = get_model(model_args, training_args) if model_args.use_peft: - ref_model = get_model(model_args, training_args) - else: ref_model = None + else: + ref_model = get_model(model_args, training_args) tokenizer = get_tokenizer(model_args, training_args) if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.eos_token