From 09e0124eadfd5fa2a88213c1c50e4aed0fdf725c Mon Sep 17 00:00:00 2001 From: avecplezir Date: Thu, 10 Apr 2025 10:42:16 -0400 Subject: [PATCH 01/15] sm fix --- .../accelerate_configs/deepspeed_zero2.yaml | 21 +++++++++++++++++++ .../accelerate_configs/deepspeed_zero3.yaml | 2 +- benchmarks/dpo/continual_dpo_trainer.py | 7 ++++--- benchmarks/dpo/dpo_continual.py | 15 +++++++------ benchmarks/reward_modeling.py | 1 + 5 files changed, 34 insertions(+), 12 deletions(-) create mode 100644 benchmarks/dpo/accelerate_configs/deepspeed_zero2.yaml diff --git a/benchmarks/dpo/accelerate_configs/deepspeed_zero2.yaml b/benchmarks/dpo/accelerate_configs/deepspeed_zero2.yaml new file mode 100644 index 00000000..877a5b8f --- /dev/null +++ b/benchmarks/dpo/accelerate_configs/deepspeed_zero2.yaml @@ -0,0 +1,21 @@ +compute_environment: LOCAL_MACHINE +debug: false +deepspeed_config: + deepspeed_multinode_launcher: standard + offload_optimizer_device: none + offload_param_device: none + zero3_init_flag: false + zero_stage: 2 +distributed_type: DEEPSPEED +downcast_bf16: 'no' +machine_rank: 0 +main_training_function: main +mixed_precision: 'bf16' +num_machines: 2 +num_processes: 1 +rdzv_backend: static +same_network: true +tpu_env: [] +tpu_use_cluster: false +tpu_use_sudo: false +use_cpu: false diff --git a/benchmarks/dpo/accelerate_configs/deepspeed_zero3.yaml b/benchmarks/dpo/accelerate_configs/deepspeed_zero3.yaml index 7f17a48f..29507c4c 100644 --- a/benchmarks/dpo/accelerate_configs/deepspeed_zero3.yaml +++ b/benchmarks/dpo/accelerate_configs/deepspeed_zero3.yaml @@ -11,7 +11,7 @@ machine_rank: 0 main_training_function: main mixed_precision: bf16 num_machines: 1 -num_processes: 1 # TODO change to whatever number of gpus is used +num_processes: 2 # TODO change to whatever number of gpus is used rdzv_backend: static same_network: true tpu_env: [] diff --git a/benchmarks/dpo/continual_dpo_trainer.py b/benchmarks/dpo/continual_dpo_trainer.py index ee1820ff..47340409 100644 --- a/benchmarks/dpo/continual_dpo_trainer.py +++ b/benchmarks/dpo/continual_dpo_trainer.py @@ -320,7 +320,8 @@ def log( train_eval = 'train' if 'loss' in logs else 'eval' print(f'Logging {train_eval} metrics...') if train_eval == 'eval': - print('Computing policy metrics...') - eval_policy_metrics = self.evaluate_policy() - logs.update(eval_policy_metrics) + if self.reward_model is not None: + print('Computing policy metrics...') + eval_policy_metrics = self.evaluate_policy() + logs.update(eval_policy_metrics) return super().log(logs, start_time) diff --git a/benchmarks/dpo/dpo_continual.py b/benchmarks/dpo/dpo_continual.py index 080d8d51..1b19b2df 100644 --- a/benchmarks/dpo/dpo_continual.py +++ b/benchmarks/dpo/dpo_continual.py @@ -3,11 +3,6 @@ import os import torch -from continual_dpo_trainer import ( - ContinualDPOArguments, - ContinualDPOConfig, - ContinualDPOTrainer, -) from datasets import Dataset from transformers import ( AutoModelForCausalLM, @@ -24,6 +19,8 @@ from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE import wandb as wb +from transformers.trainer_utils import is_main_process + from benchmarks.dataloading import init_continual_dataset from benchmarks.dpo.continual_dpo_trainer import ( ContinualDPOArguments, @@ -104,7 +101,7 @@ def main( # first check the hub if the model is present try: AutoModelForSequenceClassification.from_pretrained( - reward_path, num_labels=1 + reward_path, num_labels=1, use_cache=True ) except: # if not found in the hub, check the local path @@ -152,8 +149,10 @@ def main( print(f'eval/dataset/{i}') 
trainer.log_metrics(f'eval/dataset/{i}', metrics) trainer.save_metrics(f'eval', metrics) - wb.log({'eval': {'last': metrics}}) # type: ignore[attr-defined] - wb.log({f'task/{current_dataset_name}/last': metrics}) # type: ignore[attr-defined] + # if is_main_process(): + if training_args.local_rank in (None, -1, 0): + wb.log({'eval': {'last': metrics}}) # type: ignore[attr-defined] + wb.log({f'task/{current_dataset_name}/last': metrics}) # type: ignore[attr-defined] # Save and push to hub trainer.save_model(os.path.join(training_args.output_dir, 'last')) diff --git a/benchmarks/reward_modeling.py b/benchmarks/reward_modeling.py index 1e06bdf4..a211a740 100644 --- a/benchmarks/reward_modeling.py +++ b/benchmarks/reward_modeling.py @@ -226,6 +226,7 @@ def train_model( except Exception as e: print(f'Job {i + 1} failed with error: {e}') else: + print(f'Running on {script_args.dataset_index} task out of {len(continual_dataset)} tasks') dataset = continual_dataset[script_args.dataset_index] train_model( script_args, training_args, model_args, dataset, script_args.dataset_index From defebfb216b8322953f4893eb30c6f25a2f98ba5 Mon Sep 17 00:00:00 2001 From: avecplezir Date: Thu, 10 Apr 2025 10:42:27 -0400 Subject: [PATCH 02/15] sm fix --- benchmarks/dpo/dpo_continual.py | 4 +--- benchmarks/reward_modeling.py | 4 +++- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/benchmarks/dpo/dpo_continual.py b/benchmarks/dpo/dpo_continual.py index 1b19b2df..31e08e24 100644 --- a/benchmarks/dpo/dpo_continual.py +++ b/benchmarks/dpo/dpo_continual.py @@ -3,6 +3,7 @@ import os import torch +import wandb as wb from datasets import Dataset from transformers import ( AutoModelForCausalLM, @@ -18,9 +19,6 @@ ) from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE -import wandb as wb -from transformers.trainer_utils import is_main_process - from benchmarks.dataloading import init_continual_dataset from benchmarks.dpo.continual_dpo_trainer import ( ContinualDPOArguments, diff --git a/benchmarks/reward_modeling.py b/benchmarks/reward_modeling.py index a211a740..95053f32 100644 --- a/benchmarks/reward_modeling.py +++ b/benchmarks/reward_modeling.py @@ -226,7 +226,9 @@ def train_model( except Exception as e: print(f'Job {i + 1} failed with error: {e}') else: - print(f'Running on {script_args.dataset_index} task out of {len(continual_dataset)} tasks') + print( + f'Running on {script_args.dataset_index} task out of {len(continual_dataset)} tasks' + ) dataset = continual_dataset[script_args.dataset_index] train_model( script_args, training_args, model_args, dataset, script_args.dataset_index From 363b5a9c92de67ffb66bfc53579c066dd5a378e3 Mon Sep 17 00:00:00 2001 From: avecplezir Date: Thu, 24 Apr 2025 20:34:38 +0800 Subject: [PATCH 03/15] upd dataset json --- benchmarks/dataloading.py | 15 +++++++++++++-- .../dpo/accelerate_configs/deepspeed_zero3.yaml | 2 +- benchmarks/reward_modeling.py | 3 ++- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/benchmarks/dataloading.py b/benchmarks/dataloading.py index b93d07b2..9dbec4b6 100644 --- a/benchmarks/dataloading.py +++ b/benchmarks/dataloading.py @@ -89,9 +89,20 @@ def init_continual_dataset( data = ContinualAlignmentDataset.from_json(dataset) except OSError: # need to try downloading from hub try: + # json_name = dataset.split('/', )[-1] + # print(f'Downloading {json_name} from Hugging Face Hub...') local_path = hf_hub_download( - repo_id=dataset, filename='dataset.json', repo_type='dataset' - ) + repo_id=f"LifelongAlignment/{dataset}", filename='dataset.json', 
repo_type='dataset' + ) + # local_path = hf_hub_download( + # repo_id=f"LifelongAlignment/{dataset}", filename=f'{dataset}.json', repo_type='dataset' + # ) + # local_path = hf_hub_download( + # repo_id=f"LifelongAlignment/{dataset}", filename=f'{json_name}.json', repo_type='dataset' + # ) + # local_path = hf_hub_download( + # repo_id=dataset, filename='dataset.json', repo_type='dataset' + # ) data = ContinualAlignmentDataset.from_json(local_path) except Exception as e: raise ValueError(f'Error loading dataset: {e}') diff --git a/benchmarks/dpo/accelerate_configs/deepspeed_zero3.yaml b/benchmarks/dpo/accelerate_configs/deepspeed_zero3.yaml index 29507c4c..6b68067b 100644 --- a/benchmarks/dpo/accelerate_configs/deepspeed_zero3.yaml +++ b/benchmarks/dpo/accelerate_configs/deepspeed_zero3.yaml @@ -11,7 +11,7 @@ machine_rank: 0 main_training_function: main mixed_precision: bf16 num_machines: 1 -num_processes: 2 # TODO change to whatever number of gpus is used +num_processes: 8 # TODO change to whatever number of gpus is used rdzv_backend: static same_network: true tpu_env: [] diff --git a/benchmarks/reward_modeling.py b/benchmarks/reward_modeling.py index 95053f32..2f5f7905 100644 --- a/benchmarks/reward_modeling.py +++ b/benchmarks/reward_modeling.py @@ -129,6 +129,7 @@ def train_model( trust_remote_code=model_args.trust_remote_code, **model_kwargs, ) + # Align padding tokens between tokenizer and model model.config.pad_token_id = tokenizer.pad_token_id @@ -227,7 +228,7 @@ def train_model( print(f'Job {i + 1} failed with error: {e}') else: print( - f'Running on {script_args.dataset_index} task out of {len(continual_dataset)} tasks' + f'Running on {script_args.dataset_index+1} task out of {len(continual_dataset)} tasks' ) dataset = continual_dataset[script_args.dataset_index] train_model( From d6540331a234a19ffd077e0f0db8dee49790e341 Mon Sep 17 00:00:00 2001 From: avecplezir Date: Thu, 24 Apr 2025 20:35:14 +0800 Subject: [PATCH 04/15] upd dataset json --- benchmarks/dataloading.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/benchmarks/dataloading.py b/benchmarks/dataloading.py index 9dbec4b6..837942f2 100644 --- a/benchmarks/dataloading.py +++ b/benchmarks/dataloading.py @@ -92,8 +92,10 @@ def init_continual_dataset( # json_name = dataset.split('/', )[-1] # print(f'Downloading {json_name} from Hugging Face Hub...') local_path = hf_hub_download( - repo_id=f"LifelongAlignment/{dataset}", filename='dataset.json', repo_type='dataset' - ) + repo_id=f'LifelongAlignment/{dataset}', + filename='dataset.json', + repo_type='dataset', + ) # local_path = hf_hub_download( # repo_id=f"LifelongAlignment/{dataset}", filename=f'{dataset}.json', repo_type='dataset' # ) From a1d0cdae2548acac57af53f39ffca3a3f0f63818 Mon Sep 17 00:00:00 2001 From: avecplezir Date: Sun, 27 Apr 2025 10:15:45 +0800 Subject: [PATCH 05/15] upd --- benchmarks/dataloading.py | 2 +- benchmarks/dpo/dpo_continual.py | 4 +++- benchmarks/dpo_ewc/dpo_EWC_continual.py | 10 +++++++--- benchmarks/ppo/ppo_continual.py | 11 ++++++++--- benchmarks/ppo_ewc/ppo_EWC_continual.py | 7 ++++--- 5 files changed, 23 insertions(+), 11 deletions(-) diff --git a/benchmarks/dataloading.py b/benchmarks/dataloading.py index 837942f2..65b5dd7b 100644 --- a/benchmarks/dataloading.py +++ b/benchmarks/dataloading.py @@ -93,7 +93,7 @@ def init_continual_dataset( # print(f'Downloading {json_name} from Hugging Face Hub...') local_path = hf_hub_download( repo_id=f'LifelongAlignment/{dataset}', - filename='dataset.json', + filename='data.json', 
repo_type='dataset', ) # local_path = hf_hub_download( diff --git a/benchmarks/dpo/dpo_continual.py b/benchmarks/dpo/dpo_continual.py index 31e08e24..f1edf2fe 100644 --- a/benchmarks/dpo/dpo_continual.py +++ b/benchmarks/dpo/dpo_continual.py @@ -132,6 +132,9 @@ def main( peft_config=peft_config, ) + if i == 0: + trainer.save_model(os.path.join(training_args.output_dir, 'checkpoint-0')) + # TODO will throw Invalidate trace cache @ step 10: expected module 11, but got module 19 # https://github.com/deepspeedai/DeepSpeed/issues/6870 # Fix with deepspeed fix release @@ -147,7 +150,6 @@ def main( print(f'eval/dataset/{i}') trainer.log_metrics(f'eval/dataset/{i}', metrics) trainer.save_metrics(f'eval', metrics) - # if is_main_process(): if training_args.local_rank in (None, -1, 0): wb.log({'eval': {'last': metrics}}) # type: ignore[attr-defined] wb.log({f'task/{current_dataset_name}/last': metrics}) # type: ignore[attr-defined] diff --git a/benchmarks/dpo_ewc/dpo_EWC_continual.py b/benchmarks/dpo_ewc/dpo_EWC_continual.py index 35a00b1e..547e87c0 100644 --- a/benchmarks/dpo_ewc/dpo_EWC_continual.py +++ b/benchmarks/dpo_ewc/dpo_EWC_continual.py @@ -3,6 +3,7 @@ import os import torch +import wandb as wb from continual_dpo_EWC_trainer import ( ContinualDPOEWCArguments, ContinualDPOEWCConfig, @@ -23,7 +24,6 @@ ) from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE -import wandb as wb from benchmarks.dataloading import init_continual_dataset @@ -132,6 +132,9 @@ def main( peft_config=peft_config, ) + if i == 0: + trainer.save_model(os.path.join(training_args.output_dir, 'checkpoint-0')) + # TODO will throw Invalidate trace cache @ step 10: expected module 11, but got module 19 # https://github.com/deepspeedai/DeepSpeed/issues/6870 # Fix with deepspeed fix release @@ -147,8 +150,9 @@ def main( print(f'eval/dataset/{i}') trainer.log_metrics(f'eval/dataset/{i}', metrics) trainer.save_metrics(f'eval', metrics) - wb.log({'eval': {'last': metrics}}) # type: ignore[attr-defined] - wb.log({f'task/{current_dataset_name}/last': metrics}) # type: ignore[attr-defined] + if training_args.local_rank in (None, -1, 0): + wb.log({'eval': {'last': metrics}}) # type: ignore[attr-defined] + wb.log({f'task/{current_dataset_name}/last': metrics}) # type: ignore[attr-defined] # Save and push to hub trainer.save_model(os.path.join(training_args.output_dir, 'last')) diff --git a/benchmarks/ppo/ppo_continual.py b/benchmarks/ppo/ppo_continual.py index 5fe18513..f2cfa1c3 100644 --- a/benchmarks/ppo/ppo_continual.py +++ b/benchmarks/ppo/ppo_continual.py @@ -3,6 +3,7 @@ import os import torch +import wandb as wb from continual_ppo_trainer import ( ContinualPPOArguments, ContinualPPOConfig, @@ -23,7 +24,6 @@ ) from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE -import wandb as wb from benchmarks.dataloading import init_continual_dataset @@ -143,6 +143,10 @@ def main( eval_dataset=dataset[script_args.dataset_test_split], peft_config=peft_config, ) + + if i == 0: + trainer.save_model(os.path.join(training_args.output_dir, 'checkpoint-0')) + # Set current task in trainer for task-based logging trainer.set_task(f'task_{i}') @@ -164,8 +168,9 @@ def main( trainer.save_metrics('eval', metrics) # Log metrics to WandB - wb.log({'eval': {'last': metrics}}) # type: ignore[attr-defined] - wb.log({f'task/{custom_repo_name}/last': metrics}) # type: ignore[attr-defined] + if training_args.local_rank in (None, -1, 0): + wb.log({'eval': {'last': metrics}}) # type: ignore[attr-defined] + wb.log({f'task/{custom_repo_name}/last': metrics}) # type: 
ignore[attr-defined] # Save model checkpoint and optionally push if not training_args.push_to_hub: diff --git a/benchmarks/ppo_ewc/ppo_EWC_continual.py b/benchmarks/ppo_ewc/ppo_EWC_continual.py index 211bc56a..c71e90e6 100644 --- a/benchmarks/ppo_ewc/ppo_EWC_continual.py +++ b/benchmarks/ppo_ewc/ppo_EWC_continual.py @@ -3,6 +3,7 @@ import os import torch +import wandb as wb from datasets import Dataset from transformers import ( AutoModelForCausalLM, @@ -18,7 +19,6 @@ ) from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE -import wandb as wb from benchmarks.dataloading import init_continual_dataset from benchmarks.ppo_ewc.continual_ppo_EWC_trainer import ( ContinualPPOEWCArguments, @@ -176,8 +176,9 @@ def main( trainer.save_metrics('eval', metrics) # Log metrics to WandB - wb.log({'eval': {'last': metrics}}) # type: ignore[attr-defined] - wb.log({f'task/{custom_repo_name}/last': metrics}) # type: ignore[attr-defined] + if training_args.local_rank in (None, -1, 0): + wb.log({'eval': {'last': metrics}}) # type: ignore[attr-defined] + wb.log({f'task/{custom_repo_name}/last': metrics}) # type: ignore[attr-defined] # Save model checkpoint and optionally push if not training_args.push_to_hub: From 07342977b076ebfb277b604cc1422755a3349d83 Mon Sep 17 00:00:00 2001 From: avecplezir Date: Thu, 1 May 2025 23:34:20 -0400 Subject: [PATCH 06/15] sm fixes, hf upload --- benchmarks/dpo/dpo_continual.py | 4 +-- benchmarks/dpo_ewc/dpo_EWC_continual.py | 4 +-- benchmarks/hf_upload_models.py | 28 +++++++++++++++++++ benchmarks/ppo/README.md | 4 +-- .../accelerate_configs/deepspeed_zero2.yaml | 2 +- benchmarks/ppo/ppo_continual.py | 4 +-- pyproject.toml | 3 +- 7 files changed, 39 insertions(+), 10 deletions(-) create mode 100644 benchmarks/hf_upload_models.py diff --git a/benchmarks/dpo/dpo_continual.py b/benchmarks/dpo/dpo_continual.py index f1edf2fe..7ff9bd4a 100644 --- a/benchmarks/dpo/dpo_continual.py +++ b/benchmarks/dpo/dpo_continual.py @@ -132,8 +132,8 @@ def main( peft_config=peft_config, ) - if i == 0: - trainer.save_model(os.path.join(training_args.output_dir, 'checkpoint-0')) + # if i == 0: + # trainer.save_model(os.path.join(training_args.output_dir, 'checkpoint-0')) # TODO will throw Invalidate trace cache @ step 10: expected module 11, but got module 19 # https://github.com/deepspeedai/DeepSpeed/issues/6870 diff --git a/benchmarks/dpo_ewc/dpo_EWC_continual.py b/benchmarks/dpo_ewc/dpo_EWC_continual.py index 547e87c0..021a4e71 100644 --- a/benchmarks/dpo_ewc/dpo_EWC_continual.py +++ b/benchmarks/dpo_ewc/dpo_EWC_continual.py @@ -132,8 +132,8 @@ def main( peft_config=peft_config, ) - if i == 0: - trainer.save_model(os.path.join(training_args.output_dir, 'checkpoint-0')) + # if i == 0: + # trainer.save_model(os.path.join(training_args.output_dir, 'checkpoint-0')) # TODO will throw Invalidate trace cache @ step 10: expected module 11, but got module 19 # https://github.com/deepspeedai/DeepSpeed/issues/6870 diff --git a/benchmarks/hf_upload_models.py b/benchmarks/hf_upload_models.py new file mode 100644 index 00000000..ebca79c2 --- /dev/null +++ b/benchmarks/hf_upload_models.py @@ -0,0 +1,28 @@ +from huggingface_hub import HfApi, upload_folder + +datasets="aifgen-long-piecewise aifgen-lipschitz aifgen-piecewise-preference-shift aifgen-domain-preference-shift aifgen-short-piecewise CPPO-REWARD" +dataset_indices="0 1 2 3 4 5 6 7 8 9" +# datasets="aifgen-long-piecewise" +# dataset_indices="0" + +for dataset_name in datasets.split(): + for dataset_index in dataset_indices.split(): + # Upload the model to the 
Hugging Face Hub + try: + repo_id = f"LifelongAlignment/{dataset_name}-{dataset_index}-reward-model" + api = HfApi() + api.create_repo(repo_id, repo_type="model", exist_ok=True, private=False) + + path = f"/lustre/orion/bif151/scratch/ivan.anokhin/AIF-Gen/{dataset_name}/Qwen2.5-0.5B-Reward-8gpus/Qwen2.5-0.5B-Instruct_{dataset_name}_REWARD_{dataset_index}" + print('path', path) + + upload_folder( + repo_id=repo_id, + # path_in_repo=f"{dataset_name}-{dataset_index}/reward-model", + folder_path=path, + commit_message="Upload AIFGen reward model", + repo_type="model", + ) + except: + print(f"Failed to upload {dataset_name}-{dataset_index} reward model") + continue \ No newline at end of file diff --git a/benchmarks/ppo/README.md b/benchmarks/ppo/README.md index 21928fb2..63fa30b3 100644 --- a/benchmarks/ppo/README.md +++ b/benchmarks/ppo/README.md @@ -32,7 +32,7 @@ uv run benchmarks/ppo/ppo_continual.py \ --use_peft \ --lora_r 32 \ --lora_alpha 16 \ - --push_to_hub True + --push_to_hub False ``` ### Using accelerate launch (with DeepSpeed / multi-GPU) @@ -62,7 +62,7 @@ accelerate launch --config_file benchmarks/ppo/accelerate_configs/deepspeed_zero --use_peft \ --lora_r 32 \ --lora_alpha 16 \ - --push_to_hub True + --push_to_hub False ``` *Make sure you do not add the dataset index to the reward model name as the script itself iterates over the dataset indices.* diff --git a/benchmarks/ppo/accelerate_configs/deepspeed_zero2.yaml b/benchmarks/ppo/accelerate_configs/deepspeed_zero2.yaml index 8046cccc..239b14ac 100644 --- a/benchmarks/ppo/accelerate_configs/deepspeed_zero2.yaml +++ b/benchmarks/ppo/accelerate_configs/deepspeed_zero2.yaml @@ -12,7 +12,7 @@ machine_rank: 0 main_training_function: main mixed_precision: 'bf16' num_machines: 1 -num_processes: 1 +num_processes: 8 rdzv_backend: static same_network: true tpu_env: [] diff --git a/benchmarks/ppo/ppo_continual.py b/benchmarks/ppo/ppo_continual.py index f2cfa1c3..adfb8f1c 100644 --- a/benchmarks/ppo/ppo_continual.py +++ b/benchmarks/ppo/ppo_continual.py @@ -144,8 +144,8 @@ def main( peft_config=peft_config, ) - if i == 0: - trainer.save_model(os.path.join(training_args.output_dir, 'checkpoint-0')) + # if i == 0: + # trainer.save_model(os.path.join(training_args.output_dir, 'checkpoint-0')) # Set current task in trainer for task-based logging trainer.set_task(f'task_{i}') diff --git a/pyproject.toml b/pyproject.toml index b90c8e52..d895846f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,8 @@ dependencies = [ "pydantic>=2.10.4", "pytest-asyncio>=0.25.3", "pytest-mock>=3.14.0", - "torch==2.3.0", +# "torch @ https://download.pytorch.org/whl/rocm6.0/torch-2.3.0%2Brocm6.0-cp312-cp312-linux_x86_64.whl#sha256=992c1ffb65c773a5848e4bbe22235c0386a7915690615ad68a45609228c13269", + "torch @ https://download.pytorch.org/whl/rocm6.0/torch-2.3.0%2Brocm6.0-cp310-cp310-linux_x86_64.whl#sha256=266af54cf4704aae08719305c205f0d12f40874006d3b8058f38e2f8ed08f56d", "types-pyyaml>=6.0.12.20241230", ] From fcf7671a9b72fc936b9115187eeb9d0ff4cae2af Mon Sep 17 00:00:00 2001 From: avecplezir Date: Thu, 1 May 2025 23:34:32 -0400 Subject: [PATCH 07/15] sm fixes, hf upload --- benchmarks/hf_upload_models.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/benchmarks/hf_upload_models.py b/benchmarks/hf_upload_models.py index ebca79c2..02646994 100644 --- a/benchmarks/hf_upload_models.py +++ b/benchmarks/hf_upload_models.py @@ -1,7 +1,7 @@ from huggingface_hub import HfApi, upload_folder -datasets="aifgen-long-piecewise 
aifgen-lipschitz aifgen-piecewise-preference-shift aifgen-domain-preference-shift aifgen-short-piecewise CPPO-REWARD" -dataset_indices="0 1 2 3 4 5 6 7 8 9" +datasets = 'aifgen-long-piecewise aifgen-lipschitz aifgen-piecewise-preference-shift aifgen-domain-preference-shift aifgen-short-piecewise CPPO-REWARD' +dataset_indices = '0 1 2 3 4 5 6 7 8 9' # datasets="aifgen-long-piecewise" # dataset_indices="0" @@ -9,20 +9,20 @@ for dataset_index in dataset_indices.split(): # Upload the model to the Hugging Face Hub try: - repo_id = f"LifelongAlignment/{dataset_name}-{dataset_index}-reward-model" + repo_id = f'LifelongAlignment/{dataset_name}-{dataset_index}-reward-model' api = HfApi() - api.create_repo(repo_id, repo_type="model", exist_ok=True, private=False) + api.create_repo(repo_id, repo_type='model', exist_ok=True, private=False) - path = f"/lustre/orion/bif151/scratch/ivan.anokhin/AIF-Gen/{dataset_name}/Qwen2.5-0.5B-Reward-8gpus/Qwen2.5-0.5B-Instruct_{dataset_name}_REWARD_{dataset_index}" + path = f'/lustre/orion/bif151/scratch/ivan.anokhin/AIF-Gen/{dataset_name}/Qwen2.5-0.5B-Reward-8gpus/Qwen2.5-0.5B-Instruct_{dataset_name}_REWARD_{dataset_index}' print('path', path) upload_folder( repo_id=repo_id, # path_in_repo=f"{dataset_name}-{dataset_index}/reward-model", folder_path=path, - commit_message="Upload AIFGen reward model", - repo_type="model", + commit_message='Upload AIFGen reward model', + repo_type='model', ) except: - print(f"Failed to upload {dataset_name}-{dataset_index} reward model") - continue \ No newline at end of file + print(f'Failed to upload {dataset_name}-{dataset_index} reward model') + continue From b219b419cb370c700edb4414bb9007a680c601ac Mon Sep 17 00:00:00 2001 From: avecplezir Date: Fri, 2 May 2025 01:57:34 -0400 Subject: [PATCH 08/15] remove accumulate in ppo trainer --- benchmarks/ppo/README.md | 20 +- .../accelerate_configs/deepspeed_zero2.yaml | 7 +- benchmarks/ppo/continual_ppo_trainer.py | 204 +++++++++--------- pyproject.toml | 3 +- 4 files changed, 120 insertions(+), 114 deletions(-) diff --git a/benchmarks/ppo/README.md b/benchmarks/ppo/README.md index 63fa30b3..1127788a 100644 --- a/benchmarks/ppo/README.md +++ b/benchmarks/ppo/README.md @@ -20,6 +20,7 @@ uv run benchmarks/ppo/ppo_continual.py \ --reward_model_path Shahradmz/Qwen2-0.5B-Instruct_continual_data_debug_REWARD \ --learning_rate 5.0e-6 \ --num_train_epochs 1 \ + --gradient_accumulation_steps 2 \ --gradient_accumulation_steps 8 \ --gradient_checkpointing \ --logging_steps 20 \ @@ -50,12 +51,12 @@ accelerate launch --config_file benchmarks/ppo/accelerate_configs/deepspeed_zero --learning_rate 5.0e-6 \ --num_train_epochs 1 \ --per_device_train_batch_size 2 \ - --gradient_accumulation_steps 8 \ + --gradient_accumulation_steps 1 \ --gradient_checkpointing \ - --logging_steps 2 \ + --logging_steps 10 \ --eval_strategy steps \ - --eval_steps 5 \ - --save_steps 5 \ + --eval_steps 10 \ + --save_steps 10 \ --bf16 \ --output_dir "$SCRATCH/Qwen2-0.5B-PPO-test" \ --no_remove_unused_columns \ @@ -70,7 +71,8 @@ accelerate launch --config_file benchmarks/ppo/accelerate_configs/deepspeed_zero ### Full Training (without PEFT push, for local evaluation) ```sh -uv run benchmarks/ppo/ppo_continual.py \ +accelerate launch --config_file benchmarks/ppo/accelerate_configs/deepspeed_zero2.yaml \ + benchmarks/ppo/ppo_continual.py \ --dataset_name benchmarks/continual_data_debug.json \ --mock False \ --sft_model_path Qwen/Qwen2-0.5B-Instruct \ @@ -78,14 +80,16 @@ uv run benchmarks/ppo/ppo_continual.py \ --reward_model_path 
Shahradmz/Qwen2-0.5B-Instruct_continual_data_debug_REWARD \ --learning_rate 5.0e-7 \ --num_train_epochs 1 \ - --per_device_train_batch_size 2 \ - --gradient_accumulation_steps 8 \ + --bf16 \ + --per_device_train_batch_size 1 \ + --gradient_accumulation_steps 1 \ --gradient_checkpointing \ --logging_steps 20 \ --eval_strategy steps \ --eval_steps 20 \ --output_dir "$SCRATCH/Qwen2-0.5B-PPO" \ - --no_remove_unused_columns + --no_remove_unused_columns \ + --push_to_hub False ``` ### Run a Sweep with wandb diff --git a/benchmarks/ppo/accelerate_configs/deepspeed_zero2.yaml b/benchmarks/ppo/accelerate_configs/deepspeed_zero2.yaml index 239b14ac..825c1fcc 100644 --- a/benchmarks/ppo/accelerate_configs/deepspeed_zero2.yaml +++ b/benchmarks/ppo/accelerate_configs/deepspeed_zero2.yaml @@ -2,17 +2,18 @@ compute_environment: LOCAL_MACHINE debug: false deepspeed_config: deepspeed_multinode_launcher: standard - offload_optimizer_device: none - offload_param_device: none + offload_optimizer_device: cpu + offload_param_device: cpu zero3_init_flag: false zero_stage: 2 + gradient_accumulation_steps: 8 distributed_type: DEEPSPEED downcast_bf16: 'no' machine_rank: 0 main_training_function: main mixed_precision: 'bf16' num_machines: 1 -num_processes: 8 +num_processes: 2 rdzv_backend: static same_network: true tpu_env: [] diff --git a/benchmarks/ppo/continual_ppo_trainer.py b/benchmarks/ppo/continual_ppo_trainer.py index faa5d805..7718b9c7 100644 --- a/benchmarks/ppo/continual_ppo_trainer.py +++ b/benchmarks/ppo/continual_ppo_trainer.py @@ -729,110 +729,110 @@ def repeat_generator() -> DataLoader: for micro_batch_start in range( 0, args.local_mini_batch_size, args.per_device_train_batch_size ): - with accelerator.accumulate(model): - micro_batch_end = ( - micro_batch_start + args.per_device_train_batch_size - ) - micro_batch_inds = mini_batch_inds[ - micro_batch_start:micro_batch_end - ] - mb_advantage = advantages[micro_batch_inds] - mb_responses = responses[micro_batch_inds] - mb_query_responses = query_responses[micro_batch_inds] - mb_logprobs = logprobs[micro_batch_inds] - mb_return = returns[micro_batch_inds] - mb_values = values[micro_batch_inds] - - output, vpred_temp = forward( - model, mb_query_responses, processing_class.pad_token_id - ) - logits = output.logits[:, context_length - 1 : -1] - logits /= args.temperature + 1e-7 - new_logprobs = selective_log_softmax(logits, mb_responses) - new_logprobs = torch.masked_fill( - new_logprobs, - padding_mask[micro_batch_inds], - INVALID_LOGPROB, - ) - vpred = vpred_temp[:, context_length - 1 : -1].squeeze(-1) - vpred = torch.masked_fill( - vpred, padding_mask_p1[micro_batch_inds], 0 - ) - vpredclipped = torch.clamp( - vpred, - mb_values - args.cliprange_value, - mb_values + args.cliprange_value, - ) - vf_losses1 = torch.square(vpred - mb_return) - vf_losses2 = torch.square(vpredclipped - mb_return) - vf_loss_max = torch.max(vf_losses1, vf_losses2) - vf_loss = 0.5 * masked_mean( - vf_loss_max, ~padding_mask_p1[micro_batch_inds] - ) - vf_clipfrac = masked_mean( - (vf_losses2 > vf_losses1).float(), - ~padding_mask_p1[micro_batch_inds], - ) - logprobs_diff = new_logprobs - mb_logprobs - ratio = torch.exp(logprobs_diff) - pg_losses = -mb_advantage * ratio - pg_losses2 = -mb_advantage * torch.clamp( - ratio, 1.0 - args.cliprange, 1.0 + args.cliprange + # with accelerator.accumulate(model): + micro_batch_end = ( + micro_batch_start + args.per_device_train_batch_size + ) + micro_batch_inds = mini_batch_inds[ + micro_batch_start:micro_batch_end + ] + mb_advantage = 
advantages[micro_batch_inds] + mb_responses = responses[micro_batch_inds] + mb_query_responses = query_responses[micro_batch_inds] + mb_logprobs = logprobs[micro_batch_inds] + mb_return = returns[micro_batch_inds] + mb_values = values[micro_batch_inds] + + output, vpred_temp = forward( + model, mb_query_responses, processing_class.pad_token_id + ) + logits = output.logits[:, context_length - 1 : -1] + logits /= args.temperature + 1e-7 + new_logprobs = selective_log_softmax(logits, mb_responses) + new_logprobs = torch.masked_fill( + new_logprobs, + padding_mask[micro_batch_inds], + INVALID_LOGPROB, + ) + vpred = vpred_temp[:, context_length - 1 : -1].squeeze(-1) + vpred = torch.masked_fill( + vpred, padding_mask_p1[micro_batch_inds], 0 + ) + vpredclipped = torch.clamp( + vpred, + mb_values - args.cliprange_value, + mb_values + args.cliprange_value, + ) + vf_losses1 = torch.square(vpred - mb_return) + vf_losses2 = torch.square(vpredclipped - mb_return) + vf_loss_max = torch.max(vf_losses1, vf_losses2) + vf_loss = 0.5 * masked_mean( + vf_loss_max, ~padding_mask_p1[micro_batch_inds] + ) + vf_clipfrac = masked_mean( + (vf_losses2 > vf_losses1).float(), + ~padding_mask_p1[micro_batch_inds], + ) + logprobs_diff = new_logprobs - mb_logprobs + ratio = torch.exp(logprobs_diff) + pg_losses = -mb_advantage * ratio + pg_losses2 = -mb_advantage * torch.clamp( + ratio, 1.0 - args.cliprange, 1.0 + args.cliprange + ) + pg_loss_max = torch.max(pg_losses, pg_losses2) + pg_loss = masked_mean( + pg_loss_max, ~padding_mask[micro_batch_inds] + ) + loss = pg_loss + args.vf_coef * vf_loss + accelerator.backward(loss) + optimizer.step() + optimizer.zero_grad() + with torch.no_grad(): + pg_clipfrac = masked_mean( + (pg_losses2 > pg_losses).float(), + ~padding_mask[micro_batch_inds], ) - pg_loss_max = torch.max(pg_losses, pg_losses2) - pg_loss = masked_mean( - pg_loss_max, ~padding_mask[micro_batch_inds] + prob_dist = torch.nn.functional.softmax(logits, dim=-1) + entropy = torch.logsumexp(logits, dim=-1) - torch.sum( + prob_dist * logits, dim=-1 ) - loss = pg_loss + args.vf_coef * vf_loss - accelerator.backward(loss) - optimizer.step() - optimizer.zero_grad() - with torch.no_grad(): - pg_clipfrac = masked_mean( - (pg_losses2 > pg_losses).float(), - ~padding_mask[micro_batch_inds], - ) - prob_dist = torch.nn.functional.softmax(logits, dim=-1) - entropy = torch.logsumexp(logits, dim=-1) - torch.sum( - prob_dist * logits, dim=-1 - ) - approxkl = 0.5 * (logprobs_diff**2).mean() - approxkl_stats[ - ppo_epoch_idx, - minibatch_idx, - gradient_accumulation_idx, - ] = approxkl - pg_clipfrac_stats[ - ppo_epoch_idx, - minibatch_idx, - gradient_accumulation_idx, - ] = pg_clipfrac - pg_loss_stats[ - ppo_epoch_idx, - minibatch_idx, - gradient_accumulation_idx, - ] = pg_loss - vf_loss_stats[ - ppo_epoch_idx, - minibatch_idx, - gradient_accumulation_idx, - ] = vf_loss - vf_clipfrac_stats[ - ppo_epoch_idx, - minibatch_idx, - gradient_accumulation_idx, - ] = vf_clipfrac - entropy_stats[ - ppo_epoch_idx, - minibatch_idx, - gradient_accumulation_idx, - ] = entropy.mean() - ratio_stats[ - ppo_epoch_idx, - minibatch_idx, - gradient_accumulation_idx, - ] = ratio.mean() - gradient_accumulation_idx += 1 + approxkl = 0.5 * (logprobs_diff**2).mean() + approxkl_stats[ + ppo_epoch_idx, + minibatch_idx, + gradient_accumulation_idx, + ] = approxkl + pg_clipfrac_stats[ + ppo_epoch_idx, + minibatch_idx, + gradient_accumulation_idx, + ] = pg_clipfrac + pg_loss_stats[ + ppo_epoch_idx, + minibatch_idx, + gradient_accumulation_idx, + ] = pg_loss + 
vf_loss_stats[ + ppo_epoch_idx, + minibatch_idx, + gradient_accumulation_idx, + ] = vf_loss + vf_clipfrac_stats[ + ppo_epoch_idx, + minibatch_idx, + gradient_accumulation_idx, + ] = vf_clipfrac + entropy_stats[ + ppo_epoch_idx, + minibatch_idx, + gradient_accumulation_idx, + ] = entropy.mean() + ratio_stats[ + ppo_epoch_idx, + minibatch_idx, + gradient_accumulation_idx, + ] = ratio.mean() + gradient_accumulation_idx += 1 minibatch_idx += 1 # del everything and empty cache # fmt: off diff --git a/pyproject.toml b/pyproject.toml index d895846f..1533d5df 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,8 +26,9 @@ dependencies = [ "pydantic>=2.10.4", "pytest-asyncio>=0.25.3", "pytest-mock>=3.14.0", + "torch==2.3.0", # "torch @ https://download.pytorch.org/whl/rocm6.0/torch-2.3.0%2Brocm6.0-cp312-cp312-linux_x86_64.whl#sha256=992c1ffb65c773a5848e4bbe22235c0386a7915690615ad68a45609228c13269", - "torch @ https://download.pytorch.org/whl/rocm6.0/torch-2.3.0%2Brocm6.0-cp310-cp310-linux_x86_64.whl#sha256=266af54cf4704aae08719305c205f0d12f40874006d3b8058f38e2f8ed08f56d", +# "torch @ https://download.pytorch.org/whl/rocm6.0/torch-2.3.0%2Brocm6.0-cp310-cp310-linux_x86_64.whl#sha256=266af54cf4704aae08719305c205f0d12f40874006d3b8058f38e2f8ed08f56d", "types-pyyaml>=6.0.12.20241230", ] From bea9d79c21122153f0f654745001d24fbb63ef10 Mon Sep 17 00:00:00 2001 From: avecplezir Date: Fri, 2 May 2025 14:30:04 -0400 Subject: [PATCH 09/15] fix dpo ewc --- .../accelerate_configs/deepspeed_zero2.yaml | 4 +-- .../accelerate_configs/deepspeed_zero3.yaml | 2 +- .../dpo_ewc/continual_dpo_EWC_trainer.py | 28 +++++++++++++------ .../accelerate_configs/deepspeed_zero2.yaml | 8 +++--- 4 files changed, 27 insertions(+), 15 deletions(-) diff --git a/benchmarks/dpo/accelerate_configs/deepspeed_zero2.yaml b/benchmarks/dpo/accelerate_configs/deepspeed_zero2.yaml index 877a5b8f..f369ef96 100644 --- a/benchmarks/dpo/accelerate_configs/deepspeed_zero2.yaml +++ b/benchmarks/dpo/accelerate_configs/deepspeed_zero2.yaml @@ -11,8 +11,8 @@ downcast_bf16: 'no' machine_rank: 0 main_training_function: main mixed_precision: 'bf16' -num_machines: 2 -num_processes: 1 +num_machines: 1 +num_processes: 2 rdzv_backend: static same_network: true tpu_env: [] diff --git a/benchmarks/dpo/accelerate_configs/deepspeed_zero3.yaml b/benchmarks/dpo/accelerate_configs/deepspeed_zero3.yaml index 6b68067b..29507c4c 100644 --- a/benchmarks/dpo/accelerate_configs/deepspeed_zero3.yaml +++ b/benchmarks/dpo/accelerate_configs/deepspeed_zero3.yaml @@ -11,7 +11,7 @@ machine_rank: 0 main_training_function: main mixed_precision: bf16 num_machines: 1 -num_processes: 8 # TODO change to whatever number of gpus is used +num_processes: 2 # TODO change to whatever number of gpus is used rdzv_backend: static same_network: true tpu_env: [] diff --git a/benchmarks/dpo_ewc/continual_dpo_EWC_trainer.py b/benchmarks/dpo_ewc/continual_dpo_EWC_trainer.py index 815dd537..45b2af0c 100644 --- a/benchmarks/dpo_ewc/continual_dpo_EWC_trainer.py +++ b/benchmarks/dpo_ewc/continual_dpo_EWC_trainer.py @@ -1,6 +1,7 @@ from dataclasses import dataclass, field from typing import Any, Dict, Optional, Union +import deepspeed import torch import torch.nn as nn from transformers import PreTrainedModel @@ -119,7 +120,13 @@ def compute_ewc_loss(self) -> torch.Tensor: # Calculate the EWC penalty for each parameter model = self.accelerator.unwrap_model(self.model) + for name, param in model.named_parameters(): + if name not in ContinualDPOEWCTrainer.class_fisher_information: + continue + if not 
param.requires_grad: + continue + if ( name in ContinualDPOEWCTrainer.class_fisher_information and param.requires_grad @@ -128,13 +135,15 @@ def compute_ewc_loss(self) -> torch.Tensor: fisher = ContinualDPOEWCTrainer.class_fisher_information[name].to( param.device ) - old_param = ContinualDPOEWCTrainer.class_old_params[name].to( - param.device - ) - # Calculate squared distance weighted by Fisher information - delta = param - old_param - ewc_loss += (fisher * delta.pow(2)).sum() + with deepspeed.zero.GatheredParameters([param], modifier_rank=0): + if self.accelerator.is_main_process: + old_param = ContinualDPOEWCTrainer.class_old_params[name].to( + param.device + ) + # Calculate squared distance weighted by Fisher information + delta = param - old_param + ewc_loss = ewc_loss + (fisher * delta.pow(2)).sum() # Apply the EWC lambda coefficient and return return 0.5 * self.ewc_lambda * ewc_loss @@ -237,9 +246,12 @@ def store_current_parameters(self) -> Dict[str, torch.Tensor]: """ model = self.accelerator.unwrap_model(self.model) old_params = {} + for name, param in model.named_parameters(): - if param.requires_grad: - old_params[name] = param.data.clone().detach() + with deepspeed.zero.GatheredParameters([param], modifier_rank=0): + if self.accelerator.is_main_process: + if param.requires_grad: + old_params[name] = param.data.clone().detach() return old_params def train(self) -> Any: diff --git a/benchmarks/ppo/accelerate_configs/deepspeed_zero2.yaml b/benchmarks/ppo/accelerate_configs/deepspeed_zero2.yaml index 825c1fcc..27d04d8d 100644 --- a/benchmarks/ppo/accelerate_configs/deepspeed_zero2.yaml +++ b/benchmarks/ppo/accelerate_configs/deepspeed_zero2.yaml @@ -2,18 +2,18 @@ compute_environment: LOCAL_MACHINE debug: false deepspeed_config: deepspeed_multinode_launcher: standard - offload_optimizer_device: cpu - offload_param_device: cpu + offload_optimizer_device: none + offload_param_device: none zero3_init_flag: false zero_stage: 2 - gradient_accumulation_steps: 8 + gradient_accumulation_steps: 4 distributed_type: DEEPSPEED downcast_bf16: 'no' machine_rank: 0 main_training_function: main mixed_precision: 'bf16' num_machines: 1 -num_processes: 2 +num_processes: 8 rdzv_backend: static same_network: true tpu_env: [] From 8f0660e4cc40aeafad5d48798ff90e15f42cc5d0 Mon Sep 17 00:00:00 2001 From: avecplezir Date: Wed, 7 May 2025 13:07:35 -0400 Subject: [PATCH 10/15] sm fixes --- benchmarks/continual_eval_checkpoints.py | 46 +++++++++++++------ .../accelerate_configs/deepspeed_zero3.yaml | 2 +- benchmarks/ppo/continual_ppo_trainer.py | 4 +- 3 files changed, 35 insertions(+), 17 deletions(-) diff --git a/benchmarks/continual_eval_checkpoints.py b/benchmarks/continual_eval_checkpoints.py index 016887fa..a5302966 100644 --- a/benchmarks/continual_eval_checkpoints.py +++ b/benchmarks/continual_eval_checkpoints.py @@ -1,9 +1,9 @@ -"""Evaluating checkpoints obtained from training using the dpo_continual script.""" - import glob import os +import re import torch +import wandb as wb from dataloading import init_continual_dataset from datasets import Dataset from dpo.continual_dpo_trainer import ( @@ -17,9 +17,7 @@ AutoTokenizer, ) from trl import ( - DPOConfig, ModelConfig, - ScriptArguments, TrlParser, get_kbit_device_map, get_peft_config, @@ -27,12 +25,10 @@ ) from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE -import wandb as wb - def main( - script_args: ScriptArguments, - training_args: DPOConfig, + script_args: ContinualDPOArguments, + training_args: ContinualDPOConfig, model_args: ModelConfig, ) -> 
None: # Determine torch dtype and quantization configs @@ -41,6 +37,9 @@ def main( if model_args.torch_dtype in ['auto', None] else getattr(torch, model_args.torch_dtype) ) + if script_args.wandb_run_name is not None: + training_args.run_name = script_args.wandb_run_name + quantization_config = get_quantization_config(model_args) # Model & Tokenizer Setup @@ -87,14 +86,26 @@ def main( # Validate reward model paths if provided for i, _ in enumerate(continual_dataset): - reward_path = os.path.join(training_args.reward_model_path, str(i)) + reward_path = training_args.reward_model_path + '_' + str(i) if not os.path.exists(reward_path): raise FileNotFoundError( f'Reward model not found for dataset {i} at {reward_path}' ) checkpoint_paths = glob.glob(f'{script_args.checkpoint_dir}/*/*') - checkpoint_paths = sorted([ch for ch in checkpoint_paths if 'checkpoint' in ch]) + + def extract_indices(path): + match = re.search(r'dataset-(\d+)/checkpoint-(\d+)', path) + if match: + dataset_idx = int(match.group(1)) + checkpoint_idx = int(match.group(2)) + return (dataset_idx, checkpoint_idx) + else: + return (float('inf'), float('inf')) # in case of unexpected format + + checkpoint_paths = [ch for ch in checkpoint_paths if 'checkpoint' in ch] + checkpoint_paths.sort(key=extract_indices) + print('checkpoint_paths', checkpoint_paths) # Checkpoint loop for checkpoint_path in checkpoint_paths: @@ -103,14 +114,20 @@ def main( print( f'Evaluating checkpoint: {checkpoint_step} trained on dataset: {dataset_name} on all tasks' ) - adapter_name = dataset_name + checkpoint_step - model.load_adapter(checkpoint_path, adapter_name=adapter_name) + # adapter_name = dataset_name + checkpoint_step + # model.load_adapter(checkpoint_path, adapter_name=adapter_name) + model = AutoModelForCausalLM.from_pretrained( + checkpoint_path, + trust_remote_code=model_args.trust_remote_code, + **model_kwargs, + ) metrics = {} # Task Loop for i, dataset in enumerate(continual_dataset): + print('task', i) reward_model = AutoModelForSequenceClassification.from_pretrained( - training_args.reward_model_path + f'/{str(i)}', num_labels=1 + training_args.reward_model_path + f'_{str(i)}', num_labels=1 ) training_args.output_dir = f'{output_dir}/dataset-{i}' @@ -130,7 +147,8 @@ def main( ev_metrics = {f'dataset-{i}/' + k: v for k, v in ev_metrics.items()} metrics.update(ev_metrics) - wb.log(metrics) # type: ignore[attr-defined] + if training_args.local_rank in (None, -1, 0): + wb.log(metrics) # type: ignore[attr-defined] print('Evaluation completed for all tasks and checkpoints!') diff --git a/benchmarks/dpo/accelerate_configs/deepspeed_zero3.yaml b/benchmarks/dpo/accelerate_configs/deepspeed_zero3.yaml index 29507c4c..6b68067b 100644 --- a/benchmarks/dpo/accelerate_configs/deepspeed_zero3.yaml +++ b/benchmarks/dpo/accelerate_configs/deepspeed_zero3.yaml @@ -11,7 +11,7 @@ machine_rank: 0 main_training_function: main mixed_precision: bf16 num_machines: 1 -num_processes: 2 # TODO change to whatever number of gpus is used +num_processes: 8 # TODO change to whatever number of gpus is used rdzv_backend: static same_network: true tpu_env: [] diff --git a/benchmarks/ppo/continual_ppo_trainer.py b/benchmarks/ppo/continual_ppo_trainer.py index 7718b9c7..505ac3e5 100644 --- a/benchmarks/ppo/continual_ppo_trainer.py +++ b/benchmarks/ppo/continual_ppo_trainer.py @@ -311,8 +311,8 @@ def __init__( # Training scheduling args.num_total_batches = math.ceil(args.total_episodes / args.batch_size) time_tensor = torch.tensor(int(time.time()), 
device=self.accelerator.device) - time_int = broadcast(time_tensor, 0).item() - args.run_name = f'{args.exp_name}__{args.seed}__{time_int}' + broadcast(time_tensor, 0).item() + # args.run_name = f'{args.exp_name}__{args.seed}__{time_int}' self.local_seed = args.seed + self.accelerator.process_index * 100003 # Prime if args.num_sample_generations > 0: self.sample_generations_freq = max( From 0d6630b263fd320d8c28bb06cc250a5d3bd01c74 Mon Sep 17 00:00:00 2001 From: avecplezir Date: Thu, 8 May 2025 13:49:51 -0400 Subject: [PATCH 11/15] upd --- benchmarks/hf_upload_models.py | 5 +++-- benchmarks/ppo/accelerate_configs/deepspeed_zero2.yaml | 3 +-- benchmarks/ppo/ppo_continual.py | 1 + 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/benchmarks/hf_upload_models.py b/benchmarks/hf_upload_models.py index 02646994..c50c805c 100644 --- a/benchmarks/hf_upload_models.py +++ b/benchmarks/hf_upload_models.py @@ -2,6 +2,7 @@ datasets = 'aifgen-long-piecewise aifgen-lipschitz aifgen-piecewise-preference-shift aifgen-domain-preference-shift aifgen-short-piecewise CPPO-REWARD' dataset_indices = '0 1 2 3 4 5 6 7 8 9' +model = 'Qwen2-0.5B' # datasets="aifgen-long-piecewise" # dataset_indices="0" @@ -9,11 +10,11 @@ for dataset_index in dataset_indices.split(): # Upload the model to the Hugging Face Hub try: - repo_id = f'LifelongAlignment/{dataset_name}-{dataset_index}-reward-model' + repo_id = f'LifelongAlignment/{model}-Instruct_{dataset_name}_REWARD_{dataset_index}' api = HfApi() api.create_repo(repo_id, repo_type='model', exist_ok=True, private=False) - path = f'/lustre/orion/bif151/scratch/ivan.anokhin/AIF-Gen/{dataset_name}/Qwen2.5-0.5B-Reward-8gpus/Qwen2.5-0.5B-Instruct_{dataset_name}_REWARD_{dataset_index}' + path = f'/lustre/orion/bif151/scratch/ivan.anokhin/AIF-Gen/{dataset_name}/{model}-Reward-8gpus/{model}-Instruct_{dataset_name}_REWARD_{dataset_index}' print('path', path) upload_folder( diff --git a/benchmarks/ppo/accelerate_configs/deepspeed_zero2.yaml b/benchmarks/ppo/accelerate_configs/deepspeed_zero2.yaml index 27d04d8d..8046cccc 100644 --- a/benchmarks/ppo/accelerate_configs/deepspeed_zero2.yaml +++ b/benchmarks/ppo/accelerate_configs/deepspeed_zero2.yaml @@ -6,14 +6,13 @@ deepspeed_config: offload_param_device: none zero3_init_flag: false zero_stage: 2 - gradient_accumulation_steps: 4 distributed_type: DEEPSPEED downcast_bf16: 'no' machine_rank: 0 main_training_function: main mixed_precision: 'bf16' num_machines: 1 -num_processes: 8 +num_processes: 1 rdzv_backend: static same_network: true tpu_env: [] diff --git a/benchmarks/ppo/ppo_continual.py b/benchmarks/ppo/ppo_continual.py index adfb8f1c..8db6aff3 100644 --- a/benchmarks/ppo/ppo_continual.py +++ b/benchmarks/ppo/ppo_continual.py @@ -100,6 +100,7 @@ def main( if '.' 
in clean_dataset_name: clean_dataset_name = clean_dataset_name.split('.')[0] + print(f'Training PPO on {len(continual_dataset)} tasks') # check if the reward models are present either in the path or in the hub if training_args.reward_model_path is not None: for i in range(len(continual_dataset)): From e3846b5bf1cd60886c8a17103a95f6e3a1b37f18 Mon Sep 17 00:00:00 2001 From: avecplezir Date: Thu, 8 May 2025 21:29:16 -0400 Subject: [PATCH 12/15] logging --- benchmarks/dataloading.py | 10 --- benchmarks/dpo/continual_dpo_trainer.py | 115 +++++++++++++++--------- 2 files changed, 71 insertions(+), 54 deletions(-) diff --git a/benchmarks/dataloading.py b/benchmarks/dataloading.py index 65b5dd7b..c6d8c704 100644 --- a/benchmarks/dataloading.py +++ b/benchmarks/dataloading.py @@ -89,22 +89,12 @@ def init_continual_dataset( data = ContinualAlignmentDataset.from_json(dataset) except OSError: # need to try downloading from hub try: - # json_name = dataset.split('/', )[-1] # print(f'Downloading {json_name} from Hugging Face Hub...') local_path = hf_hub_download( repo_id=f'LifelongAlignment/{dataset}', filename='data.json', repo_type='dataset', ) - # local_path = hf_hub_download( - # repo_id=f"LifelongAlignment/{dataset}", filename=f'{dataset}.json', repo_type='dataset' - # ) - # local_path = hf_hub_download( - # repo_id=f"LifelongAlignment/{dataset}", filename=f'{json_name}.json', repo_type='dataset' - # ) - # local_path = hf_hub_download( - # repo_id=dataset, filename='dataset.json', repo_type='dataset' - # ) data = ContinualAlignmentDataset.from_json(local_path) except Exception as e: raise ValueError(f'Error loading dataset: {e}') diff --git a/benchmarks/dpo/continual_dpo_trainer.py b/benchmarks/dpo/continual_dpo_trainer.py index 44374351..dc93f5ff 100644 --- a/benchmarks/dpo/continual_dpo_trainer.py +++ b/benchmarks/dpo/continual_dpo_trainer.py @@ -13,6 +13,8 @@ from accelerate import Accelerator, PartialState from accelerate.utils import gather_object from datasets import Dataset +from rich.console import Console +from rich.table import Table from torch.utils.data import DataLoader from transformers import ( BaseImageProcessor, @@ -328,23 +330,31 @@ def log( eval_policy_metrics = self.evaluate_policy() logs.update(eval_policy_metrics) - # TODO: Only generation sample completions every x steps - do_generate_completions = True - if do_generate_completions: - self._generate_completions() - torch.cuda.empty_cache() + # TODO: Only generation sample completions every x steps + do_generate_completions = True + if do_generate_completions: + self._generate_completions() + torch.cuda.empty_cache() return super().log(logs, start_time) def _generate_completions(self) -> None: # Config from: https://github.com/huggingface/trl/blob/56e57662053e2d0cc6302dad404820b0c0ec6a91/trl/trainer/ppo_trainer.py#L688 + # generation_config = GenerationConfig( + # max_new_tokens=53, + # temperature=(0.01 + 1e-7), + # top_k=0.0, + # top_p=1.0, + # do_sample=True, + # ) generation_config = GenerationConfig( - max_new_tokens=53, - temperature=(0.01 + 1e-7), + max_new_tokens=self.args.response_length, + temperature=(self.args.temperature + 1e-7), top_k=0.0, top_p=1.0, do_sample=True, ) + table = defaultdict(list) with torch.no_grad(): with unwrap_model_for_generation( @@ -352,44 +362,61 @@ def _generate_completions(self) -> None: self.accelerator, gather_deepspeed3_params=None, ) as unwrapped_model: - for batch in self.eval_dataloader: - query = batch['input_ids'] - context_length = query.shape[1] - query_response, _ = 
batch_generation( - unwrapped_model, - query, - query.shape[0], - self.processing_class.pad_token_id, - generation_config, - ) - response = query_response[:, context_length:] - postprocessed_response = response - postprocessed_query_response = torch.cat( - (query, postprocessed_response), 1 - ) - _, score, _ = get_reward( - self.reward_model, - postprocessed_query_response, - self.processing_class.pad_token_id, - context_length, - ) + if self.eval_policy_dataloader is not None: + for batch in self.eval_policy_dataloader: + query = batch['input_ids'] + context_length = query.shape[1] + query_response, _ = batch_generation( + unwrapped_model, + query, + query.shape[0], + self.processing_class.pad_token_id, + generation_config, + ) + response = query_response[:, context_length:] + postprocessed_response = response + postprocessed_query_response = torch.cat( + (query, postprocessed_response), 1 + ) + _, score, _ = get_reward( + self.reward_model, + postprocessed_query_response, + self.processing_class.pad_token_id, + context_length, + ) - queries = gather_object( - self.processing_class.batch_decode( - query, skip_special_tokens=True + queries = gather_object( + self.processing_class.batch_decode( + query, skip_special_tokens=True + ) ) - ) - responses = gather_object( - self.processing_class.batch_decode(postprocessed_response) - ) - scores = ( - self.accelerator.gather_for_metrics(score).float().cpu().numpy() - ) - table['query'].extend(queries) - table['model response'].extend(responses) - table['score'].extend(scores) - break + responses = gather_object( + self.processing_class.batch_decode(postprocessed_response) + ) + scores = ( + self.accelerator.gather_for_metrics(score) + .float() + .cpu() + .numpy() + ) + table['query'].extend(queries) + table['model response'].extend(responses) + table['score'].extend(scores) + break df = pd.DataFrame(table) - if self.accelerator.is_main_process and wb.run is not None: - wb.log({'completions': wb.Table(dataframe=df)}) + + if self.accelerator.is_main_process: + print_rich_table(df.iloc[0 : 0 + 5]) + if wb.run is not None: + wb.log({'completions': wb.Table(dataframe=df)}) + + +def print_rich_table(df: pd.DataFrame) -> Table: + console = Console() + table = Table(show_lines=True) + for column in df.columns: + table.add_column(column) + for _, row in df.iterrows(): + table.add_row(*row.astype(str).tolist()) + console.print(table) From 305f08ad4e439f072ba4e35e6b8410a217cda3df Mon Sep 17 00:00:00 2001 From: avecplezir Date: Thu, 8 May 2025 21:53:33 -0400 Subject: [PATCH 13/15] logging --- benchmarks/dpo/continual_dpo_trainer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/dpo/continual_dpo_trainer.py b/benchmarks/dpo/continual_dpo_trainer.py index dc93f5ff..ce507021 100644 --- a/benchmarks/dpo/continual_dpo_trainer.py +++ b/benchmarks/dpo/continual_dpo_trainer.py @@ -406,7 +406,7 @@ def _generate_completions(self) -> None: df = pd.DataFrame(table) - if self.accelerator.is_main_process: + if self.accelerator.is_main_process or self.accelerator is None: print_rich_table(df.iloc[0 : 0 + 5]) if wb.run is not None: wb.log({'completions': wb.Table(dataframe=df)}) From fcdaaaaa2b9f612d156274898373a658ea15cfe2 Mon Sep 17 00:00:00 2001 From: avecplezir Date: Fri, 9 May 2025 18:45:17 -0400 Subject: [PATCH 14/15] upd eval scripts --- benchmarks/continual_eval_checkpoints.py | 9 ++ benchmarks/dpo/continual_dpo_trainer.py | 8 +- benchmarks/parallel_eval_checkpoints.py | 142 +++++++++++++++++++++++ 3 files changed, 158 insertions(+), 1 
deletion(-) create mode 100644 benchmarks/parallel_eval_checkpoints.py diff --git a/benchmarks/continual_eval_checkpoints.py b/benchmarks/continual_eval_checkpoints.py index a5302966..6f748b1d 100644 --- a/benchmarks/continual_eval_checkpoints.py +++ b/benchmarks/continual_eval_checkpoints.py @@ -146,6 +146,15 @@ def extract_indices(path): ev_metrics = trainer.evaluate() ev_metrics = {f'dataset-{i}/' + k: v for k, v in ev_metrics.items()} metrics.update(ev_metrics) + if training_args.local_rank in (None, -1, 0): + wb.log({f'task/{dataset_name}/{k}': v for k, v in ev_metrics.items()}) + + # If using DeepSpeed through Accelerate, tear down the engine after training. + if hasattr(trainer, 'deepspeed') and trainer.deepspeed is not None: + # Remove reference to the DeepSpeed engine to allow proper cleanup. + del trainer.deepspeed + # Free cached GPU memory. + torch.cuda.empty_cache() if training_args.local_rank in (None, -1, 0): wb.log(metrics) # type: ignore[attr-defined] diff --git a/benchmarks/dpo/continual_dpo_trainer.py b/benchmarks/dpo/continual_dpo_trainer.py index ce507021..024cbc08 100644 --- a/benchmarks/dpo/continual_dpo_trainer.py +++ b/benchmarks/dpo/continual_dpo_trainer.py @@ -286,7 +286,10 @@ def evaluate_policy(self) -> dict: with torch.no_grad(): if self.eval_policy_dataloader is not None: - for batch in self.eval_policy_dataloader: + for idx, batch in enumerate(self.eval_policy_dataloader): + print( + f'Processing batch {idx} out of {len(self.eval_policy_dataloader)}' + ) query = batch['input_ids'].to(self.accelerator.device) context_length = query.shape[1] with unwrap_model_for_generation( @@ -333,6 +336,7 @@ def log( # TODO: Only generation sample completions every x steps do_generate_completions = True if do_generate_completions: + print('Generating completions...') self._generate_completions() torch.cuda.empty_cache() @@ -355,6 +359,7 @@ def _generate_completions(self) -> None: do_sample=True, ) + self.model.eval() table = defaultdict(list) with torch.no_grad(): with unwrap_model_for_generation( @@ -404,6 +409,7 @@ def _generate_completions(self) -> None: table['score'].extend(scores) break + self.model.train() df = pd.DataFrame(table) if self.accelerator.is_main_process or self.accelerator is None: diff --git a/benchmarks/parallel_eval_checkpoints.py b/benchmarks/parallel_eval_checkpoints.py new file mode 100644 index 00000000..158deb65 --- /dev/null +++ b/benchmarks/parallel_eval_checkpoints.py @@ -0,0 +1,142 @@ +import os + +import torch +import wandb as wb +from dataloading import init_continual_dataset +from datasets import Dataset +from dpo.continual_dpo_trainer import ( + ContinualDPOArguments, + ContinualDPOConfig, + ContinualDPOTrainer, +) +from transformers import ( + AutoModelForCausalLM, + AutoModelForSequenceClassification, + AutoTokenizer, +) +from trl import ( + ModelConfig, + TrlParser, + get_kbit_device_map, + get_peft_config, + get_quantization_config, +) +from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE + + +def main( + script_args: ContinualDPOArguments, + training_args: ContinualDPOConfig, + model_args: ModelConfig, +) -> None: + # Determine torch dtype and quantization configs + torch_dtype = ( + model_args.torch_dtype + if model_args.torch_dtype in ['auto', None] + else getattr(torch, model_args.torch_dtype) + ) + if script_args.wandb_run_name is not None: + training_args.run_name = script_args.wandb_run_name + + quantization_config = get_quantization_config(model_args) + + # Model & Tokenizer Setup + model_kwargs = dict( + 
revision=model_args.model_revision, + attn_implementation=model_args.attn_implementation, + torch_dtype=torch_dtype, + use_cache=False if training_args.gradient_checkpointing else True, + device_map=get_kbit_device_map() if quantization_config is not None else None, + quantization_config=quantization_config, + ) + + # Checkpoint loop + checkpoint_path = script_args.checkpoint_dir + dataset_name = checkpoint_path.split('/')[-2].replace('.', '') + checkpoint_step = checkpoint_path.split('/')[-1].replace('.', '') + print( + f'Evaluating checkpoint: {checkpoint_step} trained on dataset: {dataset_name} on all tasks' + ) + checkpoint_name = dataset_name + '_' + checkpoint_step + print('checkpoint_name', checkpoint_name) + + model = AutoModelForCausalLM.from_pretrained( + checkpoint_path, + trust_remote_code=model_args.trust_remote_code, + **model_kwargs, + ) + peft_config = get_peft_config(model_args) + + ref_model = AutoModelForCausalLM.from_pretrained( + model_args.model_name_or_path, + trust_remote_code=model_args.trust_remote_code, + **model_kwargs, + ) + + # Load tokenizer and set chat template if needed + tokenizer = AutoTokenizer.from_pretrained( + model_args.model_name_or_path, trust_remote_code=model_args.trust_remote_code + ) + if tokenizer.pad_token is None: + tokenizer.pad_token = tokenizer.eos_token + if tokenizer.chat_template is None: + tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE + + # Initialize continual dataset + continual_dataset: list[dict[str, Dataset]] = init_continual_dataset( + script_args.dataset_name, + mock=training_args.mock, + tokenizer=tokenizer, + tools=getattr(training_args, 'tools', None), + ) + output_dir = training_args.output_dir + + # Validate reward model paths if provided + for i, _ in enumerate(continual_dataset): + reward_path = training_args.reward_model_path + '_' + str(i) + if not os.path.exists(reward_path): + raise FileNotFoundError( + f'Reward model not found for dataset {i} at {reward_path}' + ) + + # Task Loop + for i, dataset in enumerate(continual_dataset): + print('task', i) + reward_model = AutoModelForSequenceClassification.from_pretrained( + training_args.reward_model_path + f'_{str(i)}', num_labels=1 + ) + + training_args.output_dir = f'{output_dir}/dataset-{i}' + # using ContinualDPOTrainer for all pipelines (PPO, DPO, COPR, ..) only for evaluation + trainer = ContinualDPOTrainer( + args=training_args, + processing_class=tokenizer, + model=model, + ref_model=ref_model, + reward_model=reward_model, + train_dataset=dataset[script_args.dataset_test_split], + eval_dataset=dataset[script_args.dataset_test_split], + peft_config=peft_config, + ) + + print('evaluating...') + ev_metrics = trainer.evaluate() + # ev_metrics = {f'dataset-{i}/' + k: v for k, v in ev_metrics.items()} + if training_args.local_rank in (None, -1, 0): + print('ev_metrics', ev_metrics) + wb.log(ev_metrics) + wb.log({f'{checkpoint_name}/{k}': v for k, v in ev_metrics.items()}) + + # If using DeepSpeed through Accelerate, tear down the engine after training. + if hasattr(trainer, 'deepspeed') and trainer.deepspeed is not None: + # Remove reference to the DeepSpeed engine to allow proper cleanup. + del trainer.deepspeed + # Free cached GPU memory. 
From ef6ef390fd46c2f785463a99ccee16842b8c4cf8 Mon Sep 17 00:00:00 2001
From: avecplezir
Date: Tue, 13 May 2025 11:37:59 -0400
Subject: [PATCH 15/15] add parallel_eval_checkpoints

---
 .../dpo_ewc/continual_dpo_EWC_trainer.py      | 76 +++++++++----------
 benchmarks/parallel_eval_checkpoints.py       | 39 ++++++++--
 .../accelerate_configs/deepspeed_zero2.yaml   |  2 +-
 3 files changed, 69 insertions(+), 48 deletions(-)

diff --git a/benchmarks/dpo_ewc/continual_dpo_EWC_trainer.py b/benchmarks/dpo_ewc/continual_dpo_EWC_trainer.py
index 45b2af0c..5ee13556 100644
--- a/benchmarks/dpo_ewc/continual_dpo_EWC_trainer.py
+++ b/benchmarks/dpo_ewc/continual_dpo_EWC_trainer.py
@@ -116,37 +116,50 @@ def compute_ewc_loss(self) -> torch.Tensor:
             # No previous tasks, so no regularization needed
             return torch.tensor(0.0, device=self.accelerator.device)
 
-        ewc_loss = torch.tensor(0.0, device=self.accelerator.device)
-
         # Calculate the EWC penalty for each parameter
         model = self.accelerator.unwrap_model(self.model)
-
+        ewc_loss = torch.tensor(0.0, device=self.accelerator.device)
         for name, param in model.named_parameters():
-            if name not in ContinualDPOEWCTrainer.class_fisher_information:
-                continue
-            if not param.requires_grad:
+            if not param.requires_grad or name not in self.class_fisher_information:
                 continue
-
-            if (
-                name in ContinualDPOEWCTrainer.class_fisher_information
-                and param.requires_grad
-            ):
-                # Get the Fisher information and old parameter values
-                fisher = ContinualDPOEWCTrainer.class_fisher_information[name].to(
-                    param.device
-                )
-
+            # self.accelerator.print(name, param.shape)
             with deepspeed.zero.GatheredParameters([param], modifier_rank=0):
                 if self.accelerator.is_main_process:
+                    # Get the Fisher information and old parameter values
+                    fisher = ContinualDPOEWCTrainer.class_fisher_information[name].to(
+                        self.accelerator.device
+                    )
                     old_param = ContinualDPOEWCTrainer.class_old_params[name].to(
-                        param.device
+                        self.accelerator.device
                     )
+                    # Calculate squared distance weighted by Fisher information
                     delta = param - old_param
                     ewc_loss = ewc_loss + (fisher * delta.pow(2)).sum()
 
-        # Apply the EWC lambda coefficient and return
-        return 0.5 * self.ewc_lambda * ewc_loss
+                    # Apply the EWC lambda coefficient and return
+                    ewc_loss = 0.5 * self.ewc_lambda * ewc_loss
+                else:
+                    # Non-main processes should not compute EWC loss
+                    ewc_loss = torch.tensor(0.0, device=self.accelerator.device)
+
+        ewc_loss = self.accelerator.reduce(ewc_loss, 'mean')
+        return ewc_loss
+
+    def store_current_parameters(self) -> Dict[str, torch.Tensor]:
+        """Store the current model parameters.
+
+        Returns:
+            Dictionary mapping parameter names to their current values
+        """
+        model = self.accelerator.unwrap_model(self.model)
+        old_params = {}
+        for name, param in model.named_parameters():
+            with deepspeed.zero.GatheredParameters([param], modifier_rank=0):
+                if self.accelerator.is_main_process:
+                    if param.requires_grad:
+                        old_params[name] = param.data.clone().detach()
+        return old_params
 
     def compute_fisher_information(
         self, num_samples: int = 120
@@ -161,11 +174,6 @@ def compute_fisher_information(
         """
         # Get unwrapped model for computing Fisher
         model = self.accelerator.unwrap_model(self.model)
-        self.accelerator.device
-
-        # Make sure parameters require gradients
-        for param in model.parameters():
-            param.requires_grad_(True)
 
         # Initialize fisher information dictionary
         fisher_info = {}
@@ -206,7 +214,9 @@ def compute_fisher_information(
             model.zero_grad()
 
             try:
-                loss, _ = self.compute_loss(model, batch, return_outputs=True)
+                loss, _ = super(ContinualDPOEWCTrainer, self).compute_loss(
+                    model, batch, return_outputs=True
+                )
 
                 # Check if loss requires gradient
                 if not loss.requires_grad:
@@ -238,22 +248,6 @@ def compute_fisher_information(
         print(f'Computed Fisher information for {sample_count} examples')
         return fisher_info
 
-    def store_current_parameters(self) -> Dict[str, torch.Tensor]:
-        """Store the current model parameters.
-
-        Returns:
-            Dictionary mapping parameter names to their current values
-        """
-        model = self.accelerator.unwrap_model(self.model)
-        old_params = {}
-
-        for name, param in model.named_parameters():
-            with deepspeed.zero.GatheredParameters([param], modifier_rank=0):
-                if self.accelerator.is_main_process:
-                    if param.requires_grad:
-                        old_params[name] = param.data.clone().detach()
-        return old_params
-
     def train(self) -> Any:
         """Override train method to incorporate EWC regularization."""
         # Regular training
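The reworked compute_ewc_loss above is the standard elastic weight consolidation penalty, 0.5 * lambda * sum_i F_i * (theta_i - theta_i_star)^2, with the added complication that ZeRO-3 shards must be gathered before current and stored parameters can be compared. A minimal single-process sketch of the same penalty, without DeepSpeed and with illustrative names:

import torch


def ewc_penalty(
    model: torch.nn.Module,
    fisher: dict[str, torch.Tensor],
    old_params: dict[str, torch.Tensor],
    ewc_lambda: float,
) -> torch.Tensor:
    # Quadratic penalty pulling parameters back toward their values after the
    # previous task, weighted per-parameter by the Fisher information.
    device = next(model.parameters()).device
    loss = torch.zeros((), device=device)
    for name, param in model.named_parameters():
        if not param.requires_grad or name not in fisher:
            continue
        delta = param - old_params[name].to(param.device)
        loss = loss + (fisher[name].to(param.device) * delta.pow(2)).sum()
    return 0.5 * ewc_lambda * loss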
diff --git a/benchmarks/parallel_eval_checkpoints.py b/benchmarks/parallel_eval_checkpoints.py
index 158deb65..c4921819 100644
--- a/benchmarks/parallel_eval_checkpoints.py
+++ b/benchmarks/parallel_eval_checkpoints.py
@@ -9,6 +9,7 @@
     ContinualDPOConfig,
     ContinualDPOTrainer,
 )
+from safetensors import safe_open
 from transformers import (
     AutoModelForCausalLM,
     AutoModelForSequenceClassification,
     AutoTokenizer,
 )
@@ -30,6 +31,7 @@ def main(
     model_args: ModelConfig,
 ) -> None:
     # Determine torch dtype and quantization configs
+
     torch_dtype = (
         model_args.torch_dtype
         if model_args.torch_dtype in ['auto', None]
         else getattr(torch, model_args.torch_dtype)
     )
@@ -52,7 +54,11 @@ def main(
 
     # Checkpoint loop
     checkpoint_path = script_args.checkpoint_dir
-    dataset_name = checkpoint_path.split('/')[-2].replace('.', '')
+    if 'PPO' in checkpoint_path:
+        dataset_name = 'dataset-' + checkpoint_path.split('/')[-2].split('_')[-1]
+    else:
+        dataset_name = checkpoint_path.split('/')[-2].replace('.', '')
+
     checkpoint_step = checkpoint_path.split('/')[-1].replace('.', '')
     print(
         f'Evaluating checkpoint: {checkpoint_step} trained on dataset: {dataset_name} on all tasks'
@@ -60,11 +66,32 @@ def main(
     checkpoint_name = dataset_name + '_' + checkpoint_step
     print('checkpoint_name', checkpoint_name)
 
-    model = AutoModelForCausalLM.from_pretrained(
-        checkpoint_path,
-        trust_remote_code=model_args.trust_remote_code,
-        **model_kwargs,
-    )
+    if 'PPO' in checkpoint_path:
+        # remove the prefix 'policy.' from the keys to load the model; skip the critic and value model
+        prefix = 'policy.'
+        with safe_open(
+            checkpoint_path + '/model.safetensors', framework='pt', device='cpu'
+        ) as f:
+            clean_sd = {
+                k[len(prefix) :] if k.startswith(prefix) else k: f.get_tensor(k)
+                for k in f.keys()
+                if not (
+                    k.startswith('critic_backbone.') or k.startswith('value_model.')
+                )
+            }
+
+        model = AutoModelForCausalLM.from_pretrained(
+            checkpoint_path,
+            trust_remote_code=model_args.trust_remote_code,
+            state_dict=clean_sd,
+            **model_kwargs,
+        )
+    else:
+        model = AutoModelForCausalLM.from_pretrained(
+            checkpoint_path,
+            trust_remote_code=model_args.trust_remote_code,
+            **model_kwargs,
+        )
     peft_config = get_peft_config(model_args)
 
     ref_model = AutoModelForCausalLM.from_pretrained(
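The comprehension above renames policy weights and drops value-head tensors so that a TRL PPO checkpoint can be loaded as a plain causal LM. On a toy state dict, with illustrative key names, the transformation behaves as follows:

raw = {
    'policy.model.embed_tokens.weight': 'w0',
    'policy.lm_head.weight': 'w1',
    'value_model.score.weight': 'w2',
    'critic_backbone.layers.0.weight': 'w3',
}
prefix = 'policy.'
clean_sd = {
    k[len(prefix):] if k.startswith(prefix) else k: v
    for k, v in raw.items()
    if not (k.startswith('critic_backbone.') or k.startswith('value_model.'))
}
# clean_sd == {'model.embed_tokens.weight': 'w0', 'lm_head.weight': 'w1'}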
diff --git a/benchmarks/ppo/accelerate_configs/deepspeed_zero2.yaml b/benchmarks/ppo/accelerate_configs/deepspeed_zero2.yaml
index 8046cccc..239b14ac 100644
--- a/benchmarks/ppo/accelerate_configs/deepspeed_zero2.yaml
+++ b/benchmarks/ppo/accelerate_configs/deepspeed_zero2.yaml
@@ -12,7 +12,7 @@ machine_rank: 0
 main_training_function: main
 mixed_precision: 'bf16'
 num_machines: 1
-num_processes: 1
+num_processes: 8
 rdzv_backend: static
 same_network: true
 tpu_env: []