342 changes: 135 additions & 207 deletions benchmarks/dpo_ewc/continual_dpo_EWC_trainer.py

Large diffs are not rendered by default.

49 changes: 36 additions & 13 deletions benchmarks/parallel_eval_checkpoints.py
@@ -9,6 +9,7 @@
     ContinualDPOConfig,
     ContinualDPOTrainer,
 )
+from safetensors import safe_open
 from transformers import (
     AutoModelForCausalLM,
     AutoModelForSequenceClassification,
@@ -30,6 +31,7 @@ def main(
     model_args: ModelConfig,
 ) -> None:
     # Determine torch dtype and quantization configs
+
     torch_dtype = (
         model_args.torch_dtype
         if model_args.torch_dtype in ['auto', None]
@@ -45,26 +47,47 @@
         revision=model_args.model_revision,
         attn_implementation=model_args.attn_implementation,
         torch_dtype=torch_dtype,
-        use_cache=False if training_args.gradient_checkpointing else True,
         device_map=get_kbit_device_map() if quantization_config is not None else None,
         quantization_config=quantization_config,
     )

     # Checkpoint loop
     checkpoint_path = script_args.checkpoint_dir
-    dataset_name = checkpoint_path.split('/')[-2].replace('.', '')
+    if 'DPO' not in checkpoint_path:
+        dataset_name = 'dataset-' + checkpoint_path.split('/')[-2].split('_')[-1]
+    else:
+        dataset_name = checkpoint_path.split('/')[-2].replace('.', '')

     checkpoint_step = checkpoint_path.split('/')[-1].replace('.', '')
     print(
         f'Evaluating checkpoint: {checkpoint_step} trained on dataset: {dataset_name} on all tasks'
     )
+    checkpoint_name = dataset_name + '_' + checkpoint_step
+    print('checkpoint_name', checkpoint_name)
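Review note: as a reader aid, here is how this naming logic resolves for a non-DPO checkpoint. The example path is hypothetical, modeled on the PPO paths in jobs/schedule_eval.sh below:

```python
# Hypothetical PPO checkpoint path, for illustration only.
path = '/scratch/s/shahradm/Qwen2-0.5B-PPO-aifgen-lipschitz/Qwen2-0.5B-Instruct_aifgen-lipschitz_PPO_0/checkpoint-300'

parent = path.split('/')[-2]                       # 'Qwen2-0.5B-Instruct_aifgen-lipschitz_PPO_0'
dataset_name = 'dataset-' + parent.split('_')[-1]  # 'dataset-0'  ('DPO' not in path)
checkpoint_step = path.split('/')[-1].replace('.', '')   # 'checkpoint-300'
checkpoint_name = dataset_name + '_' + checkpoint_step   # 'dataset-0_checkpoint-300'
```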

-    model = AutoModelForCausalLM.from_pretrained(
-        checkpoint_path,
-        trust_remote_code=model_args.trust_remote_code,
-        **model_kwargs,
-    )
+    if 'DPO' not in checkpoint_path:
+        base_model_name = model_args.model_name_or_path  # Use the base model path for config
+
+        # Load config from base model first
+        from transformers import AutoConfig
+        config = AutoConfig.from_pretrained(
+            base_model_name,
+            trust_remote_code=model_args.trust_remote_code,
+        )
+        # remove the prefix 'policy.' from the keys to load the model; skip the critic and value model
+        model = AutoModelForCausalLM.from_pretrained(
+            checkpoint_path,
+            config=config,
+            trust_remote_code=model_args.trust_remote_code,
+            **model_kwargs,
+        )
+    else:
+        model = AutoModelForCausalLM.from_pretrained(
+            checkpoint_path,
+            trust_remote_code=model_args.trust_remote_code,
+            local_files_only=True,
+            **model_kwargs,
+        )
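Review note: the comment above mentions stripping the 'policy.' prefix, but the rendered hunk only shows a plain `from_pretrained` call with a base-model config; presumably the key remapping is where the new `safetensors` import comes in. A minimal sketch of one way that could look (the helper name and storage layout are assumptions, not this PR's actual code):

```python
# A sketch, assuming the PPO-style checkpoint stores tensors under
# 'policy.*' in a .safetensors file; load_policy_state_dict is hypothetical.
from safetensors import safe_open

def load_policy_state_dict(checkpoint_file):
    """Collect 'policy.*' tensors and drop the prefix so the keys match
    AutoModelForCausalLM; critic and value-model tensors are skipped."""
    state_dict = {}
    with safe_open(checkpoint_file, framework='pt', device='cpu') as f:
        for key in f.keys():
            if key.startswith('policy.'):
                state_dict[key[len('policy.'):]] = f.get_tensor(key)
    return state_dict
```

The resulting dict could then be applied with `model.load_state_dict(state_dict, strict=False)` before evaluation.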
     peft_config = get_peft_config(model_args)

     ref_model = AutoModelForCausalLM.from_pretrained(
@@ -92,12 +115,12 @@
     output_dir = training_args.output_dir

     # Validate reward model paths if provided
-    for i, _ in enumerate(continual_dataset):
-        reward_path = training_args.reward_model_path + '_' + str(i)
-        if not os.path.exists(reward_path):
-            raise FileNotFoundError(
-                f'Reward model not found for dataset {i} at {reward_path}'
-            )
+    # for i, _ in enumerate(continual_dataset):
+    #     reward_path = training_args.reward_model_path + '_' + str(i)
+    #     if not os.path.exists(reward_path):
+    #         raise FileNotFoundError(
+    #             f'Reward model not found for dataset {i} at {reward_path}'
+    #         )
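Review note: commenting the check out silently skips reward-model validation everywhere. If the check is meant to return, one option is to gate it behind an opt-in flag instead; a sketch, assuming a new `validate_reward_models` field on the script config (not in the original):

```python
if getattr(script_args, 'validate_reward_models', False):
    for i, _ in enumerate(continual_dataset):
        reward_path = training_args.reward_model_path + '_' + str(i)
        if not os.path.exists(reward_path):
            raise FileNotFoundError(
                f'Reward model not found for dataset {i} at {reward_path}'
            )
```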

     # Task Loop
     for i, dataset in enumerate(continual_dataset):
Expand Down
2 changes: 1 addition & 1 deletion jobs/cppo/cppo_domain_shift_multi_gpu.sh
@@ -35,5 +35,5 @@ accelerate launch --config_file benchmarks/cppo/accelerate_configs/deepspeed_zer
     --eval_steps 200 \
     --save_steps 300 \
     --bf16 \
-    --output_dir "$HOME/Qwen2-0.5B-CPPO-${dataset_name}" \
+    --output_dir "/home/s/shahradm/links/projects/aip-rrabba/shared/aifgen_experiments/Qwen2-0.5B-CPPO-${dataset_name}" \
     --no_remove_unused_columns
2 changes: 1 addition & 1 deletion jobs/cppo/cppo_lipschitz_multi_gpu.sh
@@ -36,5 +36,5 @@ accelerate launch --config_file benchmarks/cppo/accelerate_configs/deepspeed_zer
     --eval_steps 200 \
     --save_steps 300 \
     --bf16 \
-    --output_dir "$HOME/Qwen2-0.5B-CPPO-${dataset_name}" \
+    --output_dir "/home/s/shahradm/links/projects/aip-rrabba/shared/aifgen_experiments/Qwen2-0.5B-CPPO-${dataset_name}" \
     --no_remove_unused_columns
2 changes: 1 addition & 1 deletion jobs/cppo/cppo_long_piecewise_multi_gpu.sh
@@ -35,5 +35,5 @@ accelerate launch --config_file benchmarks/cppo/accelerate_configs/deepspeed_zer
     --eval_steps 200 \
     --save_steps 300 \
     --bf16 \
-    --output_dir "$HOME/Qwen2-0.5B-CPPO-${dataset_name}" \
+    --output_dir "/home/s/shahradm/links/projects/aip-rrabba/shared/aifgen_experiments/Qwen2-0.5B-CPPO-${dataset_name}" \
     --no_remove_unused_columns
4 changes: 2 additions & 2 deletions jobs/cppo/cppo_piecewise_multi_gpu.sh
@@ -30,11 +30,11 @@ accelerate launch --config_file benchmarks/cppo/accelerate_configs/deepspeed_zer
     --response_length 256 \
     --num_train_epochs 4 \
     --gradient_checkpointing \
-    --per_device_train_batch_size 8 \
+    --per_device_train_batch_size 4 \
     --logging_steps 10 \
     --eval_strategy steps \
     --eval_steps 200 \
     --save_steps 300 \
     --bf16 \
-    --output_dir "$HOME/Qwen2-0.5B-CPPO-${dataset_name}" \
+    --output_dir "/home/s/shahradm/links/projects/aip-rrabba/shared/aifgen_experiments/Qwen2-0.5B-CPPO-${dataset_name}" \
     --no_remove_unused_columns
14 changes: 7 additions & 7 deletions jobs/dpo/dpo_cppo_multi_gpu.sh
@@ -18,20 +18,20 @@ dataset_name='CPPO-RL'

 accelerate launch --config_file benchmarks/dpo/accelerate_configs/deepspeed_zero3.yaml \
     benchmarks/dpo/dpo_continual.py \
-    --dataset_name 'CPPO-RL' \
+    --dataset_name $dataset_name \
     --model_name_or_path Qwen/Qwen2-0.5B-Instruct \
     --reward_model_path LifelongAlignment/Qwen2.5-0.5B-Instruct_CPPO_REWARD \
-    --learning_rate 5.0e-6 \
+    --learning_rate 1.0e-6 \
     --num_train_epochs 4 \
-    --per_device_train_batch_size 8 \
+    --per_device_train_batch_size 16 \
     --gradient_checkpointing \
-    --logging_steps 20 \
+    --logging_steps 10 \
     --eval_strategy steps \
     --response_length 256 \
-    --eval_steps 500 \
-    --save_steps 500 \
+    --eval_steps 50000 \
+    --save_steps 300 \
     --bf16 \
     --output_dir "$SCRATCH/projects/Qwen2-0.5B-DPO-${dataset_name}" \
     --no_remove_unused_columns \
-    --wandb_project $dataset_name \
+    --wandb_project "$dataset_name-post-May-19" \
     --wandb_run_name "Qwen2-0.5B-DPO-${dataset_name}-multi-gpu"
22 changes: 11 additions & 11 deletions jobs/dpo_ewc/dpo_ewc_long_piecewise_multi_gpu.sh
@@ -5,7 +5,7 @@
 #SBATCH --ntasks-per-node=4       # One task per GPU
 #SBATCH --cpus-per-task=6
 #SBATCH --mem=64G
-#SBATCH --time=24:00:00
+#SBATCH --time=1:00:00
 #SBATCH --output=out/%x.%j.out    # Include job name + job ID
 #SBATCH --error=out/%x.%j.err     # Include job name + job ID
 #SBATCH --mail-type=ALL
@@ -16,22 +16,22 @@ source .env

 dataset_name='aifgen-long-piecewise'

-accelerate launch --config_file benchmarks/dpo/accelerate_configs/deepspeed_zero2.yaml \
+accelerate launch --config_file benchmarks/dpo/accelerate_configs/deepspeed_zero3.yaml \
     benchmarks/dpo_ewc/dpo_EWC_continual.py \
-    --dataset_name $dataset_name \
+    --dataset_name benchmarks/continual_data_debug.json \
     --model_name_or_path Qwen/Qwen2-0.5B-Instruct \
-    --reward_model_path LifelongAlignment/Qwen2.5-0.5B-Instruct_${dataset_name}_REWARD \
-    --learning_rate 5.0e-6 \
+    --reward_model_path LifelongAlignment/Qwen2-0.5B-Instruct_${dataset_name}_REWARD \
+    --learning_rate 1.0e-6 \
     --num_train_epochs 4 \
-    --per_device_train_batch_size 8 \
+    --per_device_train_batch_size 16 \
     --gradient_checkpointing \
     --logging_steps 20 \
     --eval_strategy steps \
     --response_length 256 \
-    --eval_steps 500 \
-    --save_steps 500 \
+    --eval_steps 50000 \
+    --save_steps 300 \
     --bf16 \
-    --output_dir "$SCRATCH/projects/Qwen2-0.5B-DPO-EWC-${dataset_name}" \
+    --output_dir "/home/s/shahradm/links/projects/aip-rrabba/shared/aifgen_experiments/Qwen2-0.5B-DPO-EWC-${dataset_name}" \
     --no_remove_unused_columns \
-    --wandb_project $dataset_name \
-    --wandb_run_name "Qwen2-0.5B-DPO-EWC-${dataset_name}-multi-gpu"
+    --wandb_project "$dataset_name-post-May-19" \
+    --wandb_run_name "Qwen2-0.5B-DPO-EWC-${dataset_name}-multi-gpu-debug"
88 changes: 88 additions & 0 deletions jobs/parallel_eval.sh
@@ -0,0 +1,88 @@
# EVAL
datasets="aifgen-long-piecewise"
dataset_indices="0 1"
checkpoint_indices="300 600"

for dataset_index in $dataset_indices
do
for dataset_name in $datasets
do
for checkpoint in $checkpoint_indices
do
job_name="${dataset_name}-${dataset_index}-${checkpoint}"
mkdir -p out/
run_cmd="jobs/schedule_eval.sh ${dataset_name} ${dataset_index} ${checkpoint}"
sbatch_cmd="sbatch --job-name $job_name ${run_cmd}"
cmd="$sbatch_cmd"
echo -e "${cmd}"
${cmd}
sleep 1
done
done
done

datasets="aifgen-domain-preference-shift"
dataset_indices="0 1 2 3"
checkpoint_indices="300 531"

for dataset_index in $dataset_indices
do
for dataset_name in $datasets
do
for checkpoint in $checkpoint_indices
do
job_name="${dataset_name}-${dataset_index}-${checkpoint}"
mkdir -p out/
run_cmd="jobs/schedule_eval.sh ${dataset_name} ${dataset_index} ${checkpoint}"
sbatch_cmd="sbatch --job-name $job_name ${run_cmd}"
cmd="$sbatch_cmd"
echo -e "${cmd}"
${cmd}
sleep 1
done
done
done

datasets="aifgen-lipschitz"
dataset_indices="0 1 2"
checkpoint_indices="300 900 1063"

for dataset_index in $dataset_indices
do
for dataset_name in $datasets
do
for checkpoint in $checkpoint_indices
do
job_name="${dataset_name}-${dataset_index}-${checkpoint}"
mkdir -p out/
run_cmd="jobs/schedule_eval.sh ${dataset_name} ${dataset_index} ${checkpoint}"
sbatch_cmd="sbatch --job-name $job_name ${run_cmd}"
cmd="$sbatch_cmd"
echo -e "${cmd}"
${cmd}
sleep 1
done
done
done

datasets="aifgen-piecewise-preference-shift"
dataset_indices="0 1 2 3 4 5 6 7"
checkpoint_indices="300 1200 2100"

for dataset_index in $dataset_indices
do
for dataset_name in $datasets
do
for checkpoint in $checkpoint_indices
do
job_name="${dataset_name}-${dataset_index}-${checkpoint}"
mkdir -p out/
run_cmd="jobs/schedule_eval.sh ${dataset_name} ${dataset_index} ${checkpoint}"
sbatch_cmd="sbatch --job-name $job_name ${run_cmd}"
cmd="$sbatch_cmd"
echo -e "${cmd}"
${cmd}
sleep 1
done
done
done
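Review note: the four blocks above are copies of one loop that differ only in dataset name, dataset indices, and checkpoint steps. A possible consolidation, sketched in Python against the same jobs/schedule_eval.sh interface (not part of this PR):

```python
#!/usr/bin/env python3
# Sketch: drive all four sweeps above from a single table.
import itertools
import subprocess
import time
from pathlib import Path

# (dataset, dataset indices, checkpoint steps), copied from the loops above.
SWEEPS = [
    ('aifgen-long-piecewise', [0, 1], [300, 600]),
    ('aifgen-domain-preference-shift', [0, 1, 2, 3], [300, 531]),
    ('aifgen-lipschitz', [0, 1, 2], [300, 900, 1063]),
    ('aifgen-piecewise-preference-shift', list(range(8)), [300, 1200, 2100]),
]

Path('out').mkdir(exist_ok=True)
for dataset, indices, checkpoints in SWEEPS:
    for idx, ckpt in itertools.product(indices, checkpoints):
        job_name = f'{dataset}-{idx}-{ckpt}'
        cmd = ['sbatch', '--job-name', job_name,
               'jobs/schedule_eval.sh', dataset, str(idx), str(ckpt)]
        print(' '.join(cmd))
        subprocess.run(cmd, check=True)
        time.sleep(1)  # same 1 s throttle as the original loops
```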
22 changes: 22 additions & 0 deletions jobs/parallel_eval_cppo_dataset.sh
@@ -0,0 +1,22 @@
# EVAL
datasets="CPPO-RL"
dataset_indices="0 1"
checkpoint_indices="300 1800 2100"

for dataset_index in $dataset_indices
do
for dataset_name in $datasets
do
for checkpoint in $checkpoint_indices
do
job_name="${dataset_name}-${dataset_index}-${checkpoint}"
mkdir -p out/
run_cmd="jobs/schedule_eval_cppo_dataset.sh ${dataset_name} ${dataset_index} ${checkpoint}"
sbatch_cmd="sbatch --job-name $job_name ${run_cmd}"
cmd="$sbatch_cmd"
echo -e "${cmd}"
${cmd}
sleep 1
done
done
done
2 changes: 1 addition & 1 deletion jobs/ppo/ppo_piecewise_multi_gpu.sh
@@ -30,7 +30,7 @@ accelerate launch --config_file benchmarks/cppo/accelerate_configs/deepspeed_zer
     --response_length 256 \
     --num_train_epochs 4 \
     --gradient_checkpointing \
-    --per_device_train_batch_size 8 \
+    --per_device_train_batch_size 4 \
     --logging_steps 10 \
     --eval_strategy steps \
     --eval_steps 200 \
77 changes: 77 additions & 0 deletions jobs/schedule_eval.sh
@@ -0,0 +1,77 @@
#!/bin/bash
#SBATCH --job-name=aif-gen-evaluation
#SBATCH --nodes=1                 # Request 1 node
#SBATCH --gpus-per-node=h100:4 # Request 4 H100 GPUs per node
#SBATCH --ntasks-per-node=4 # One task per GPU
#SBATCH --cpus-per-task=6
#SBATCH --mem=64G
#SBATCH --time=2:00:00
#SBATCH --output=out/%x.%j.out # Include job name + job ID
#SBATCH --error=out/%x.%j.err # Include job name + job ID
#SBATCH --mail-type=ALL
#SBATCH --account=aip-rrabba
#SBATCH --mail-user=shahrad_m@icloud.com # Update with your email
source .env

dataset_name=${1:-'aifgen-lipschitz'}
dataset_index=${2:-'0'}
checkpoint=${3:-'300'}

#DPO on CPPO dataset - DIFFERENT FILE
# accelerate launch --config_file benchmarks/dpo/accelerate_configs/deepspeed_zero3.yaml \
# benchmarks/parallel_eval_checkpoints.py \
# --checkpoint_dir "/scratch/s/shahradm/${dataset_name}/Qwen2-0.5B-DPO-/dataset-${dataset_index}/checkpoint-${checkpoint}" \
# --model_name_or_path Qwen/Qwen2-0.5B-Instruct \
# --wandb_run_name "test_eval_Qwen2-0.5B-DPO-rl256-v5-dataset-${dataset_index}-checkpoint-${checkpoint}" \
# --reward_model_path "/lustre/orion/bif151/scratch/ivan.anokhin/AIF-Gen/${dataset_name}/Qwen2-0.5B-Reward-8gpus/Qwen2-0.5B-Instruct_${dataset_name}_REWARD" \
# --wandb_project eval_${dataset_name} \
# --learning_rate 0. \
# --response_length 256 \
# --dataset_name $dataset_name \
# --per_device_eval_batch_size 16 \
# --per_device_train_batch_size 1 \
# --gradient_accumulation_steps 1 \
# --bf16 \
# --output_dir "/lustre/orion/bif151/scratch/ivan.anokhin/AIF-Gen/${dataset_name}/eval_Qwen2-0.5B-DPO-rl256-v5-8gpus-s${dataset_index}" \
# --no_remove_unused_columns


#PPO - not on CPPO dataset
accelerate launch --config_file benchmarks/dpo/accelerate_configs/deepspeed_zero3.yaml \
benchmarks/parallel_eval_checkpoints.py \
--checkpoint_dir "/scratch/s/shahradm/Qwen2-0.5B-PPO-${dataset_name}/Qwen2-0.5B-Instruct_${dataset_name}_PPO_${dataset_index}/checkpoint-${checkpoint}" \
--model_name_or_path Qwen/Qwen2-0.5B-Instruct \
--wandb_run_name "test_eval_Qwen2-0.5B-PPO-rl256-v1-dataset-${dataset_index}-checkpoint-${checkpoint}" \
--reward_model_path "LifelongAlignment/Qwen2-0.5B-Instruct_${dataset_name}_REWARD" \
--wandb_project eval_${dataset_name}_post_may_19 \
--learning_rate 0. \
--response_length 256 \
--dataset_name $dataset_name \
--per_device_eval_batch_size 32 \
--per_device_train_batch_size 1 \
--gradient_accumulation_steps 1 \
--bf16 \
--output_dir "/scratch/s/shahradm/${dataset_name}/eval_Qwen2-0.5B-PPO-8gpus-rl256-v1-s${dataset_index}" \
--no_remove_unused_columns

# PPO - on CPPO dataset - DIFFERENT FILE

# CPPO - not on CPPO dataset
accelerate launch --config_file benchmarks/dpo/accelerate_configs/deepspeed_zero3.yaml \
benchmarks/parallel_eval_checkpoints.py \
--checkpoint_dir "/home/s/shahradm/links/projects/aip-rrabba/shared/aifgen_experiments/Qwen2-0.5B-CPPO-${dataset_name}/Qwen2-0.5B-Instruct_${dataset_name}_CPPO_${dataset_index}/checkpoint-${checkpoint}" \
--model_name_or_path Qwen/Qwen2-0.5B-Instruct \
--wandb_run_name "test_eval_Qwen2-0.5B-CPPO-rl256-v1-dataset-${dataset_index}-checkpoint-${checkpoint}" \
--reward_model_path "LifelongAlignment/Qwen2-0.5B-Instruct_${dataset_name}_REWARD" \
--wandb_project eval_${dataset_name}_post_may_19 \
--learning_rate 0. \
--response_length 256 \
--dataset_name $dataset_name \
--per_device_eval_batch_size 32 \
--per_device_train_batch_size 1 \
--gradient_accumulation_steps 1 \
--bf16 \
--output_dir "/scratch/s/shahradm/${dataset_name}/eval_Qwen2-0.5B-CPPO-8gpus-rl256-v1-s${dataset_index}" \
--no_remove_unused_columns

# CPPO - on CPPO dataset - DIFFERENT FILE
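Review note: since the three positional arguments default to 'aifgen-lipschitz', '0', and '300', this script can also be submitted directly for a one-off evaluation, e.g.:

```
sbatch --job-name aifgen-lipschitz-0-300 jobs/schedule_eval.sh aifgen-lipschitz 0 300
```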