Patch summary (text before the first "diff --git" line is skipped by patch tools):
  * Replaces the blank line after DATA_DIR with TRAIN_DATA_DIR and
    TEST_DATA_DIR, both initialised from ${DATA_DIR}.
  * Adds the missing newline at the end of train_grpo.sh.
NOTE(review): the leading indentation of the continuation lines in the second
hunk was reconstructed as four spaces; confirm against train_grpo.sh.

diff --git a/train_grpo.sh b/train_grpo.sh
index 119d348e9..38d7a08ed 100644
--- a/train_grpo.sh
+++ b/train_grpo.sh
@@ -1,6 +1,7 @@
 export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
 export DATA_DIR='data/nq_search'
-
+TRAIN_DATA_DIR=${DATA_DIR}
+TEST_DATA_DIR=${DATA_DIR}
 WAND_PROJECT='Search-R1'
 
 # export BASE_MODEL='meta-llama/Llama-3.2-3B'
@@ -79,4 +80,4 @@ PYTHONUNBUFFERED=1 python3 -m verl.trainer.main_ppo \
     max_turns=2 \
     retriever.url="http://127.0.0.1:8000/retrieve" \
     retriever.topk=3 \
-    2>&1 | tee $EXPERIMENT_NAME.log
\ No newline at end of file
+    2>&1 | tee $EXPERIMENT_NAME.log