From 0e45769e7fac0196fa67087c28a0c86dae000d17 Mon Sep 17 00:00:00 2001 From: Haokun Liu Date: Tue, 23 Jun 2020 11:19:33 -0400 Subject: [PATCH 1/5] add transfer example --- documentation/porting_examples/example5.md | 120 ++++++++++++++++++ .../example5_assets/write_data_configs.py | 103 +++++++++++++++ 2 files changed, 223 insertions(+) create mode 100644 documentation/porting_examples/example5.md create mode 100644 documentation/porting_examples/example5_assets/write_data_configs.py diff --git a/documentation/porting_examples/example5.md b/documentation/porting_examples/example5.md new file mode 100644 index 0000000..d422bcb --- /dev/null +++ b/documentation/porting_examples/example5.md @@ -0,0 +1,120 @@ +# Transfer example + +```bash +# Set up these paths according to your env +WORKING_DIR=... # Choose a working dir (better in scratch) +NYU_JIANT_DIR=... # Where you downloaded https://github.com/jiant-dev/nyu-jiant + +MODELS_DIR=${WORKING_DIR}/models +DATA_DIR=${WORKING_DIR}/data +CACHE_DIR=${WORKING_DIR}/cache +RUN_CONFIG_DIR=${WORKING_DIR}/run_config_dir/transfer_example +OUTPUT_DIR=${WORKING_DIR}/output_dir/transfer_example +MODEL_TYPE=roberta-large + +# Download model +python jiant/scripts/preproc/export_model.py \ + --model_type ${MODEL_TYPE} \ + --output_base_path ${MODELS_DIR}/${MODEL_TYPE} + +# Move data into location +# Ping Haokun on slack, if you have access issues +cp -r /scratch/hl3232/shared/transfer_pilot_data ${WORKING_DIR}/data/data + + +# Prepare data configs +python ${NYU_JIANT_DIR}/documentation/porting_examples/example5_assets/write_data_configs.py \ + --output_base_path ${DATA_DIR}/ + + +# Tokenize and cache datasets +for TASK_NAME in mnli ccg squadv1 cosmosqa rte cola boolq wic +do + python jiant/proj/simple/tokenize_and_cache.py \ + --task_config_path ${DATA_DIR}/configs/${TASK_NAME}.json \ + --model_type ${MODEL_TYPE} \ + --model_tokenizer_path ${MODELS_DIR}/${MODEL_TYPE}/tokenizer \ + --phases train,val \ + --max_seq_length 256 \ + 
--do_iter \ + --smart_truncate \ + --output_dir ${CACHE_DIR}/${MODEL_TYPE}/${TASK_NAME} +done + +# Generate run configs +declare -A TASK_EPOCHS=( + ["mnli"]=3 + ["ccg"]=3 + ["squadv1"]=3 + ["cosmosqa"]=3 + ["rte"]=20 + ["cola"]=20 + ["boolq"]=20 + ["wic"]=20 +) +declare -A GPUS=( + ["mnli"]=1 + ["ccg"]=1 + ["squadv1"]=1 + ["cosmosqa"]=4 + ["rte"]=1 + ["cola"]=1 + ["boolq"]=1 + ["wic"]=1 +) +for TASK_NAME in mnli ccg squadv1 cosmosqa rte cola boolq wic +do + python ${NYU_JIANT_DIR}/documentation/porting_examples/example4_assets/make_config.py \ + --task_config_path ${DATA_DIR}/configs/${TASK_NAME}.json \ + --task_cache_base_path ${CACHE_DIR}/${MODEL_TYPE}/${TASK_NAME} \ + --train_batch_size 16 \ + --epochs ${TASK_EPOCHS[${TASK_NAME}]} \ + --output_path ${RUN_CONFIG_DIR}/${TASK_NAME}.json +done + +# Train single task +for TASK_NAME in mnli ccg squadv1 cosmosqa rte cola boolq wic +do + COMMAND="python \ + jiant/proj/main/runscript.py \ + run \ + --ZZsrc ${MODELS_DIR}/${MODEL_TYPE}/config.json \ + --jiant_task_container_config_path ${RUN_CONFIG_DIR}/${TASK_NAME}.json \ + --model_load_mode from_transformers \ + --learning_rate 1e-5 \ + --force_overwrite \ + --do_train --do_val \ + --do_save \ + --eval_every_steps 2000 \ + --no_improvements_for_n_evals 30 \ + --save_checkpoint_every_steps 10000 \ + --output_dir ${OUTPUT_DIR}/${TASK_NAME}/" sbatch ~/j2_g${GPUS[${TASK_NAME}]}.sbatch +done + +# Train target task from source task +for SOURCE_TASK in mnli ccg squadv1 cosmosqa +do + for TARGET_TASK in rte cola boolq wic + do + COMMAND="python \ + jiant/proj/main/runscript.py \ + run \ + --ZZoverrides model_path \ + --ZZsrc ${MODELS_DIR}/${MODEL_TYPE}/config.json \ + --jiant_task_container_config_path ${RUN_CONFIG_DIR}/${TARGET_TASK}.json \ + --model_load_mode partial \ + --model_path ${OUTPUT_DIR}/${SOURCE_TASK}/best_model.p \ + --learning_rate 1e-5 \ + --force_overwrite \ + --do_train --do_val \ + --do_save \ + --eval_every_steps 5000 \ + --no_improvements_for_n_evals 30 \ + 
--save_checkpoint_every_steps 10000 \ + --output_dir ${OUTPUT_DIR}/${SOURCE_TASK}__${TARGET_TASK}/" sbatch ~/j2_g${GPUS[${TARGET_TASK}]}.sbatch + done +done + + +grep major ${OUTPUT_DIR}/*/val_metrics.json +``` diff --git a/documentation/porting_examples/example5_assets/write_data_configs.py b/documentation/porting_examples/example5_assets/write_data_configs.py new file mode 100644 index 0000000..304426d --- /dev/null +++ b/documentation/porting_examples/example5_assets/write_data_configs.py @@ -0,0 +1,103 @@ +import os +import jiant.utils.python.io as py_io +import jiant.utils.zconf as zconf + + +def write_data_configs(output_base_path): + all_configs = { + "mnli": { + "task": "mnli", + "paths": { + "train": "mnli/train.jsonl", + "val": "mnli/val.jsonl", + "test": "mnli/test.jsonl", + }, + "name": "mnli", + }, + "ccg": { + "task": "ccg", + "paths": { + "train": "ccg/ccg.train", + "val": "ccg/ccg.dev", + "test": "ccg/ccg.test", + "tags_to_id": "/scratch/hl3236/j2_files/data/data/ccg/tags_to_id.json", + }, + "name": "ccg", + }, + "squadv1": { + "task": "squadv1", + "paths": { + "train": "squadv1/train-v1.1.json", + "val": "squadv1/dev-v1.1.json", + "test": "squadv1/dev-v1.1.json", + }, + "name": "squadv1", + }, + "cosmosqa": { + "task": "cosmosqa", + "paths": { + "train": "cosmosqa/train.csv", + "val": "cosmosqa/valid.csv", + "test": "cosmosqa/test_no_label.csv", + }, + "name": "cosmosqa", + }, + "rte": { + "task": "rte", + "paths": { + "train": "rte/train.jsonl", + "val": "rte/val.jsonl", + "test": "rte/test.jsonl", + }, + "name": "rte", + }, + "cola": { + "task": "cola", + "paths": { + "train": "cola/train.jsonl", + "val": "cola/val.jsonl", + "test": "cola/test.jsonl", + }, + "name": "cola", + }, + "boolq": { + "task": "boolq", + "paths": { + "train": "boolq/train.jsonl", + "val": "boolq/val.jsonl", + "test": "boolq/test.jsonl", + }, + "name": "boolq", + }, + "wic": { + "task": "wic", + "paths": { + "train": "wic/train.jsonl", + "val": "wic/val.jsonl", + "test": 
"wic/test.jsonl", + }, + "name": "wic", + }, + } + for task_name, config in all_configs.items(): + for split, data_path in config["paths"].items(): + config["paths"][split] = os.path.join(output_base_path, "data", data_path) + + py_io.write_json( + data=config, + path=os.path.join(output_base_path, "configs", f"{task_name}.json"), + ) + + +@zconf.run_config +class RunConfiguration(zconf.RunConfig): + output_base_path = zconf.attr(type=str) + + +def main(): + args = RunConfiguration.default_run_cli() + write_data_configs(output_base_path=args.output_base_path) + + +if __name__ == "__main__": + main() From 4d9a58474be3f13b21260a5a7cad2ae86b296db5 Mon Sep 17 00:00:00 2001 From: Haokun Liu Date: Tue, 23 Jun 2020 11:21:34 -0400 Subject: [PATCH 2/5] minor fix --- .../porting_examples/example5_assets/write_data_configs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/documentation/porting_examples/example5_assets/write_data_configs.py b/documentation/porting_examples/example5_assets/write_data_configs.py index 304426d..866a4fd 100644 --- a/documentation/porting_examples/example5_assets/write_data_configs.py +++ b/documentation/porting_examples/example5_assets/write_data_configs.py @@ -20,7 +20,7 @@ def write_data_configs(output_base_path): "train": "ccg/ccg.train", "val": "ccg/ccg.dev", "test": "ccg/ccg.test", - "tags_to_id": "/scratch/hl3236/j2_files/data/data/ccg/tags_to_id.json", + "tags_to_id": "ccg/tags_to_id.json", }, "name": "ccg", }, From f6558d0916b854d191ed471aa748e15e2b6731c7 Mon Sep 17 00:00:00 2001 From: Haokun Liu Date: Tue, 23 Jun 2020 11:37:05 -0400 Subject: [PATCH 3/5] path changes --- documentation/porting_examples/example5.md | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/documentation/porting_examples/example5.md b/documentation/porting_examples/example5.md index d422bcb..29e1bd6 100644 --- a/documentation/porting_examples/example5.md +++ b/documentation/porting_examples/example5.md @@ -8,9 
+8,10 @@ NYU_JIANT_DIR=... # Where you downloaded https://github.com/jiant-dev/nyu-jiant MODELS_DIR=${WORKING_DIR}/models DATA_DIR=${WORKING_DIR}/data CACHE_DIR=${WORKING_DIR}/cache -RUN_CONFIG_DIR=${WORKING_DIR}/run_config_dir/transfer_example -OUTPUT_DIR=${WORKING_DIR}/output_dir/transfer_example +RUN_CONFIG_DIR=${WORKING_DIR}/run_config_dir +OUTPUT_DIR=${WORKING_DIR}/output_dir MODEL_TYPE=roberta-large +EXP_NAME=transfer_example # Download model python jiant/scripts/preproc/export_model.py \ @@ -69,7 +70,7 @@ do --task_cache_base_path ${CACHE_DIR}/${MODEL_TYPE}/${TASK_NAME} \ --train_batch_size 16 \ --epochs ${TASK_EPOCHS[${TASK_NAME}]} \ - --output_path ${RUN_CONFIG_DIR}/${TASK_NAME}.json + --output_path ${RUN_CONFIG_DIR}/${EXP_NAME}/${TASK_NAME}.json done # Train single task @@ -79,7 +80,7 @@ do jiant/proj/main/runscript.py \ run \ --ZZsrc ${MODELS_DIR}/${MODEL_TYPE}/config.json \ - --jiant_task_container_config_path ${RUN_CONFIG_DIR}/${TASK_NAME}.json \ + --jiant_task_container_config_path ${RUN_CONFIG_DIR}/${EXP_NAME}/${TASK_NAME}.json \ --model_load_mode from_transformers \ --learning_rate 1e-5 \ --force_overwrite \ @@ -88,7 +89,7 @@ do --eval_every_steps 2000 \ --no_improvements_for_n_evals 30 \ --save_checkpoint_every_steps 10000 \ - --output_dir ${OUTPUT_DIR}/${TASK_NAME}/" sbatch ~/j2_g${GPUS[${TASK_NAME}]}.sbatch + --output_dir ${OUTPUT_DIR}/${EXP_NAME}/${TASK_NAME}/" sbatch ~/j2_g${GPUS[${TASK_NAME}]}.sbatch done # Train target task from source task @@ -101,9 +102,9 @@ do run \ --ZZoverrides model_path \ --ZZsrc ${MODELS_DIR}/${MODEL_TYPE}/config.json \ - --jiant_task_container_config_path ${RUN_CONFIG_DIR}/${TARGET_TASK}.json \ + --jiant_task_container_config_path ${RUN_CONFIG_DIR}/${EXP_NAME}/${TARGET_TASK}.json \ --model_load_mode partial \ - --model_path ${OUTPUT_DIR}/${SOURCE_TASK}/best_model.p \ + --model_path ${OUTPUT_DIR}/${EXP_NAME}/${SOURCE_TASK}/best_model.p \ --learning_rate 1e-5 \ --force_overwrite \ --do_train --do_val \ @@ -111,10 
+112,10 @@ do --eval_every_steps 5000 \ --no_improvements_for_n_evals 30 \ --save_checkpoint_every_steps 10000 \ - --output_dir ${OUTPUT_DIR}/${SOURCE_TASK}__${TARGET_TASK}/" sbatch ~/j2_g${GPUS[${TARGET_TASK}]}.sbatch + --output_dir ${OUTPUT_DIR}/${EXP_NAME}/${SOURCE_TASK}__${TARGET_TASK}/" sbatch ~/j2_g${GPUS[${TARGET_TASK}]}.sbatch done done -grep major ${OUTPUT_DIR}/*/val_metrics.json +grep major ${OUTPUT_DIR}/${EXP_NAME}/*/val_metrics.json ``` From 19ce14134faecfa793c1c1a6ef027d904df9b22b Mon Sep 17 00:00:00 2001 From: Haokun Liu Date: Tue, 23 Jun 2020 11:37:23 -0400 Subject: [PATCH 4/5] add a sbatch example --- .../porting_examples/example5_assets/j2_g1.md | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 documentation/porting_examples/example5_assets/j2_g1.md diff --git a/documentation/porting_examples/example5_assets/j2_g1.md b/documentation/porting_examples/example5_assets/j2_g1.md new file mode 100644 index 0000000..5a74ffd --- /dev/null +++ b/documentation/porting_examples/example5_assets/j2_g1.md @@ -0,0 +1,34 @@ +# An example of j2_g1.sbatch + +Replace every occurrence of [your net id] with your own NetID. +Create one copy of this file per GPU count you need, changing the number in gres=gpu:p40:**1** (e.g. j2_g4.sbatch for 4 GPUs). +Move these files to your home directory (~). +(If you are working from Windows, convert the line endings first:) `dos2unix ~/*.sbatch` + +```bash +#!/bin/bash +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=7 +#SBATCH --gres=gpu:p40:1 +#SBATCH --time=168:00:00 +#SBATCH --mem=30000 +#SBATCH --job-name=p40 +#SBATCH --mail-type=END +#SBATCH --mail-user=[your net id]@nyu.edu +#SBATCH --output=/scratch/[your net id]/slurm/%j.out +module purge +module load anaconda3/5.3.1 +source activate jiant2 +export PATH=/home/[your net id]/jiant-dev:$PATH # Where you downloaded https://github.com/jiant-dev/jiant +export PYTHONPATH=/home/[your net id]/jiant-dev:$PYTHONPATH # Where you downloaded https://github.com/jiant-dev/jiant +export WORKING_DIR=/scratch/[your net id]/j2_files # Choose a working dir +export 
NYU_JIANT_DIR=/home/[your net id]/nyu-jiant # https://github.com/jiant-dev/nyu-jiant +export MODELS_DIR=${WORKING_DIR}/models +export DATA_DIR=${WORKING_DIR}/data +export CACHE_DIR=${WORKING_DIR}/cache +export RUN_CONFIG_DIR=${WORKING_DIR}/run_config_dir/ +export OUTPUT_DIR=${WORKING_DIR}/output_dir/ +echo ${COMMAND} +${COMMAND} +``` From cda36063c4f0762ea8c36f4b4d917b532ec6f803 Mon Sep 17 00:00:00 2001 From: Haokun Liu Date: Tue, 23 Jun 2020 16:10:48 -0400 Subject: [PATCH 5/5] Update example5.md --- documentation/porting_examples/example5.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/documentation/porting_examples/example5.md b/documentation/porting_examples/example5.md index 29e1bd6..abc3720 100644 --- a/documentation/porting_examples/example5.md +++ b/documentation/porting_examples/example5.md @@ -109,7 +109,7 @@ do --force_overwrite \ --do_train --do_val \ --do_save \ - --eval_every_steps 5000 \ + --eval_every_steps 2000 \ --no_improvements_for_n_evals 30 \ --save_checkpoint_every_steps 10000 \ --output_dir ${OUTPUT_DIR}/${EXP_NAME}/${SOURCE_TASK}__${TARGET_TASK}/" sbatch ~/j2_g${GPUS[${TARGET_TASK}]}.sbatch