diff --git a/config/config_endtoend.yaml b/config/config_endtoend.yaml
new file mode 100644
index 0000000..94f9af2
--- /dev/null
+++ b/config/config_endtoend.yaml
@@ -0,0 +1,62 @@
+base_llm: meta-llama/Llama-3.1-8B-Instruct
+base_model: /capstor/store/cscs/swissai/a127/homes/$USER/models/alignment/checkpoint-1620 # Update path to your own
+attachment_token: <|reserved_special_token_0|>
+tokenizer_type: llama
+token_size: 4096
+truncation: true # important to avoid OOM errors
+max_seq_length: 4096 # important to avoid OOM errors
+
+loaders:
+  - loader_type: raw-image
+    modality_type: image
+
+modalities:
+  - model_type: meditron_clip
+    clip_name: openai/clip-vit-large-patch14
+    hidden_size: 4096
+
+training_mode: END2END
+
+datasets:
+  # - packed_path: /capstor/store/cscs/swissai/a127/meditron/multimediset/arrow/BUSI
+  # - packed_path: /capstor/store/cscs/swissai/a127/meditron/multimediset/arrow/COVID_US
+  # - packed_path: /capstor/store/cscs/swissai/a127/meditron/multimediset/arrow/ct2
+  # - packed_path: /capstor/store/cscs/swissai/a127/meditron/multimediset/arrow/DDTI
+  # - packed_path: /capstor/store/cscs/swissai/a127/meditron/multimediset/arrow/iu_xray
+  - packed_path: /capstor/store/cscs/swissai/a127/meditron/multimediset/arrow/llava_instruct
+  # - packed_path: /capstor/store/cscs/swissai/a127/meditron/multimediset/arrow/llava_pretrain_cleaned
+  # - packed_path: /capstor/store/cscs/swissai/a127/meditron/multimediset/arrow/medtrinity_conversations_1
+  # - packed_path: /capstor/store/cscs/swissai/a127/meditron/multimediset/arrow/medtrinity_conversations_2
+  # - packed_path: /capstor/store/cscs/swissai/a127/meditron/multimediset/arrow/pixmo_anything
+  # - packed_path: /capstor/store/cscs/swissai/a127/meditron/multimediset/arrow/PMC_VQA
+  - packed_path: /capstor/store/cscs/swissai/a127/meditron/multimediset/arrow/image_mammoth
+
+training_args:
+  output_dir: /capstor/store/cscs/swissai/a127/homes/$USER/models/endtoend-1D
+  dataloader_num_workers: 16 # > 0 not supported for IterableDataset, cf. https://github.com/huggingface/datasets/issues/5984
+  dataloader_prefetch_factor: 4
+  remove_unused_columns: false
+  ddp_find_unused_parameters: false
+  learning_rate: 1.0e-4
+  bf16: true
+  per_device_train_batch_size: 1 # note that training_args.n_gpu and training_args.train_batch_size show faulty values
+  # with deepspeed -> use deepspeed_plugin instead (besides training_args.distributed_state.num_processes == WORLD_SIZE)
+  gradient_accumulation_steps: 8
+  num_train_epochs: 10
+  gradient_checkpointing: true
+  gradient_checkpointing_kwargs:
+    use_reentrant: true
+  save_strategy: steps
+  save_steps: 0.25
+  max_grad_norm: 1.0
+  run_name: MultiMeditron-Llama-8B-end2end
+  deepspeed: /users/$USER/MultiMeditron/config/deepspeed.json # Update path to your own
+  accelerator_config:
+    dispatch_batches: false
+  lr_scheduler_type: "cosine_with_min_lr"
+  lr_scheduler_kwargs:
+    min_lr: 3.0e-5
+  report_to: wandb
+  logging_steps: 1
+  weight_decay: 0.01
+
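For reference, a minimal sketch of how the `training_args` section of this YAML could be materialized into `transformers.TrainingArguments`. This is an illustrative assumption, not the actual MultiMeditron loader; the config path and the `$USER` expansion step are hypothetical:

```python
# Illustrative sketch (not the actual MultiMeditron entry point): build
# TrainingArguments from the `training_args` section of the YAML above.
# Assumes pyyaml and a recent transformers release (lr_scheduler_kwargs and
# accelerator_config need >=4.38); the deepspeed path additionally requires
# accelerate/deepspeed to be installed when TrainingArguments is constructed.
import os

import yaml
from transformers import TrainingArguments

with open("config/config_endtoend.yaml") as f:
    cfg = yaml.safe_load(f)

# Expand the $USER placeholders left in output_dir and the deepspeed path.
raw_args = {
    k: os.path.expandvars(v) if isinstance(v, str) else v
    for k, v in cfg["training_args"].items()
}

training_args = TrainingArguments(**raw_args)
print(training_args.lr_scheduler_type, training_args.learning_rate)
```

All keys under `training_args` map one-to-one onto `TrainingArguments` fields, so a plain `**`-splat works; note that `save_steps: 0.25` is interpreted by the HF Trainer as a fraction of total training steps, i.e. a checkpoint every quarter of the run.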