62 changes: 62 additions & 0 deletions config/config_endtoend.yaml
@@ -0,0 +1,62 @@
base_llm: meta-llama/Llama-3.1-8B-Instruct
base_model: /capstor/store/cscs/swissai/a127/homes/$USER/models/alignment/checkpoint-1620 # Update path to your own
attachment_token: <|reserved_special_token_0|>
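# one of Llama 3.1's reserved vocabulary slots; presumably the placeholder at which attachment (image) embeddings are spliced into the text sequence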
tokenizer_type: llama
token_size: 4096
truncation: true # important to avoid OOM Error
max_seq_length: 4096 # important to avoid OOM Error

loaders:
- loader_type: raw-image
  modality_type: image

modalities:
- model_type: meditron_clip
  clip_name: openai/clip-vit-large-patch14
  hidden_size: 4096
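  # 4096 matches the hidden dimension of Llama-3.1-8B, so projected CLIP features presumably land directly in the LLM embedding space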

training_mode: END2END

datasets:
# - packed_path: /capstor/store/cscs/swissai/a127/meditron/multimediset/arrow/BUSI
# - packed_path: /capstor/store/cscs/swissai/a127/meditron/multimediset/arrow/COVID_US
# - packed_path: /capstor/store/cscs/swissai/a127/meditron/multimediset/arrow/ct2
# - packed_path: /capstor/store/cscs/swissai/a127/meditron/multimediset/arrow/DDTI
# - packed_path: /capstor/store/cscs/swissai/a127/meditron/multimediset/arrow/iu_xray
- packed_path: /capstor/store/cscs/swissai/a127/meditron/multimediset/arrow/llava_instruct
# - packed_path: /capstor/store/cscs/swissai/a127/meditron/multimediset/arrow/llava_pretrain_cleaned
# - packed_path: /capstor/store/cscs/swissai/a127/meditron/multimediset/arrow/medtrinity_conversations_1
# - packed_path: /capstor/store/cscs/swissai/a127/meditron/multimediset/arrow/medtrinity_conversations_2
# - packed_path: /capstor/store/cscs/swissai/a127/meditron/multimediset/arrow/pixmo_anything
# - packed_path: /capstor/store/cscs/swissai/a127/meditron/multimediset/arrow/PMC_VQA
- packed_path: /capstor/store/cscs/swissai/a127/meditron/multimediset/arrow/image_mammoth
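# only llava_instruct and image_mammoth are enabled in this run; re-enable the commented entries above to add more sources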

training_args:
  output_dir: /capstor/store/cscs/swissai/a127/homes/$USER/models/endtoend-1D
  dataloader_num_workers: 16 # values > 0 are not supported for IterableDataset, cf. https://github.com/huggingface/datasets/issues/5984
  dataloader_prefetch_factor: 4
  remove_unused_columns: false
  ddp_find_unused_parameters: false
  learning_rate: 1.0e-4
  bf16: true
  per_device_train_batch_size: 1 # note: with DeepSpeed, training_args.n_gpu and training_args.train_batch_size report faulty
  # values -> use deepspeed_plugin instead (training_args.distributed_state.num_processes does equal WORLD_SIZE)
  gradient_accumulation_steps: 8
  num_train_epochs: 10
  gradient_checkpointing: true
  gradient_checkpointing_kwargs:
    use_reentrant: true
  save_strategy: steps
  save_steps: 0.25
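  # a float in [0, 1) is interpreted by the HF Trainer as a fraction of total training steps, i.e. a checkpoint every quarter of the run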
  max_grad_norm: 1.0
  run_name: MultiMeditron-Llama-8B-end2end
  deepspeed: /users/$USER/MultiMeditron/config/deepspeed.json # Update path to your own
  accelerator_config:
    dispatch_batches: false
  lr_scheduler_type: "cosine_with_min_lr"
  lr_scheduler_kwargs:
    min_lr: 3.0e-5
  report_to: wandb
  logging_steps: 1
  weight_decay: 0.01

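For orientation, a minimal sketch of how a config of this shape could be consumed. It assumes a plain PyYAML + HF Transformers entry point, which may differ from MultiMeditron's actual loader:

```python
# Minimal sketch (assumed entry point, not MultiMeditron's actual code):
# load the YAML, expand $USER in the paths, and build HF TrainingArguments.
import os

import yaml
from transformers import TrainingArguments

with open("config/config_endtoend.yaml") as f:
    # os.path.expandvars substitutes $USER (and any other env vars) in the raw text
    cfg = yaml.safe_load(os.path.expandvars(f.read()))

# every key under training_args is a genuine TrainingArguments field
# (lr_scheduler_kwargs, accelerator_config, deepspeed, ...), so the
# section can be splatted straight into the constructor
args = TrainingArguments(**cfg["training_args"])

print(args.output_dir)         # .../models/endtoend-1D with $USER expanded
print(args.lr_scheduler_type)  # cosine_with_min_lr
```

Note that `TrainingArguments.__post_init__` validates `bf16` support and reads the DeepSpeed JSON at construction time, so the sketch only runs as-is on a bf16-capable node with the referenced paths present.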