62 changes: 62 additions & 0 deletions config/config_endtoend.yaml
@@ -0,0 +1,62 @@
base_llm: meta-llama/Llama-3.1-8B-Instruct
base_model: /capstor/store/cscs/swissai/a127/homes/$USER/models/alignment/checkpoint-1620 # Update path to your own
attachment_token: <|reserved_special_token_0|>
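# one of Llama 3.1's reserved vocabulary slots; presumably the placeholder at which attachment (image) embeddings are spliced into the text sequence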
tokenizer_type: llama
token_size: 4096
truncation: true # important to avoid OOM Error
max_seq_length: 4096 # important to avoid OOM Error

loaders:
- loader_type: raw-image
  modality_type: image

modalities:
- model_type: meditron_clip
  clip_name: openai/clip-vit-large-patch14
  hidden_size: 4096
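  # 4096 matches the hidden dimension of Llama-3.1-8B, so projected CLIP features presumably land directly in the LLM embedding space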

training_mode: END2END

datasets:
# - packed_path: /capstor/store/cscs/swissai/a127/meditron/multimediset/arrow/BUSI
# - packed_path: /capstor/store/cscs/swissai/a127/meditron/multimediset/arrow/COVID_US
# - packed_path: /capstor/store/cscs/swissai/a127/meditron/multimediset/arrow/ct2
# - packed_path: /capstor/store/cscs/swissai/a127/meditron/multimediset/arrow/DDTI
# - packed_path: /capstor/store/cscs/swissai/a127/meditron/multimediset/arrow/iu_xray
- packed_path: /capstor/store/cscs/swissai/a127/meditron/multimediset/arrow/llava_instruct
# - packed_path: /capstor/store/cscs/swissai/a127/meditron/multimediset/arrow/llava_pretrain_cleaned
# - packed_path: /capstor/store/cscs/swissai/a127/meditron/multimediset/arrow/medtrinity_conversations_1
# - packed_path: /capstor/store/cscs/swissai/a127/meditron/multimediset/arrow/medtrinity_conversations_2
# - packed_path: /capstor/store/cscs/swissai/a127/meditron/multimediset/arrow/pixmo_anything
# - packed_path: /capstor/store/cscs/swissai/a127/meditron/multimediset/arrow/PMC_VQA
- packed_path: /capstor/store/cscs/swissai/a127/meditron/multimediset/arrow/image_mammoth
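# only llava_instruct and image_mammoth are enabled in this run; re-enable the commented entries above to add more sources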

training_args:
  output_dir: /capstor/store/cscs/swissai/a127/homes/$USER/models/endtoend-1D
  dataloader_num_workers: 16 # values > 0 are not supported for IterableDataset, cf. https://github.com/huggingface/datasets/issues/5984
  dataloader_prefetch_factor: 4
  remove_unused_columns: false
  ddp_find_unused_parameters: false
  learning_rate: 1.0e-4
  bf16: true
  per_device_train_batch_size: 1 # note: with DeepSpeed, training_args.n_gpu and training_args.train_batch_size report faulty
  # values -> use deepspeed_plugin instead (training_args.distributed_state.num_processes does equal WORLD_SIZE)
  gradient_accumulation_steps: 8
  num_train_epochs: 10
  gradient_checkpointing: true
  gradient_checkpointing_kwargs:
    use_reentrant: true
  save_strategy: steps
  save_steps: 0.25
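  # a float in [0, 1) is interpreted by the HF Trainer as a fraction of total training steps, i.e. a checkpoint every quarter of the run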
  max_grad_norm: 1.0
  run_name: MultiMeditron-Llama-8B-end2end
  deepspeed: /users/$USER/MultiMeditron/config/deepspeed.json # Update path to your own
  accelerator_config:
    dispatch_batches: false
  lr_scheduler_type: "cosine_with_min_lr"
  lr_scheduler_kwargs:
    min_lr: 3.0e-5
  report_to: wandb
  logging_steps: 1
  weight_decay: 0.01

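For orientation, a minimal sketch of how a config of this shape could be consumed. It assumes a plain PyYAML + HF Transformers entry point, which may differ from MultiMeditron's actual loader:

```python
# Minimal sketch (assumed entry point, not MultiMeditron's actual code):
# load the YAML, expand $USER in the paths, and build HF TrainingArguments.
import os

import yaml
from transformers import TrainingArguments

with open("config/config_endtoend.yaml") as f:
    # os.path.expandvars substitutes $USER (and any other env vars) in the raw text
    cfg = yaml.safe_load(os.path.expandvars(f.read()))

# every key under training_args is a genuine TrainingArguments field
# (lr_scheduler_kwargs, accelerator_config, deepspeed, ...), so the
# section can be splatted straight into the constructor
args = TrainingArguments(**cfg["training_args"])

print(args.output_dir)         # .../models/endtoend-1D with $USER expanded
print(args.lr_scheduler_type)  # cosine_with_min_lr
```

Note that `TrainingArguments.__post_init__` validates `bf16` support and reads the DeepSpeed JSON at construction time, so the sketch only runs as-is on a bf16-capable node with the referenced paths present.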