diff --git a/mlperf_logging/compliance_checker/training_6.0.0/closed_deepseekv3_671b.yaml b/mlperf_logging/compliance_checker/training_6.0.0/closed_deepseekv3_671b.yaml index 915d4fb..3862e1d 100644 --- a/mlperf_logging/compliance_checker/training_6.0.0/closed_deepseekv3_671b.yaml +++ b/mlperf_logging/compliance_checker/training_6.0.0/closed_deepseekv3_671b.yaml @@ -69,6 +69,11 @@ REQ: EXACTLY_ONE CHECK: " v['value'] == 1.0 " +- KEY: + NAME: moe_aux_loss_coeff + REQ: EXACTLY_ONE + CHECK: " v['value'] == 0.01 " + - KEY: NAME: gradient_accumulation_steps REQ: EXACTLY_ONE diff --git a/mlperf_logging/compliance_checker/training_6.0.0/open_deepseekv3_671b.yaml b/mlperf_logging/compliance_checker/training_6.0.0/open_deepseekv3_671b.yaml index d6de5fa..b50437b 100644 --- a/mlperf_logging/compliance_checker/training_6.0.0/open_deepseekv3_671b.yaml +++ b/mlperf_logging/compliance_checker/training_6.0.0/open_deepseekv3_671b.yaml @@ -55,6 +55,10 @@ NAME: opt_gradient_clip_norm REQ: EXACTLY_ONE +- KEY: + NAME: moe_aux_loss_coeff + REQ: EXACTLY_ONE + - KEY: NAME: gradient_accumulation_steps REQ: EXACTLY_ONE diff --git a/mlperf_logging/mllog/constants.py b/mlperf_logging/mllog/constants.py index 002a2a1..afcb8f1 100644 --- a/mlperf_logging/mllog/constants.py +++ b/mlperf_logging/mllog/constants.py @@ -177,6 +177,7 @@ START_WARMUP_STEP = "start_warmup_step" INIT_CHECKPOINT_STEP = "init_checkpoint_step" LORA_ALPHA = "lora_alpha" +MOE_AUX_LOSS_COEFF = "moe_aux_loss_coeff" # Log keys - misc. BBOX = "bbox" SEGM = "segm"