diff --git a/examples/llm_qad/qad.sh b/examples/llm_qad/qad.sh index 52ec2bd6a..ac416ad35 100644 --- a/examples/llm_qad/qad.sh +++ b/examples/llm_qad/qad.sh @@ -181,7 +181,7 @@ CHECKPOINT_ARGS=" \ ${LOAD_OPTIM_ARGS} \ --load ${LOAD_CHECKPOINT_DIR} \ --export-kd-teacher-load ${TEACHER_CKPT} \ - --teacher-model-config ${TEACHER_MODEL_CONFIG}" + --export-kd-teacher-model-config ${TEACHER_MODEL_CONFIG}" # KD config (optional) if [[ -n "$KD_CFG_PATH" && -f "$KD_CFG_PATH" ]]; then diff --git a/modelopt/torch/distill/plugins/megatron.py b/modelopt/torch/distill/plugins/megatron.py index b0eeeab74..dbfad6fb6 100644 --- a/modelopt/torch/distill/plugins/megatron.py +++ b/modelopt/torch/distill/plugins/megatron.py @@ -382,7 +382,7 @@ def __init__( model_config: "TransformerConfig", temperature: float = 1.0, reverse: bool = False, - top_k: int = 1000, + top_k: int = 1024, ): """Constructor.