Add tp-size and pp-size variations to GPT-J model script

Patel230 · Patel230 · commit ae7fc2e7fc33 · 2025-12-11T14:20:57.000+05:30
Fixes #671.

Added missing tensor parallelism (tp-size) and pipeline parallelism (pp-size) variations to the get-ml-model-gptj script to match the implementation in get-ml-model-llama2.

Changes:
- Added tp-size.# and pp-size.# variation definitions
- Set default tp-size.1 and pp-size.1 for the pytorch,nvidia variation
- Added MLC_NVIDIA_TP_SIZE and MLC_NVIDIA_PP_SIZE to new_env_keys

This resolves the error "no scripts were found with tags: get,ml-model,gptj,_nvidia,_fp8,_tp-size.2" when users try to set the MLC_NVIDIA_TP_SIZE environment variable.
diff --git a/script/get-ml-model-gptj/meta.yaml b/script/get-ml-model-gptj/meta.yaml
@@ -16,6 +16,8 @@ input_mapping:
 new_env_keys:
 - MLC_ML_MODEL_*
 - GPTJ_CHECKPOINT_PATH
+- MLC_NVIDIA_TP_SIZE
+- MLC_NVIDIA_PP_SIZE
 prehook_deps:
 - enable_if_env:
     MLC_TMP_REQUIRE_DOWNLOAD:
@@ -152,6 +154,8 @@ variations:
   pytorch,nvidia:
     default_variations:
       precision: fp8
+      tp-size: tp-size.1
+      pp-size: pp-size.1
     deps:
     - env:
         MLC_GIT_CHECKOUT_PATH_ENV_NAME: MLC_TENSORRT_LLM_CHECKOUT_PATH
@@ -253,6 +257,14 @@ variations:
       MLC_ML_MODEL_PRECISION: uint8
       MLC_ML_MODEL_WEIGHT_DATA_TYPES: uint8
     group: precision
+  tp-size.#:
+    env:
+      MLC_NVIDIA_TP_SIZE: '#'
+    group: tp-size
+  pp-size.#:
+    env:
+      MLC_NVIDIA_PP_SIZE: '#'
+    group: pp-size
   wget:
     add_deps_recursive:
       dae: