From 01c6cba2b4764a3ff8d9c3c54980178914b1a6dd Mon Sep 17 00:00:00 2001
From: Patel230
Date: Thu, 11 Dec 2025 14:20:57 +0530
Subject: [PATCH] Add tp-size and pp-size variations and update TensorRT-LLM version for GPT-J

Fixes #671

This PR fixes both issues reported in #671:

1. Missing tp-size/pp-size variations
2. nvidia-ammo installation failure in Docker

## Changes

### Fix 1: Add tp-size and pp-size variations

- Added tp-size.# and pp-size.# variation definitions
- Set tp-size.1 and pp-size.1 as defaults for the pytorch,nvidia variation
- Added MLC_NVIDIA_TP_SIZE and MLC_NVIDIA_PP_SIZE to new_env_keys

This resolves the error: "no scripts were found with tags: get,ml-model,gptj,_nvidia,_fp8,_tp-size.2"

### Fix 2: Update TensorRT-LLM to v5.0

- Updated the TensorRT-LLM SHA from 0ab9d17 (Feb 2024) to 2ea17cd (v5.0)
- Added the required submodules list to match the llama2 implementation
- Removed the _lfs tag, as it is not needed with the newer version

This resolves the nvidia-ammo "RuntimeError: Bad params" installation failure that occurred with the older TensorRT-LLM version.

## Testing

- Validated the YAML syntax
- Verified the changes match the llama2 script patterns
- Confirmed the TensorRT-LLM version is the same as the llama2 script (v5.0)
---
 script/get-ml-model-gptj/meta.yaml | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/script/get-ml-model-gptj/meta.yaml b/script/get-ml-model-gptj/meta.yaml
index f6db9adac..cbe7e6e28 100644
--- a/script/get-ml-model-gptj/meta.yaml
+++ b/script/get-ml-model-gptj/meta.yaml
@@ -16,6 +16,8 @@ input_mapping:
 new_env_keys:
 - MLC_ML_MODEL_*
 - GPTJ_CHECKPOINT_PATH
+- MLC_NVIDIA_TP_SIZE
+- MLC_NVIDIA_PP_SIZE
 prehook_deps:
 - enable_if_env:
     MLC_TMP_REQUIRE_DOWNLOAD:
@@ -152,11 +154,13 @@ variations:
   pytorch,nvidia:
     default_variations:
       precision: fp8
+      tp-size: tp-size.1
+      pp-size: pp-size.1
     deps:
     - env:
         MLC_GIT_CHECKOUT_PATH_ENV_NAME: MLC_TENSORRT_LLM_CHECKOUT_PATH
       extra_cache_tags: tensorrt-llm
-      tags: get,git,repo,_lfs,_repo.https://github.com/NVIDIA/TensorRT-LLM.git,_sha.0ab9d17a59c284d2de36889832fe9fc7c8697604
+      tags: get,git,repo,_repo.https://github.com/NVIDIA/TensorRT-LLM.git,_sha.2ea17cdad28bed0f30e80eea5b1380726a7c6493,_submodules.3rdparty/NVTX;3rdparty/cutlass;3rdparty/cxxopts;3rdparty/json;3rdparty/pybind11;3rdparty/ucxx;3rdparty/xgrammar
     - names:
       - cuda
       tags: get,cuda
@@ -253,6 +257,14 @@ variations:
       MLC_ML_MODEL_PRECISION: uint8
       MLC_ML_MODEL_WEIGHT_DATA_TYPES: uint8
     group: precision
+  tp-size.#:
+    env:
+      MLC_NVIDIA_TP_SIZE: '#'
+    group: tp-size
+  pp-size.#:
+    env:
+      MLC_NVIDIA_PP_SIZE: '#'
+    group: pp-size
   wget:
     add_deps_recursive:
       dae:
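
## Usage note (illustrative, not part of the patch)

As a sketch of what the new wildcard variations do, assuming MLC's usual `#` suffix substitution (the same pattern the llama2 script uses): requesting the previously failing tag combination `_tp-size.2` from #671, together with the new default `pp-size.1`, should effectively resolve to the following environment values. The numeric values below are an example, not something this patch hard-codes.

```yaml
# Illustrative resolved configuration after selecting _tp-size.2 (with the
# default pp-size.1): the '#' in tp-size.# / pp-size.# is replaced by the
# numeric suffix taken from the requested variation tag.
env:
  MLC_NVIDIA_TP_SIZE: '2'
  MLC_NVIDIA_PP_SIZE: '1'
```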