74 changes: 66 additions & 8 deletions tests/_test_utils/deploy_utils.py
@@ -19,6 +19,48 @@
 import pytest
 import torch
 
+# Cache for available backends detection (computed once at import time)
+_AVAILABLE_BACKENDS = None
+
+
+def get_available_backends():
+    """Detect which backends are available in the current environment.
+
+    Returns:
+        set: A set of available backend names ('trtllm', 'vllm', 'sglang')
+    """
+    global _AVAILABLE_BACKENDS
+    if _AVAILABLE_BACKENDS is not None:
+        return _AVAILABLE_BACKENDS
+
+    available = set()
+
+    try:
+        import tensorrt_llm  # noqa: F401
+
+        available.add("trtllm")
+    except ImportError:
+        pass
+
+    try:
+        import vllm  # noqa: F401
+
+        available.add("vllm")
+    except ImportError:
+        pass
+
+    try:
+        import sglang  # noqa: F401
+
+        available.add("sglang")
+    except ImportError:
+        pass
+
+    _AVAILABLE_BACKENDS = available
+    print(f"[deploy_utils] Detected available backends: {available}")
+    return _AVAILABLE_BACKENDS
+
+
 # Common test prompts for all backends
 COMMON_PROMPTS = [
     "Hello, my name is",
@@ -93,15 +135,18 @@ def _deploy_trtllm(self):
         try:
             from tensorrt_llm import LLM, SamplingParams
             from tensorrt_llm.llmapi import CudaGraphConfig, EagleDecodingConfig, KvCacheConfig
-        except ImportError:
-            pytest.skip("tensorrt_llm package not available")
+        except ImportError as e:
+            raise ImportError("tensorrt_llm package not available. ") from e
 
         sampling_params = SamplingParams(max_tokens=32)
         spec_config = None
         llm = None
         kv_cache_config = KvCacheConfig(enable_block_reuse=True, free_gpu_memory_fraction=0.8)
 
-        if self.model_id == "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8":
+        if self.model_id in (
+            "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8",
+            "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-NVFP4",
+        ):
             llm = LLM(
                 model=self.model_id,
                 tensor_parallel_size=self.tensor_parallel_size,
@@ -175,8 +220,8 @@ def _deploy_vllm(self):
"""Deploy a model using vLLM."""
try:
from vllm import LLM, SamplingParams
except ImportError:
pytest.skip("vllm package not available")
except ImportError as e:
raise ImportError("vllm package not available.") from e

quantization_method = "modelopt"
if "fp4" in self.model_id.lower():
@@ -212,8 +257,8 @@ def _deploy_sglang(self):
"""Deploy a model using SGLang."""
try:
import sglang as sgl
except ImportError:
pytest.skip("sglang package not available")
except ImportError as e:
raise ImportError("sglang package not available.") from e
quantization_method = "modelopt"
if "fp4" in self.model_id.lower():
quantization_method = "modelopt_fp4"
@@ -230,7 +275,10 @@ def _deploy_sglang(self):
                 mem_fraction_static=0.7,
                 context_length=1024,
             )
-        elif self.model_id == "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8":
+        elif self.model_id in (
+            "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8",
+            "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-NVFP4",
+        ):
             llm = sgl.Engine(
                 model_path=self.model_id,
                 quantization=quantization_method,
@@ -259,10 +307,20 @@ def __init__(self, **params):
             else:
                 self.params[key] = [value]
 
+        # Filter backends to only include available ones
+        if "backend" in self.params:
+            available = get_available_backends()
+            original_backends = self.params["backend"]
+            self.params["backend"] = [b for b in original_backends if b in available]
+
         # Pre-generate all deployers for pytest compatibility
         self._deployers = list(self._generate_deployers())
 
     def _generate_deployers(self):
+        # If no backends available after filtering, yield nothing
+        if "backend" in self.params and not self.params["backend"]:
+            return
+
         for values in itertools.product(*self.params.values()):
             deployer = ModelDeployer(**dict(zip(self.params.keys(), values)))
             # Set test case ID in format "model_id_backend"
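
With this filtering, itertools.product never sees a backend that is not installed, so the corresponding test cases are simply not generated rather than generated and then skipped. A small self-contained sketch of the same idea (the parameter values below are illustrative):

import itertools

params = {
    "model_id": ["nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8"],
    "backend": ["trtllm", "vllm", "sglang"],
}

available = {"vllm"}  # stand-in for what get_available_backends() returned
params["backend"] = [b for b in params["backend"] if b in available]

# Only combinations for installed backends remain.
cases = [dict(zip(params.keys(), values)) for values in itertools.product(*params.values())]
assert cases == [{"model_id": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8", "backend": "vllm"}]
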
17 changes: 7 additions & 10 deletions tests/examples/gpt_oss/test_gpt_oss_qat.py
@@ -294,51 +294,48 @@ def deploy_gpt_oss_trtllm(self, tmp_path, model_path_override=None):
 )
 def test_gpt_oss_complete_pipeline(model_path, tmp_path):
     """Test the complete GPT-OSS optimization pipeline by executing all 3 steps in sequence."""
-    import pathlib
 
-    # Use current directory instead of tmp_path for checkpoints
-    current_dir = pathlib.Path.cwd()
     # Create GPTOSS instance with model path
     gpt_oss = GPTOSS(model_path)
 
     if model_path == "openai/gpt-oss-20b":
         # Step 1: SFT Training
-        sft_checkpoint = gpt_oss.gpt_oss_sft_training(current_dir)
+        sft_checkpoint = gpt_oss.gpt_oss_sft_training(tmp_path)
         if not sft_checkpoint or not sft_checkpoint.exists():
             print("Step 1 failed: SFT checkpoint not found, stopping pipeline.")
             return
         print(f"Step 1 completed: SFT checkpoint at {sft_checkpoint}")
 
         # Step 2: QAT Training (depends on Step 1)
-        qat_checkpoint = gpt_oss.gpt_oss_qat_training(current_dir, sft_dir=sft_checkpoint)
+        qat_checkpoint = gpt_oss.gpt_oss_qat_training(tmp_path, sft_dir=sft_checkpoint)
         if not qat_checkpoint or not qat_checkpoint.exists():
             print("Step 2 failed: QAT checkpoint not found, stopping pipeline.")
             return
         print(f"Step 2 completed: QAT checkpoint at {qat_checkpoint}")
 
         # Step 3: MXFP4 Conversion (depends on Step 2)
-        mxfp4_checkpoint = gpt_oss.gpt_oss_mxfp4_conversion(current_dir, qat_dir=qat_checkpoint)
+        mxfp4_checkpoint = gpt_oss.gpt_oss_mxfp4_conversion(tmp_path, qat_dir=qat_checkpoint)
         if not mxfp4_checkpoint or not mxfp4_checkpoint.exists():
             print("Step 3 failed: MXFP4 checkpoint not found, stopping pipeline.")
             return
         print(f"Step 3 completed: MXFP4 checkpoint at {mxfp4_checkpoint}")
 
         # Step 4: Deploy with TensorRT-LLM (depends on Step 3)
         print("Step 4: Running deployment with MXFP4 checkpoint...")
-        gpt_oss.deploy_gpt_oss_trtllm(current_dir, model_path_override=mxfp4_checkpoint)
+        gpt_oss.deploy_gpt_oss_trtllm(tmp_path, model_path_override=mxfp4_checkpoint)
         print("Step 4 completed: Deployment successful")
 
     elif model_path == "openai/gpt-oss-120b":
         # Step 1: QAT Training with LoRA
-        qat_lora_checkpoint = gpt_oss.gpt_oss_qat_training_lora(current_dir)
+        qat_lora_checkpoint = gpt_oss.gpt_oss_qat_training_lora(tmp_path)
         if not qat_lora_checkpoint or not qat_lora_checkpoint.exists():
             print("Step 1 failed: QAT-LoRA checkpoint not found, stopping pipeline.")
             return
         print(f"Step 1 completed: QAT-LoRA checkpoint at {qat_lora_checkpoint}")
 
         # Step 2: MXFP4 Conversion for LoRA model (depends on Step 1)
         mxfp4_checkpoint = gpt_oss.gpt_oss_mxfp4_conversion_lora(
-            current_dir, qat_lora_dir=qat_lora_checkpoint
+            tmp_path, qat_lora_dir=qat_lora_checkpoint
         )
         if not mxfp4_checkpoint or not mxfp4_checkpoint.exists():
             print("Step 2 failed: MXFP4 checkpoint not found, stopping pipeline.")
@@ -347,5 +344,5 @@ def test_gpt_oss_complete_pipeline(model_path, tmp_path):

         # Step 3: Deploy with TensorRT-LLM (depends on Step 2)
         print("Step 3: Running deployment with MXFP4 checkpoint...")
-        gpt_oss.deploy_gpt_oss_trtllm(current_dir, model_path_override=mxfp4_checkpoint)
+        gpt_oss.deploy_gpt_oss_trtllm(tmp_path, model_path_override=mxfp4_checkpoint)
         print("Step 3 completed: Deployment successful")