diff --git a/tests/_test_utils/deploy_utils.py b/tests/_test_utils/deploy_utils.py
index 805624b8f..cfe489d6a 100644
--- a/tests/_test_utils/deploy_utils.py
+++ b/tests/_test_utils/deploy_utils.py
@@ -19,6 +19,48 @@
 import pytest
 import torch
 
+# Cache for available backends detection (computed once at import time)
+_AVAILABLE_BACKENDS = None
+
+
+def get_available_backends():
+    """Detect which backends are available in the current environment.
+
+    Returns:
+        set: A set of available backend names ('trtllm', 'vllm', 'sglang')
+    """
+    global _AVAILABLE_BACKENDS
+    if _AVAILABLE_BACKENDS is not None:
+        return _AVAILABLE_BACKENDS
+
+    available = set()
+
+    try:
+        import tensorrt_llm  # noqa: F401
+
+        available.add("trtllm")
+    except ImportError:
+        pass
+
+    try:
+        import vllm  # noqa: F401
+
+        available.add("vllm")
+    except ImportError:
+        pass
+
+    try:
+        import sglang  # noqa: F401
+
+        available.add("sglang")
+    except ImportError:
+        pass
+
+    _AVAILABLE_BACKENDS = available
+    print(f"[deploy_utils] Detected available backends: {available}")
+    return _AVAILABLE_BACKENDS
+
+
 # Common test prompts for all backends
 COMMON_PROMPTS = [
     "Hello, my name is",
@@ -93,15 +135,18 @@ def _deploy_trtllm(self):
         try:
             from tensorrt_llm import LLM, SamplingParams
             from tensorrt_llm.llmapi import CudaGraphConfig, EagleDecodingConfig, KvCacheConfig
-        except ImportError:
-            pytest.skip("tensorrt_llm package not available")
+        except ImportError as e:
+            raise ImportError("tensorrt_llm package not available. ") from e
 
         sampling_params = SamplingParams(max_tokens=32)
         spec_config = None
         llm = None
         kv_cache_config = KvCacheConfig(enable_block_reuse=True, free_gpu_memory_fraction=0.8)
 
-        if self.model_id == "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8":
+        if self.model_id in (
+            "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8",
+            "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-NVFP4",
+        ):
             llm = LLM(
                 model=self.model_id,
                 tensor_parallel_size=self.tensor_parallel_size,
@@ -175,8 +220,8 @@ def _deploy_vllm(self):
         """Deploy a model using vLLM."""
         try:
             from vllm import LLM, SamplingParams
-        except ImportError:
-            pytest.skip("vllm package not available")
+        except ImportError as e:
+            raise ImportError("vllm package not available.") from e
 
         quantization_method = "modelopt"
         if "fp4" in self.model_id.lower():
@@ -212,8 +257,8 @@ def _deploy_sglang(self):
         """Deploy a model using SGLang."""
         try:
             import sglang as sgl
-        except ImportError:
-            pytest.skip("sglang package not available")
+        except ImportError as e:
+            raise ImportError("sglang package not available.") from e
         quantization_method = "modelopt"
         if "fp4" in self.model_id.lower():
             quantization_method = "modelopt_fp4"
@@ -230,7 +275,10 @@ def _deploy_sglang(self):
                 mem_fraction_static=0.7,
                 context_length=1024,
             )
-        elif self.model_id == "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8":
+        elif self.model_id in (
+            "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8",
+            "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-NVFP4",
+        ):
             llm = sgl.Engine(
                 model_path=self.model_id,
                 quantization=quantization_method,
@@ -259,10 +307,20 @@ def __init__(self, **params):
             else:
                 self.params[key] = [value]
 
+        # Filter backends to only include available ones
+        if "backend" in self.params:
+            available = get_available_backends()
+            original_backends = self.params["backend"]
+            self.params["backend"] = [b for b in original_backends if b in available]
+
         # Pre-generate all deployers for pytest compatibility
         self._deployers = list(self._generate_deployers())
 
     def _generate_deployers(self):
+        # If no backends available after filtering, yield nothing
+        if "backend" in self.params and not self.params["backend"]:
+            return
+
         for values in itertools.product(*self.params.values()):
             deployer = ModelDeployer(**dict(zip(self.params.keys(), values)))
             # Set test case ID in format "model_id_backend"
diff --git a/tests/examples/gpt_oss/test_gpt_oss_qat.py b/tests/examples/gpt_oss/test_gpt_oss_qat.py
index e5f9b8ab9..43464110b 100644
--- a/tests/examples/gpt_oss/test_gpt_oss_qat.py
+++ b/tests/examples/gpt_oss/test_gpt_oss_qat.py
@@ -294,30 +294,27 @@ def deploy_gpt_oss_trtllm(self, tmp_path, model_path_override=None):
 )
 def test_gpt_oss_complete_pipeline(model_path, tmp_path):
     """Test the complete GPT-OSS optimization pipeline by executing all 3 steps in sequence."""
-    import pathlib
 
-    # Use current directory instead of tmp_path for checkpoints
-    current_dir = pathlib.Path.cwd()
     # Create GPTOSS instance with model path
     gpt_oss = GPTOSS(model_path)
 
     if model_path == "openai/gpt-oss-20b":
         # Step 1: SFT Training
-        sft_checkpoint = gpt_oss.gpt_oss_sft_training(current_dir)
+        sft_checkpoint = gpt_oss.gpt_oss_sft_training(tmp_path)
         if not sft_checkpoint or not sft_checkpoint.exists():
             print("Step 1 failed: SFT checkpoint not found, stopping pipeline.")
             return
         print(f"Step 1 completed: SFT checkpoint at {sft_checkpoint}")
 
         # Step 2: QAT Training (depends on Step 1)
-        qat_checkpoint = gpt_oss.gpt_oss_qat_training(current_dir, sft_dir=sft_checkpoint)
+        qat_checkpoint = gpt_oss.gpt_oss_qat_training(tmp_path, sft_dir=sft_checkpoint)
         if not qat_checkpoint or not qat_checkpoint.exists():
             print("Step 2 failed: QAT checkpoint not found, stopping pipeline.")
             return
         print(f"Step 2 completed: QAT checkpoint at {qat_checkpoint}")
 
         # Step 3: MXFP4 Conversion (depends on Step 2)
-        mxfp4_checkpoint = gpt_oss.gpt_oss_mxfp4_conversion(current_dir, qat_dir=qat_checkpoint)
+        mxfp4_checkpoint = gpt_oss.gpt_oss_mxfp4_conversion(tmp_path, qat_dir=qat_checkpoint)
         if not mxfp4_checkpoint or not mxfp4_checkpoint.exists():
             print("Step 3 failed: MXFP4 checkpoint not found, stopping pipeline.")
             return
@@ -325,12 +322,12 @@ def test_gpt_oss_complete_pipeline(model_path, tmp_path):
 
         # Step 4: Deploy with TensorRT-LLM (depends on Step 3)
         print("Step 4: Running deployment with MXFP4 checkpoint...")
-        gpt_oss.deploy_gpt_oss_trtllm(current_dir, model_path_override=mxfp4_checkpoint)
+        gpt_oss.deploy_gpt_oss_trtllm(tmp_path, model_path_override=mxfp4_checkpoint)
         print("Step 4 completed: Deployment successful")
 
     elif model_path == "openai/gpt-oss-120b":
         # Step 1: QAT Training with LoRA
-        qat_lora_checkpoint = gpt_oss.gpt_oss_qat_training_lora(current_dir)
+        qat_lora_checkpoint = gpt_oss.gpt_oss_qat_training_lora(tmp_path)
         if not qat_lora_checkpoint or not qat_lora_checkpoint.exists():
             print("Step 1 failed: QAT-LoRA checkpoint not found, stopping pipeline.")
             return
@@ -338,7 +335,7 @@ def test_gpt_oss_complete_pipeline(model_path, tmp_path):
         # Step 2: MXFP4 Conversion for LoRA model (depends on Step 1)
         mxfp4_checkpoint = gpt_oss.gpt_oss_mxfp4_conversion_lora(
-            current_dir, qat_lora_dir=qat_lora_checkpoint
+            tmp_path, qat_lora_dir=qat_lora_checkpoint
         )
         if not mxfp4_checkpoint or not mxfp4_checkpoint.exists():
             print("Step 2 failed: MXFP4 checkpoint not found, stopping pipeline.")
             return
@@ -347,5 +344,5 @@ def test_gpt_oss_complete_pipeline(model_path, tmp_path):
 
         # Step 3: Deploy with TensorRT-LLM (depends on Step 2)
         print("Step 3: Running deployment with MXFP4 checkpoint...")
-        gpt_oss.deploy_gpt_oss_trtllm(current_dir, model_path_override=mxfp4_checkpoint)
+        gpt_oss.deploy_gpt_oss_trtllm(tmp_path, model_path_override=mxfp4_checkpoint)
         print("Step 3 completed: Deployment successful")
diff --git a/tests/examples/llm_ptq/test_deploy.py b/tests/examples/llm_ptq/test_deploy.py
index 868304f48..4dd98ad9d 100644
--- a/tests/examples/llm_ptq/test_deploy.py
+++ b/tests/examples/llm_ptq/test_deploy.py
@@ -60,31 +60,43 @@ def cleanup_after_test():
     "command",
     [
         *ModelDeployerList(
-            model_id="nvidia/DeepSeek-R1-FP4",
+            model_id="nvidia/DeepSeek-R1-NVFP4",
             backend=("vllm", "trtllm", "sglang"),
             tensor_parallel_size=8,
             mini_sm=100,
         ),
         *ModelDeployerList(
-            model_id="nvidia/DeepSeek-R1-FP4-v2",
+            model_id="nvidia/DeepSeek-R1-NVFP4-v2",
             backend=("vllm", "trtllm", "sglang"),
             tensor_parallel_size=8,
             mini_sm=100,
         ),
         *ModelDeployerList(
-            model_id="nvidia/DeepSeek-R1-0528-FP4",
+            model_id="nvidia/DeepSeek-R1-0528-NVFP4",
             backend=("vllm", "trtllm", "sglang"),
             tensor_parallel_size=8,
             mini_sm=100,
         ),
         *ModelDeployerList(
-            model_id="nvidia/DeepSeek-R1-0528-FP4-v2",
+            model_id="nvidia/DeepSeek-R1-0528-NVFP4-v2",
             backend=("vllm", "trtllm", "sglang"),
             tensor_parallel_size=8,
             mini_sm=100,
         ),
         *ModelDeployerList(
-            model_id="nvidia/DeepSeek-V3-0324-FP4",
+            model_id="nvidia/DeepSeek-V3-0324-NVFP4",
+            backend=("vllm", "trtllm", "sglang"),
+            tensor_parallel_size=8,
+            mini_sm=100,
+        ),
+        *ModelDeployerList(
+            model_id="nvidia/DeepSeek-V3.1-NVFP4",
+            backend=("vllm", "trtllm", "sglang"),
+            tensor_parallel_size=8,
+            mini_sm=100,
+        ),
+        *ModelDeployerList(
+            model_id="nvidia/DeepSeek-V3.2-NVFP4",
             backend=("vllm", "trtllm", "sglang"),
             tensor_parallel_size=8,
             mini_sm=100,
@@ -107,7 +119,7 @@ def test_deepseek(command):
             mini_sm=89,
         ),
         *ModelDeployerList(
-            model_id="nvidia/Llama-3.1-8B-Instruct-FP4",
+            model_id="nvidia/Llama-3.1-8B-Instruct-NVFP4",
             backend=("trtllm", "vllm", "sglang"),
             tensor_parallel_size=1,
             mini_sm=100,
@@ -119,7 +131,7 @@ def test_deepseek(command):
             tensor_parallel_size=4,
         ),
         *ModelDeployerList(
-            model_id="nvidia/Llama-3.3-70B-Instruct-FP4",
+            model_id="nvidia/Llama-3.3-70B-Instruct-NVFP4",
             backend=("trtllm", "vllm", "sglang"),
             tensor_parallel_size=4,
             mini_sm=100,
@@ -136,7 +148,7 @@ def test_deepseek(command):
             tensor_parallel_size=8,
         ),
         *ModelDeployerList(
-            model_id="nvidia/Llama-3.1-405B-Instruct-FP4",
+            model_id="nvidia/Llama-3.1-405B-Instruct-NVFP4",
             backend=("trtllm", "vllm", "sglang"),
             tensor_parallel_size=8,
             mini_sm=100,
@@ -148,7 +160,7 @@ def test_deepseek(command):
             tensor_parallel_size=8,
         ),
         *ModelDeployerList(
-            model_id="nvidia/Llama-4-Maverick-17B-128E-Instruct-FP4",
+            model_id="nvidia/Llama-4-Maverick-17B-128E-Instruct-NVFP4",
             backend=("trtllm", "vllm", "sglang"),
             tensor_parallel_size=8,
             mini_sm=100,
@@ -160,7 +172,7 @@ def test_deepseek(command):
             tensor_parallel_size=8,
             mini_sm=89,
         ),
         *ModelDeployerList(
-            model_id="nvidia/Llama-4-Scout-17B-16E-Instruct-FP4",
+            model_id="nvidia/Llama-4-Scout-17B-16E-Instruct-NVFP4",
             backend=("trtllm", "vllm", "sglang"),
             tensor_parallel_size=8,
             mini_sm=100,
@@ -176,7 +188,7 @@ def test_llama(command):
     "command",
     [
         *ModelDeployerList(
-            model_id="nvidia/Qwen3-8B-FP4",
+            model_id="nvidia/Qwen3-8B-NVFP4",
             backend=("trtllm", "vllm", "sglang"),
             tensor_parallel_size=1,
             mini_sm=100,
@@ -188,7 +200,7 @@ def test_llama(command):
             mini_sm=89,
         ),
         *ModelDeployerList(
-            model_id="nvidia/Qwen3-14B-FP4",
+            model_id="nvidia/Qwen3-14B-NVFP4",
             backend=("trtllm", "vllm", "sglang"),
             tensor_parallel_size=1,
             mini_sm=100,
@@ -200,7 +212,7 @@ def test_llama(command):
             mini_sm=89,
         ),
         *ModelDeployerList(
-            model_id="nvidia/Qwen3-235B-A22B-FP4",
+            model_id="nvidia/Qwen3-235B-A22B-NVFP4",
             backend=("trtllm", "vllm", "sglang"),
             tensor_parallel_size=2,
             mini_sm=100,
@@ -212,16 +224,16 @@ def test_llama(command):
             mini_sm=89,
         ),
         *ModelDeployerList(
-            model_id="nvidia/QwQ-32B-FP4", backend=("trtllm", "vllm", "sglang"), mini_sm=100
+            model_id="nvidia/QwQ-32B-NVFP4", backend=("trtllm", "vllm", "sglang"), mini_sm=100
         ),
         *ModelDeployerList(
-            model_id="nvidia/Qwen3-32B-FP4",
+            model_id="nvidia/Qwen3-32B-NVFP4",
             backend=("trtllm", "vllm", "sglang"),
             tensor_parallel_size=4,
             mini_sm=100,
         ),
         *ModelDeployerList(
-            model_id="nvidia/Qwen2.5-VL-7B-Instruct-FP4",
+            model_id="nvidia/Qwen2.5-VL-7B-Instruct-NVFP4",
             backend=("trtllm", "vllm", "sglang"),
             tensor_parallel_size=4,
             mini_sm=100,
@@ -233,11 +245,23 @@ def test_llama(command):
             mini_sm=100,
         ),
         *ModelDeployerList(
-            model_id="nvidia/Qwen3-30B-A3B-FP4",
+            model_id="nvidia/Qwen3-30B-A3B-NVFP4",
             backend=("trtllm", "vllm", "sglang"),
             tensor_parallel_size=4,
             mini_sm=100,
         ),
+        *ModelDeployerList(
+            model_id="nvidia/Qwen3-Next-80B-A3B-Instruct-NVFP4",
+            backend=("trtllm", "vllm", "sglang"),
+            tensor_parallel_size=8,
+            mini_sm=100,
+        ),
+        *ModelDeployerList(
+            model_id="nvidia/Qwen3-Next-80B-A3B-Thinking-NVFP4",
+            backend=("trtllm", "vllm", "sglang"),
+            tensor_parallel_size=8,
+            mini_sm=100,
+        ),
     ],
     ids=idfn,
 )
@@ -252,11 +276,10 @@ def test_qwen(command):
             model_id="nvidia/Mixtral-8x7B-Instruct-v0.1-FP8", backend=("trtllm", "vllm", "sglang")
         ),
         *ModelDeployerList(
-            model_id="nvidia/Mixtral-8x7B-Instruct-v0.1-FP4",
+            model_id="nvidia/Mixtral-8x7B-Instruct-v0.1-NVFP4",
             backend=("trtllm", "vllm", "sglang"),
             mini_sm=100,
         ),
-        # ModelDeployer(model_id="nvidia/Mixtral-8x7B-Instruct-v0.1-FP8", backend="sglang"), unsupported
     ],
     ids=idfn,
 )
@@ -266,9 +289,9 @@ def test_mixtral(command):
 
 @pytest.mark.parametrize(
     "command",
-    [  # TRTLLM bug: https://nvbugs/5451286
+    [
         *ModelDeployerList(
-            model_id="nvidia/gemma-3-12b-it-FP4",
+            model_id="nvidia/gemma-3-12b-it-NVFP4",
             backend=("trtllm", "vllm", "sglang"),
             tensor_parallel_size=1,
             mini_sm=100,
@@ -282,7 +305,7 @@ def test_mixtral(command):
             attn_backend="FLASHINFER",
         ),
         *ModelDeployerList(
-            model_id="nvidia/gemma-3-27b-it-FP4",
+            model_id="nvidia/gemma-3-27b-it-NVFP4",
             backend=("trtllm", "vllm", "sglang"),
             tensor_parallel_size=1,
             mini_sm=100,
@@ -307,7 +330,7 @@ def test_gemma(command):
     "command",
     [
         *ModelDeployerList(
-            model_id="nvidia/Phi-4-multimodal-instruct-FP4",
+            model_id="nvidia/Phi-4-multimodal-instruct-NVFP4",
             backend=("trtllm", "vllm", "sglang"),
             tensor_parallel_size=1,
             mini_sm=100,
@@ -319,7 +342,7 @@ def test_gemma(command):
             mini_sm=89,
         ),
         *ModelDeployerList(
-            model_id="nvidia/Phi-4-reasoning-plus-FP4",
+            model_id="nvidia/Phi-4-reasoning-plus-NVFP4",
             backend=("trtllm", "vllm", "sglang"),
             tensor_parallel_size=1,
             mini_sm=100,
@@ -341,7 +364,7 @@ def test_phi(command):
     "command",
     [
         *ModelDeployerList(
-            model_id="nvidia/Kimi-K2-Instruct-FP4",
+            model_id="nvidia/Kimi-K2-Instruct-NVFP4",
             backend=("trtllm", "vllm", "sglang"),
             tensor_parallel_size=8,
             mini_sm=100,
@@ -374,12 +397,6 @@ def test_kimi(command):
             tensor_parallel_size=1,
             mini_sm=89,
         ),
-        *ModelDeployerList(
-            model_id="nvidia/Llama-3_1-Nemotron-Ultra-253B-v1-FP8",
-            backend=("trtllm", "vllm", "sglang"),
-            tensor_parallel_size=4,
-            mini_sm=89,
-        ),
         *ModelDeployerList(
             model_id="nvidia/Llama-3_1-Nemotron-Ultra-253B-v1-FP8",
             backend=("vllm",),
@@ -393,6 +410,13 @@ def test_kimi(command):
             mini_sm=89,
             attn_backend="FLASHINFER",
         ),
+        *ModelDeployerList(
+            model_id="nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-NVFP4",
+            backend=("trtllm", "vllm", "sglang"),
+            tensor_parallel_size=1,
+            mini_sm=89,
+            attn_backend="FLASHINFER",
+        ),
     ],
     ids=idfn,
 )
@@ -454,6 +478,14 @@ def test_medusa(command):
             mini_sm=89,
             eagle3_one_model=False,
         ),
+        *ModelDeployerList(
+            base_model="Qwen/Qwen3-235B-A22B-Thinking-2507",
+            model_id="nvidia/Qwen3-235B-A22B-Thinking-2507-FP4-Eagle3",
+            backend=("trtllm", "sglang"),
+            tensor_parallel_size=8,
+            mini_sm=89,
+            eagle3_one_model=False,
+        ),
         *ModelDeployerList(
             base_model="Qwen/Qwen3-30B-A3B",
             model_id="nvidia/Qwen3-30B-A3B-Eagle3",