From a8180eed66278ed5efa8f1d3383f677f7feb30cb Mon Sep 17 00:00:00 2001
From: HDCharles <charlesdavidhernandez@gmail.com>
Date: Tue, 25 Nov 2025 21:38:45 +0000
Subject: [PATCH 1/5] [test] qwen3 moe w4a16 + skip

Summary:
Quantizing every layer of this model would make the test take too long, so
the recipe only quantizes the first 10 layers (0-9) and ignores layers 10+.

Signed-off-by: HDCharles <charlesdavidhernandez@gmail.com>
---
 .../configs/qwen3_w4a16_grouped_quant.yaml    |  9 +++++++++
 ...ipe_w4a16_group_quant_first_10_layers.yaml | 20 +++++++++++++++++++
 2 files changed, 29 insertions(+)
 create mode 100644 tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml
 create mode 100644 tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_group_quant_first_10_layers.yaml

diff --git a/tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml b/tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml
new file mode 100644
index 0000000000..e3c410f3b2
--- /dev/null
+++ b/tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml
@@ -0,0 +1,9 @@
+cadence: "nightly"
+test_type: "regression"
+model: Qwen/Qwen3-30B-A3B
+dataset_id: HuggingFaceH4/ultrachat_200k
+dataset_split: train_sft
+scheme: W4A16_group
+num_calibration_samples: 20
+save_dir: "Qwen3-30B-A3B-W4A16-first-10"
+recipe: tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_group_quant_first_10_layers.yaml
diff --git a/tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_group_quant_first_10_layers.yaml b/tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_group_quant_first_10_layers.yaml
new file mode 100644
index 0000000000..0351195ce4
--- /dev/null
+++ b/tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_group_quant_first_10_layers.yaml
@@ -0,0 +1,20 @@
+quant_stage:
+  quant_modifiers:
+    GPTQModifier:
+      ignore: [
+        "lm_head",
+        # Ignore layers (10+)
+        "re:.*model\\.layers\\.([1-9][0-9])\\..*",
+      ]
+      actorder: null
+      config_groups:
+        group_0:
+          weights:
+            num_bits: 4
+            type: "int"
+            symmetric: True
+            strategy: "group"
+            group_size: 128
+          input_activations: null
+          output_activations: null
+          targets: ["Linear"]

From 90c07667f708bcac06fd503e8aec35cf0f758287 Mon Sep 17 00:00:00 2001
From: HDCharles <39544797+HDCharles@users.noreply.github.com>
Date: Mon, 1 Dec 2025 13:08:32 -0500
Subject: [PATCH 2/5] Update qwen3_w4a16_grouped_quant.yaml

Signed-off-by: HDCharles <39544797+HDCharles@users.noreply.github.com>
---
 tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml b/tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml
index e3c410f3b2..afaffcb708 100644
--- a/tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml
+++ b/tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml
@@ -5,5 +5,5 @@ dataset_id: HuggingFaceH4/ultrachat_200k
 dataset_split: train_sft
 scheme: W4A16_group
 num_calibration_samples: 20
-save_dir: "Qwen3-30B-A3B-W4A16-first-10"
+save_dir: "Qwen3-30B-A3B-W4A16-first-10-e2e"
 recipe: tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_group_quant_first_10_layers.yaml

From a1739ff7fff06558ac585990b064a898f6795420 Mon Sep 17 00:00:00 2001
From: HDCharles <39544797+HDCharles@users.noreply.github.com>
Date: Mon, 1 Dec 2025 15:25:48 -0500
Subject: [PATCH 3/5] remove save_dir from qwen3 w4a16 config

Signed-off-by: HDCharles <39544797+HDCharles@users.noreply.github.com>
---
 tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml b/tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml
index afaffcb708..7d0b5836ff 100644
--- a/tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml
+++ b/tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml
@@ -5,5 +5,4 @@ dataset_id: HuggingFaceH4/ultrachat_200k
 dataset_split: train_sft
 scheme: W4A16_group
 num_calibration_samples: 20
-save_dir: "Qwen3-30B-A3B-W4A16-first-10-e2e"
 recipe: tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_group_quant_first_10_layers.yaml

From ff1180f544c6bca36b31267ef6c043d9422b3a60 Mon Sep 17 00:00:00 2001
From: HDCharles <39544797+HDCharles@users.noreply.github.com>
Date: Mon, 1 Dec 2025 15:31:26 -0500
Subject: [PATCH 4/5] ignore gate

Signed-off-by: HDCharles <39544797+HDCharles@users.noreply.github.com>
---
 .../recipes/WNA16/recipe_w4a16_group_quant_first_10_layers.yaml  | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_group_quant_first_10_layers.yaml b/tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_group_quant_first_10_layers.yaml
index 0351195ce4..da894d1df5 100644
--- a/tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_group_quant_first_10_layers.yaml
+++ b/tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_group_quant_first_10_layers.yaml
@@ -5,6 +5,7 @@ quant_stage:
         "lm_head",
         # Ignore layers (10+)
         "re:.*model\\.layers\\.([1-9][0-9])\\..*",
+        "re:.*mlp[.]gate($|[.].*)"  # mlp.gate itself or anything under it, never mlp.gate_proj
       ]
       actorder: null
       config_groups:

From 39468f2ec98e5784a9248d6bb791cbd0d50500a0 Mon Sep 17 00:00:00 2001
From: HDCharles <39544797+HDCharles@users.noreply.github.com>
Date: Mon, 1 Dec 2025 15:43:33 -0500
Subject: [PATCH 5/5] update save_dir

Signed-off-by: HDCharles <39544797+HDCharles@users.noreply.github.com>
---
 tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml b/tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml
index 7d0b5836ff..e3c410f3b2 100644
--- a/tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml
+++ b/tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml
@@ -5,4 +5,5 @@ dataset_id: HuggingFaceH4/ultrachat_200k
 dataset_split: train_sft
 scheme: W4A16_group
 num_calibration_samples: 20
+save_dir: "Qwen3-30B-A3B-W4A16-first-10"
 recipe: tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_group_quant_first_10_layers.yaml