From a8180eed66278ed5efa8f1d3383f677f7feb30cb Mon Sep 17 00:00:00 2001
From: HDCharles
Date: Tue, 25 Nov 2025 21:38:45 +0000
Subject: [PATCH 1/5] [test] qwen3 moe w4a16 + skip

Summary

This test would ordinarily take too long so we only quantize the first 10 layers

Signed-off-by: HDCharles
---
Reviewer notes (this area is not part of the commit message):
  * Adds a nightly regression config for Qwen/Qwen3-30B-A3B plus the GPTQ
    recipe it points to via `recipe:`.
  * The ignore pattern `([1-9][0-9])` matches exactly two-digit layer
    indices (10-99), so only single-digit layers 0-9 are left to be
    quantized. Indices of 100 or more would not be matched by it.
  * In the double-quoted YAML scalar each `\\` decodes to one backslash,
    so the regex engine sees `\.` (a literal dot).

 .../configs/qwen3_w4a16_grouped_quant.yaml     |  9 +++++++++
 ...ipe_w4a16_group_quant_first_10_layers.yaml | 20 +++++++++++++++++++
 2 files changed, 29 insertions(+)
 create mode 100644 tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml
 create mode 100644 tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_group_quant_first_10_layers.yaml

diff --git a/tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml b/tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml
new file mode 100644
index 0000000000..e3c410f3b2
--- /dev/null
+++ b/tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml
@@ -0,0 +1,9 @@
+cadence: "nightly"
+test_type: "regression"
+model: Qwen/Qwen3-30B-A3B
+dataset_id: HuggingFaceH4/ultrachat_200k
+dataset_split: train_sft
+scheme: W4A16_group
+num_calibration_samples: 20
+save_dir: "Qwen3-30B-A3B-W4A16-first-10"
+recipe: tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_group_quant_first_10_layers.yaml
diff --git a/tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_group_quant_first_10_layers.yaml b/tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_group_quant_first_10_layers.yaml
new file mode 100644
index 0000000000..0351195ce4
--- /dev/null
+++ b/tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_group_quant_first_10_layers.yaml
@@ -0,0 +1,20 @@
+quant_stage:
+  quant_modifiers:
+    GPTQModifier:
+      ignore: [
+        "lm_head",
+        # Ignore layers (10+)
+        "re:.*model\\.layers\\.([1-9][0-9])\\..*",
+      ]
+      actorder: null
+      config_groups:
+        group_0:
+          weights:
+            num_bits: 4
+            type: "int"
+            symmetric: True
+            strategy: "group"
+            group_size: 128
+          input_activations: null
+          output_activations: null
+          targets: ["Linear"]

From 90c07667f708bcac06fd503e8aec35cf0f758287 Mon Sep 17 00:00:00 2001
From: HDCharles <39544797+HDCharles@users.noreply.github.com>
Date: Mon, 1 Dec 2025 13:08:32 -0500
Subject: [PATCH 2/5] Update qwen3_w4a16_grouped_quant.yaml

Signed-off-by: HDCharles <39544797+HDCharles@users.noreply.github.com>
---
 tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml b/tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml
index e3c410f3b2..afaffcb708 100644
--- a/tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml
+++ b/tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml
@@ -5,5 +5,5 @@ dataset_id: HuggingFaceH4/ultrachat_200k
 dataset_split: train_sft
 scheme: W4A16_group
 num_calibration_samples: 20
-save_dir: "Qwen3-30B-A3B-W4A16-first-10"
+save_dir: "Qwen3-30B-A3B-W4A16-first-10-e2e"
 recipe: tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_group_quant_first_10_layers.yaml

From a1739ff7fff06558ac585990b064a898f6795420 Mon Sep 17 00:00:00 2001
From: HDCharles <39544797+HDCharles@users.noreply.github.com>
Date: Mon, 1 Dec 2025 15:25:48 -0500
Subject: [PATCH 3/5] update name

Signed-off-by: HDCharles <39544797+HDCharles@users.noreply.github.com>
---
 tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml b/tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml
index afaffcb708..7d0b5836ff 100644
--- a/tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml
+++ b/tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml
@@ -5,5 +5,4 @@ dataset_id: HuggingFaceH4/ultrachat_200k
 dataset_split: train_sft
 scheme: W4A16_group
 num_calibration_samples: 20
-save_dir: "Qwen3-30B-A3B-W4A16-first-10-e2e"
 recipe: tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_group_quant_first_10_layers.yaml

From ff1180f544c6bca36b31267ef6c043d9422b3a60 Mon Sep 17 00:00:00 2001
From: HDCharles <39544797+HDCharles@users.noreply.github.com>
Date: Mon, 1 Dec 2025 15:31:26 -0500
Subject: [PATCH 4/5] ignore gate

Signed-off-by: HDCharles <39544797+HDCharles@users.noreply.github.com>
---
Reviewer notes (this area is not part of the commit message):
  * NOTE(review): `re:.*mlp[.]gate[.].*` requires a literal '.' after
    `gate`, so it can only match names that continue past `gate`
    (e.g. `...mlp.gate.weight`). If the consumer matches ignore patterns
    against module names that end in `...mlp.gate`, this pattern will not
    match them; confirm against the matcher, and consider anchoring with
    `gate$` instead if so.

 .../recipes/WNA16/recipe_w4a16_group_quant_first_10_layers.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_group_quant_first_10_layers.yaml b/tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_group_quant_first_10_layers.yaml
index 0351195ce4..da894d1df5 100644
--- a/tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_group_quant_first_10_layers.yaml
+++ b/tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_group_quant_first_10_layers.yaml
@@ -5,6 +5,7 @@ quant_stage:
         "lm_head",
         # Ignore layers (10+)
         "re:.*model\\.layers\\.([1-9][0-9])\\..*",
+        "re:.*mlp[.]gate[.].*"
       ]
       actorder: null
       config_groups:

From 39468f2ec98e5784a9248d6bb791cbd0d50500a0 Mon Sep 17 00:00:00 2001
From: HDCharles <39544797+HDCharles@users.noreply.github.com>
Date: Mon, 1 Dec 2025 15:43:33 -0500
Subject: [PATCH 5/5] update save_dir

Signed-off-by: HDCharles <39544797+HDCharles@users.noreply.github.com>
---
Reviewer notes (this area is not part of the commit message):
  * This re-adds the `save_dir` value introduced in patch 1; the resulting
    blob (e3c410f3b2) is the same one patch 1 created, so patches 2, 3
    and 5 together leave the config file unchanged from patch 1.

 tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml b/tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml
index 7d0b5836ff..e3c410f3b2 100644
--- a/tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml
+++ b/tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml
@@ -5,4 +5,5 @@ dataset_id: HuggingFaceH4/ultrachat_200k
 dataset_split: train_sft
 scheme: W4A16_group
 num_calibration_samples: 20
+save_dir: "Qwen3-30B-A3B-W4A16-first-10"
 recipe: tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_group_quant_first_10_layers.yaml