diff --git a/tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml b/tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml
new file mode 100644
index 0000000000..e3c410f3b2
--- /dev/null
+++ b/tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml
@@ -0,0 +1,9 @@
+cadence: "nightly"
+test_type: "regression"
+model: Qwen/Qwen3-30B-A3B
+dataset_id: HuggingFaceH4/ultrachat_200k
+dataset_split: train_sft
+scheme: W4A16_group
+num_calibration_samples: 20
+save_dir: "Qwen3-30B-A3B-W4A16-first-10"
+recipe: tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_group_quant_first_10_layers.yaml
diff --git a/tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_group_quant_first_10_layers.yaml b/tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_group_quant_first_10_layers.yaml
new file mode 100644
index 0000000000..da894d1df5
--- /dev/null
+++ b/tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_group_quant_first_10_layers.yaml
@@ -0,0 +1,21 @@
+quant_stage:
+  quant_modifiers:
+    GPTQModifier:
+      ignore: [
+        "lm_head",
+        # Skip layers with index 10+ (two or more digits) and the MoE router gates
+        "re:.*model\\.layers\\.([1-9][0-9]+)\\..*",
+        "re:.*mlp[.]gate[.].*"
+      ]
+      actorder: null
+      config_groups:
+        group_0:
+          weights:
+            num_bits: 4
+            type: "int"
+            symmetric: true
+            strategy: "group"
+            group_size: 128
+          input_activations: null
+          output_activations: null
+          targets: ["Linear"]