Add an end-to-end vLLM test case for the FP8_BLOCK quantization scheme.

Two new files are created by this patch:

  * tests/e2e/vLLM/configs/fp8_block.yaml
      Test config: cadence "nightly", test_type "regression", run against
      Qwen/Qwen2.5-0.5B with scheme FP8_BLOCK, pointing at the recipe below.

  * tests/e2e/vLLM/recipes/FP8/recipe_fp8_block.yaml
      Recipe: a single QuantizationModifier targeting "Linear" with scheme
      "FP8_BLOCK"; "lm_head" and names matching "re:.*mlp.gate$" are listed
      under ignore.

NOTE(review): the recipe nesting is written with two spaces per level
(quant_stage > quant_modifiers > QuantizationModifier > leaf keys); confirm
against the committed blob ff0ac634c.

diff --git a/tests/e2e/vLLM/configs/fp8_block.yaml b/tests/e2e/vLLM/configs/fp8_block.yaml
new file mode 100644
index 000000000..a468977e4
--- /dev/null
+++ b/tests/e2e/vLLM/configs/fp8_block.yaml
@@ -0,0 +1,5 @@
+cadence: "nightly"
+test_type: "regression"
+model: Qwen/Qwen2.5-0.5B
+scheme: FP8_BLOCK
+recipe: tests/e2e/vLLM/recipes/FP8/recipe_fp8_block.yaml
diff --git a/tests/e2e/vLLM/recipes/FP8/recipe_fp8_block.yaml b/tests/e2e/vLLM/recipes/FP8/recipe_fp8_block.yaml
new file mode 100644
index 000000000..ff0ac634c
--- /dev/null
+++ b/tests/e2e/vLLM/recipes/FP8/recipe_fp8_block.yaml
@@ -0,0 +1,6 @@
+quant_stage:
+  quant_modifiers:
+    QuantizationModifier:
+      targets: "Linear"
+      scheme: "FP8_BLOCK"
+      ignore: ["lm_head", "re:.*mlp.gate$"]