Fix mcore nvfp4 export for vllm #816

cjluo-nv · 2026-01-26T19:27:44Z

How about we always save `group_size` whenever it is valid?

-Original file line number
+Diff line change
@@ Expand Up / @@ -331,6 +331,9 @@ def save_pretrained( @@
                         "exclude_modules": ["lm_head"],
                     },
                 }
+                if "NVFP4" in quantization:
+                    # for vllm, the group size is required
+                    hf_quant_config["quantization"]["group_size"] = 16
                 with open(save_directory + "/hf_quant_config.json", "w") as f:
                     json.dump(hf_quant_config, f, indent=4)
@@ Expand Down @@

Provide feedback