From 15532c3f07a8350923db76ecb6e15a65f179e6df Mon Sep 17 00:00:00 2001 From: Nicolas COMPAIN Date: Thu, 2 Apr 2026 18:54:44 +0200 Subject: [PATCH] fix: pass quantization mode to MLX.quantized in QuantizedLinear MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit QuantizedLinear.init(weight:...) called MLX.quantized() without forwarding the mode parameter, so weights were always quantized as affine regardless of the specified mode. This produced spurious biases for non-affine modes like mxfp4. Fix: pass mode: mode to MLX.quantized(), matching QuantizedEmbedding which already does this correctly. Note: no updateMissing() override is needed — when biases is nil, Module.build(value:) wraps it as .value(.other(...)), and the (.value(.other(_)), .none) case in update() already breaks silently. Co-Authored-By: Claude Opus 4.6 (1M context) --- Source/MLXNN/Quantized.swift | 2 +- Tests/MLXTests/QuantizationTests.swift | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Source/MLXNN/Quantized.swift b/Source/MLXNN/Quantized.swift index 5432a8e0..076e91ca 100644 --- a/Source/MLXNN/Quantized.swift +++ b/Source/MLXNN/Quantized.swift @@ -299,7 +299,7 @@ open class QuantizedLinear: Linear, Quantized { self.mode = mode let (quantizedWeight, scales, biases) = MLX.quantized( - weight, groupSize: groupSize, bits: bits) + weight, groupSize: groupSize, bits: bits, mode: mode) self.scales = scales self.biases = biases diff --git a/Tests/MLXTests/QuantizationTests.swift b/Tests/MLXTests/QuantizationTests.swift index 9f1dbf0b..0edbd545 100644 --- a/Tests/MLXTests/QuantizationTests.swift +++ b/Tests/MLXTests/QuantizationTests.swift @@ -34,4 +34,9 @@ class QuantizationTests: XCTestCase { XCTAssertEqual( quantized3.describeExtra(0), "(embeddingCount=512, dimensions=1024)") } + + func testQuantizedLinearMxfp4DoesNotCreateAffineBiases() { + let quantized = QuantizedLinear(64, 64, groupSize: 32, bits: 4, mode: .mxfp4) + XCTAssertNil(quantized.biases) + } }