From 1e6553f2260a42889f000ec0b9969c984e1b215f Mon Sep 17 00:00:00 2001
From: qflen <nkimon00@gmail.com>
Date: Wed, 22 Apr 2026 01:04:47 +0200
Subject: [PATCH 1/2] Enable gguflib asserts to reject malformed GGUF tensor
 headers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

gguflib.c relies on assert() to validate untrusted GGUF input
(e.g. tensor ndim <= GGUF_TENSOR_MAX_DIM). In release builds those
asserts are compiled out by -DNDEBUG, leaving MLX to parse and
dereference the corrupted tensor struct — get_shape() then reads past
the fixed 8-element dim[] array, and downstream code uses clobbered
offset/weights_data fields.

- Force -UNDEBUG on the gguflib target so the asserts stay live in
  every build configuration.
- Add a defensive bounds check in get_shape() so the MLX side also
  refuses oversized ndim if the assert is ever disabled by a packager
  or replaced by a system gguflib.

Closes part of #3358.
---
 mlx/io/CMakeLists.txt |  5 +++++
 mlx/io/gguf.cpp       |  7 +++++++
 tests/CMakeLists.txt  |  6 ++++++
 tests/load_tests.cpp  | 15 +++++++++++++++
 4 files changed, 33 insertions(+)
diff --git a/mlx/io/CMakeLists.txt b/mlx/io/CMakeLists.txt
index eebd7618b3..1fedcd46b4 100644
--- a/mlx/io/CMakeLists.txt
+++ b/mlx/io/CMakeLists.txt
@@ -17,6 +17,11 @@ if(MLX_BUILD_GGUF)
                              PRIVATE $<BUILD_INTERFACE:${gguflib_SOURCE_DIR}>)
   add_library(gguflib STATIC ${gguflib_SOURCE_DIR}/fp16.c
                              ${gguflib_SOURCE_DIR}/gguflib.c)
+  # gguflib uses assert() to reject malformed tensor headers (e.g. ndim > 8).
+  # Those checks are otherwise compiled out by -DNDEBUG in release builds, which
+  # leaves out-of-bounds reads/writes unguarded when loading untrusted GGUF
+  # files. Force NDEBUG off for this target so the asserts stay live.
+  target_compile_options(gguflib PRIVATE -UNDEBUG)
   target_link_libraries(mlx PRIVATE $<BUILD_INTERFACE:gguflib>)
   target_sources(mlx PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/gguf.cpp
                              ${CMAKE_CURRENT_SOURCE_DIR}/gguf_quants.cpp)
diff --git a/mlx/io/gguf.cpp b/mlx/io/gguf.cpp
index 206f6fb31f..664de5fe00 100644
--- a/mlx/io/gguf.cpp
+++ b/mlx/io/gguf.cpp
@@ -48,6 +48,13 @@ std::optional<Dtype> gguf_type_to_dtype(const uint32_t& gguf_type) {
 }
 
 Shape get_shape(const gguf_tensor& tensor) {
+  if (tensor.ndim > GGUF_TENSOR_MAX_DIM) {
+    std::ostringstream msg;
+    msg << "[load_gguf] Tensor has " << tensor.ndim
+        << " dimensions, but the maximum supported is " << GGUF_TENSOR_MAX_DIM
+        << ".";
+    throw std::runtime_error(msg.str());
+  }
   Shape shape;
   // The dimension order in GGML is the reverse of the order used in MLX.
   for (int i = tensor.ndim - 1; i >= 0; i--) {
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 039b03f2f7..1a59d9520d 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -40,6 +40,12 @@ target_link_libraries(tests PRIVATE mlx doctest)
 target_compile_options(tests PRIVATE ${SANITIZER_COMPILE_FLAGS})
 target_link_options(tests PRIVATE ${SANITIZER_LINK_FLAGS})
 
+if(MLX_BUILD_GGUF)
+  FetchContent_GetProperties(gguflib SOURCE_DIR gguflib_SOURCE_DIR)
+  target_include_directories(tests PRIVATE ${gguflib_SOURCE_DIR})
+  target_compile_definitions(tests PRIVATE MLX_BUILD_GGUF)
+endif()
+
 doctest_discover_tests(tests)
 add_test(NAME tests COMMAND tests)
 
diff --git a/tests/load_tests.cpp b/tests/load_tests.cpp
index 491ca158c1..bd88604e32 100644
--- a/tests/load_tests.cpp
+++ b/tests/load_tests.cpp
@@ -9,6 +9,10 @@
 
 #include "mlx/mlx.h"
 
+#ifdef MLX_BUILD_GGUF
+#include "mlx/io/gguf.h"
+#endif
+
 using namespace mlx::core;
 
 std::string get_temp_file(const std::string& name) {
@@ -275,6 +279,17 @@ TEST_CASE("test gguf metadata") {
   }
 }
 
+#ifdef MLX_BUILD_GGUF
+TEST_CASE("test gguf get_shape rejects oversized ndim") {
+  // Malformed GGUF files can declare more dimensions than gguflib's fixed
+  // tensor.dim[] array can hold. get_shape() must reject these rather than
+  // read past the buffer.
+  gguf_tensor tensor{};
+  tensor.ndim = GGUF_TENSOR_MAX_DIM + 1;
+  CHECK_THROWS_AS(get_shape(tensor), std::runtime_error);
+}
+#endif
+
 TEST_CASE("test single array serialization") {
   // Basic test
   {

From 0de7fb250666756c0ed651881ecc7e7aeae8efa0 Mon Sep 17 00:00:00 2001
From: qflen <nkimon00@gmail.com>
Date: Fri, 24 Apr 2026 02:27:16 +0200
Subject: [PATCH 2/2] Drop redundant get_shape() ndim check per review

---
 mlx/io/gguf.cpp      |  7 -------
 tests/CMakeLists.txt |  6 ------
 tests/load_tests.cpp | 15 ---------------
 3 files changed, 28 deletions(-)

diff --git a/mlx/io/gguf.cpp b/mlx/io/gguf.cpp
index 664de5fe00..206f6fb31f 100644
--- a/mlx/io/gguf.cpp
+++ b/mlx/io/gguf.cpp
@@ -48,13 +48,6 @@ std::optional<Dtype> gguf_type_to_dtype(const uint32_t& gguf_type) {
 }
 
 Shape get_shape(const gguf_tensor& tensor) {
-  if (tensor.ndim > GGUF_TENSOR_MAX_DIM) {
-    std::ostringstream msg;
-    msg << "[load_gguf] Tensor has " << tensor.ndim
-        << " dimensions, but the maximum supported is " << GGUF_TENSOR_MAX_DIM
-        << ".";
-    throw std::runtime_error(msg.str());
-  }
   Shape shape;
   // The dimension order in GGML is the reverse of the order used in MLX.
   for (int i = tensor.ndim - 1; i >= 0; i--) {
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 1a59d9520d..039b03f2f7 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -40,12 +40,6 @@ target_link_libraries(tests PRIVATE mlx doctest)
 target_compile_options(tests PRIVATE ${SANITIZER_COMPILE_FLAGS})
 target_link_options(tests PRIVATE ${SANITIZER_LINK_FLAGS})
 
-if(MLX_BUILD_GGUF)
-  FetchContent_GetProperties(gguflib SOURCE_DIR gguflib_SOURCE_DIR)
-  target_include_directories(tests PRIVATE ${gguflib_SOURCE_DIR})
-  target_compile_definitions(tests PRIVATE MLX_BUILD_GGUF)
-endif()
-
 doctest_discover_tests(tests)
 add_test(NAME tests COMMAND tests)
 
diff --git a/tests/load_tests.cpp b/tests/load_tests.cpp
index bd88604e32..491ca158c1 100644
--- a/tests/load_tests.cpp
+++ b/tests/load_tests.cpp
@@ -9,10 +9,6 @@
 
 #include "mlx/mlx.h"
 
-#ifdef MLX_BUILD_GGUF
-#include "mlx/io/gguf.h"
-#endif
-
 using namespace mlx::core;
 
 std::string get_temp_file(const std::string& name) {
@@ -279,17 +275,6 @@ TEST_CASE("test gguf metadata") {
   }
 }
 
-#ifdef MLX_BUILD_GGUF
-TEST_CASE("test gguf get_shape rejects oversized ndim") {
-  // Malformed GGUF files can declare more dimensions than gguflib's fixed
-  // tensor.dim[] array can hold. get_shape() must reject these rather than
-  // read past the buffer.
-  gguf_tensor tensor{};
-  tensor.ndim = GGUF_TENSOR_MAX_DIM + 1;
-  CHECK_THROWS_AS(get_shape(tensor), std::runtime_error);
-}
-#endif
-
 TEST_CASE("test single array serialization") {
   // Basic test
   {