From 97bcbe7177dc255c013fc5d95f21dd909ab6da45 Mon Sep 17 00:00:00 2001 From: Vivek Trivedi Date: Tue, 2 Dec 2025 06:34:17 -0800 Subject: [PATCH] Using buffer for weight tensors for quantized mat mul op. (#15990) Summary: This change makes the prepacked int4 linear weight tensor use buffer storage (utils::kBuffer) instead of a 2D texture (utils::kTexture2D), which affects the performance and memory usage of the quantized matrix multiplication operation in the ExecuTorch Vulkan backend. By using a buffer for weight tensors, the operation may become more efficient and use less memory, especially for large matrices. Reviewed By: yipjustin Differential Revision: D87911255 --- backends/vulkan/runtime/graph/ops/impl/Staging.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/vulkan/runtime/graph/ops/impl/Staging.cpp b/backends/vulkan/runtime/graph/ops/impl/Staging.cpp index 40de9b59e81..db7c5a7e88b 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Staging.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Staging.cpp @@ -285,7 +285,7 @@ ValueRef prepack_int4_linear_weight_transposed_interleaved( const int64_t N = qmat2_orig_sizes.at(ndim - 2); const int64_t N_div2 = N / int64_t(2); - utils::StorageType storage_type = utils::kTexture2D; + utils::StorageType storage_type = utils::kBuffer; uint32_t max_extent = graph.context()->adapter_ptr()->max_texture2d_dim(); if (N_div2 > max_extent * 4 || K > max_extent) { storage_type = utils::kBuffer;