From 51c1bd12eac0ed992a66f0bc89350fcab2c33206 Mon Sep 17 00:00:00 2001 From: jongyunhur Date: Tue, 25 Nov 2025 16:20:20 +0900 Subject: [PATCH] Fix: Correct cuBLAS matrix multiplication order for Row-Major inputs - Swapped multiplication order to B * A and dimensions to (n, m). - Added comments explaining Row-Major to Column-Major handling. --- nvml_cublas.cu | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/nvml_cublas.cu b/nvml_cublas.cu index 0ba9ea5..df02444 100644 --- a/nvml_cublas.cu +++ b/nvml_cublas.cu @@ -104,6 +104,9 @@ void calculate( int const &m, int const &n, int const &k, nvmlClass &nvml ) { /* Fill the matrices with test data */ /* Assume square matrices */ + /* Note: We generate data in Row-Major format, but cuBLAS interprets it as Column-Major. + * This means matrices are effectively transposed when passed to cuBLAS. + * To compensate, we ask cuBLAS to compute B × A; read back as Row-Major, the result is C = A × B. */ for ( int i = 0; i < m * m; i++ ) { h_A[i] = std::rand( ) / static_cast( RAND_MAX ); h_B[i] = std::rand( ) / static_cast( RAND_MAX ); @@ -124,7 +127,8 @@ void calculate( int const &m, int const &n, int const &k, nvmlClass &nvml ) { data_type *d_C_ptr = thrust::raw_pointer_cast( &d_C[0] ); /* Performs operation using cublas */ - cublasSgemm( handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, d_A_ptr, lda, d_B_ptr, ldb, &beta, d_C_ptr, ldc ); + /* Compute C = B * A to compensate for Row-Major to Column-Major mismatch. */ + cublasSgemm( handle, CUBLAS_OP_N, CUBLAS_OP_N, n, m, k, &alpha, d_B_ptr, ldb, d_A_ptr, lda, &beta, d_C_ptr, ldc ); CUDA_RT_CALL( cudaDeviceSynchronize( ) ); /* Allocate host memory for reading back the result from device memory */