From b99d9dc88faa8b031b74f1db4e3d675de8696f18 Mon Sep 17 00:00:00 2001 From: Kai Germaschewski Date: Tue, 27 Sep 2022 00:29:20 -0400 Subject: [PATCH 1/3] test_lapack: directly make test matrices / pivot vectors --- tests/test_lapack.cxx | 199 +++++++++++------------------------------- 1 file changed, 51 insertions(+), 148 deletions(-) diff --git a/tests/test_lapack.cxx b/tests/test_lapack.cxx index 67fa4609..0f68023b 100644 --- a/tests/test_lapack.cxx +++ b/tests/test_lapack.cxx @@ -6,144 +6,44 @@ #include "test_helpers.h" -template -void set_A0(T&& h_A) -{ - // matlab/octave: - // A = [1 2 2; 4 4 2; 4 6 4]; - // L,U,p = lu(A) - // first column - h_A(0, 0) = 1; - h_A(1, 0) = 4; - h_A(2, 0) = 4; - // second column - h_A(0, 1) = 2; - h_A(1, 1) = 4; - h_A(2, 1) = 6; - // third column - h_A(0, 2) = 2; - h_A(1, 2) = 2; - h_A(2, 2) = 4; -} +using namespace std::complex_literals; -template -void set_A0_nopiv(T&& h_A) -{ - // matlab/octave: - // does not require pivoting even if pivoting is possible - // A = [1 0 0; 1 2 0; 0 2 3]; - // L,U,p = lu(A) - // first column - h_A(0, 0) = 1.; - h_A(1, 0) = 0.; - h_A(2, 0) = 0.; - // second column - h_A(0, 1) = 1.; - h_A(1, 1) = 2.; - h_A(2, 1) = 0.; - // third column - h_A(0, 2) = 0.; - h_A(1, 2) = 2.; - h_A(2, 2) = 3.; -} +// matlab/octave: +// L,U,p = lu(A) -template -void set_A0_LU(T&& h_A) -{ - // first column factored - h_A(0, 0) = 4.0; - h_A(1, 0) = 1.0; - h_A(2, 0) = 0.25; - // second column - h_A(0, 1) = 4.0; - h_A(1, 1) = 2.0; - h_A(2, 1) = 0.5; - // thrid column - h_A(0, 2) = 2.0; - h_A(1, 2) = 2.0; - h_A(2, 2) = 0.5; -} +// clang-format off +// A0 = [1 2 2; 4 4 2; 4 6 4]; +gt::gtensor A0{{1, 4, 4}, + {2, 4, 6}, + {2, 2, 4}}; -template -void set_A0_piv(T&& h_p) -{ - h_p(0) = 2; - h_p(1) = 3; - h_p(2) = 3; -} +gt::gtensor A0_LU{{4.0, 1.0, 0.25}, + {4.0, 2.0, 0.5 }, + {2.0, 2.0, 0.5 }}; -template -void set_A1_complex(C&& h_A1) -{ - using T = typename C::value_type; - - // second matrix, complex - // matlab/octave: - // B = 
[1+i 2-i 2; 4i 4 2; 4 6i 4]; - // L,U,p = lu(A2); - // first column - h_A1(0, 0) = T(1, 1); - h_A1(1, 0) = T(0, 4); - h_A1(2, 0) = T(4, 0); - // second column - h_A1(0, 1) = T(2, -1); - h_A1(1, 1) = T(4, 0); - h_A1(2, 1) = T(0, 6); - // third column - h_A1(0, 2) = T(2, 0); - h_A1(1, 2) = T(2, 0); - h_A1(2, 2) = T(4, 0); -} +gt::gtensor A0_piv{2, 3, 3}; -template -void set_A1_complex_nopiv(C&& h_A1) -{ - using T = typename C::value_type; - - // second matrix, complex - // matlab/octave: - // B = [1+i 2-i 2; 4i 4 2; 4 6i 4]; - // L,U,p = lu(A2); - // first column - h_A1(0, 0) = T(1, 0); - h_A1(1, 0) = T(0, 0); - h_A1(2, 0) = T(0, 0); - // second column - h_A1(0, 1) = T(1, 0); - h_A1(1, 1) = T(2, 0); - h_A1(2, 1) = T(0, 0); - // third column - h_A1(0, 2) = T(0, 0); - h_A1(1, 2) = T(2, 0); - h_A1(2, 2) = T(3, 0); -} +// A0_nopiv = [1 0 0; 1 2 0; 0 2 3]; +gt::gtensor A0_nopiv{{1, 0, 0}, + {1, 2, 0}, + {0, 2, 3}}; -template -void set_A1_LU_complex(C&& h_A1) -{ - using T = typename C::value_type; +// A1 = [1+i 2-i 2; 4i 4 2; 4 6i 4]; +gt::gtensor, 2> A1{{1. + 1.i, 4.i, 4. }, + {2. - 1.i, 4., 6.i}, + {2. , 2., 4. }}; +// A1_LU +gt::gtensor, 2> A1_LU{{4.i, - 1.i, 0.25 - 0.25i}, + {4. , 10.i, - 0.10i}, + {2. , 4. 
+ 2.i, 1.30 + 0.90i}}; - // first column - h_A1(0, 0) = T(0, 4); - h_A1(1, 0) = T(0, -1); - h_A1(2, 0) = T(0.25, -0.25); - // second column factored - h_A1(0, 1) = T(4, 0); - h_A1(1, 1) = T(0, 10); - h_A1(2, 1) = T(0, -0.1); - // third column factored - h_A1(0, 2) = T(2, 0); - h_A1(1, 2) = T(4, 2); - h_A1(2, 2) = T(1.3, 0.9); -} +gt::gtensor A1_piv{2, 3, 3}; -template -void set_A1_piv(T&& h_p) -{ - h_p(0) = 2; - h_p(1) = 3; - h_p(2) = 3; -} +// A1_nopiv +gt::gtensor, 2> A1_nopiv{{1., 0., 0.}, + {1., 2., 0.}, + {0., 2., 3.}}; +// clang-format on template void test_getrf_batch_real() @@ -162,7 +62,7 @@ void test_getrf_batch_real() auto h_A0 = h_A.view(gt::all, gt::all, 0); - set_A0(h_A0); + h_A0 = A0; h_Aptr(0) = gt::raw_pointer_cast(d_A.data()); gt::copy(h_A, d_A); @@ -235,9 +135,8 @@ void test_getrs_batch_real() gt::gtensor_device d_p(gt::shape(N, batch_size)); // set up first (and only) batch - set_A0_LU(h_A.view(gt::all, gt::all, 0)); - h_Aptr(0) = gt::raw_pointer_cast(d_A.data()); - set_A0_piv(h_p.view(gt::all, 0)); + h_A.view(gt::all, gt::all, 0) = A0_LU; + h_p.view(gt::all, 0) = A0_piv; // set up two col vectors to solve in first (and only) batch // first RHS, col vector [11; 18; 28] @@ -248,6 +147,8 @@ void test_getrs_batch_real() h_B(0, 1, 0) = 73; h_B(1, 1, 0) = 78; h_B(2, 1, 0) = 154; + + h_Aptr(0) = gt::raw_pointer_cast(d_A.data()); h_Bptr(0) = gt::raw_pointer_cast(d_B.data()); gt::copy(h_Aptr, d_Aptr); @@ -301,13 +202,13 @@ void test_getrf_batch_complex() gt::gtensor_device d_info(batch_size); // setup first batch matrix input - set_A0(h_A.view(gt::all, gt::all, 0)); - h_Aptr(0) = gt::raw_pointer_cast(d_A.data()); + h_A.view(gt::all, gt::all, 0) = A0; // setup second batch matrix input - set_A1_complex(h_A.view(gt::all, gt::all, 1)); + h_A.view(gt::all, gt::all, 1) = A1; // TODO: better notation for this, i.e. the ability to get a pointer from a // view if it is wrapping a gcontainer or gtensor_span? 
+ h_Aptr(0) = gt::raw_pointer_cast(d_A.data()); h_Aptr(1) = h_Aptr(0) + N * N; gt::copy(h_A, d_A); @@ -396,7 +297,7 @@ void test_getrf_npvt_batch_complex() gt::gtensor_device d_info(batch_size); // setup first batch matrix input - set_A0_nopiv(h_A.view(gt::all, gt::all, 0)); + h_A.view(gt::all, gt::all, 0) = A0_nopiv; h_Aptr(0) = gt::raw_pointer_cast(d_A.data()); // setup second batch matrix input @@ -502,14 +403,15 @@ void test_getrs_batch_complex() test::gtensor2 d_p(gt::shape(N, batch_size)); // setup input for first batch - set_A0_LU(h_A.view(gt::all, gt::all, 0)); - h_Aptr(0) = gt::raw_pointer_cast(d_A.data()); - set_A0_piv(h_p.view(gt::all, 0)); + h_A.view(gt::all, gt::all, 0) = A0_LU; + h_p.view(gt::all, 0) = A0_piv; // setup input for second batch - set_A1_LU_complex(h_A.view(gt::all, gt::all, 1)); + h_A.view(gt::all, gt::all, 1) = A1_LU; + h_p.view(gt::all, 1) = A1_piv; + + h_Aptr(0) = gt::raw_pointer_cast(d_A.data()); h_Aptr[1] = h_Aptr(0) + N * N; - set_A1_piv(h_p.view(gt::all, 1)); // first batch, first rhs col vector (11; 18; 28) h_B(0, 0, 0) = 11; @@ -608,14 +510,15 @@ void test_getri_batch_complex() gt::gtensor_device d_info(batch_size); // setup input for first batch - set_A0_LU(h_A.view(gt::all, gt::all, 0)); - h_Aptr(0) = gt::raw_pointer_cast(d_A.data()); - set_A0_piv(h_p.view(gt::all, 0)); + h_A.view(gt::all, gt::all, 0) = A0_LU; + h_p.view(gt::all, 0) = A0_piv; // setup input for second batch - set_A1_LU_complex(h_A.view(gt::all, gt::all, 1)); + h_A.view(gt::all, gt::all, 1) = A1_LU; + h_p.view(gt::all, 1) = A1_piv; + + h_Aptr(0) = gt::raw_pointer_cast(d_A.data()); h_Aptr[1] = h_Aptr(0) + N * N; - set_A1_piv(h_p.view(gt::all, 1)); h_Cptr(0) = gt::raw_pointer_cast(d_C.data()); h_Cptr(1) = h_Cptr(0) + N * N; From 0f4e906e9b18554fc86035c631f0d65d51f21a14 Mon Sep 17 00:00:00 2001 From: Kai Germaschewski Date: Tue, 27 Sep 2022 13:32:24 -0400 Subject: [PATCH 2/3] tests/lapack: make tests more concise They are still kinda lengthy compared to what they 
do -- we should probably have a better way to deal with the host/device mirroring. And generally, there's still more room for improvement, I don't think I made all the best choices here... --- tests/test_lapack.cxx | 175 ++++++++++-------------------------- 1 file changed, 42 insertions(+), 133 deletions(-) diff --git a/tests/test_lapack.cxx b/tests/test_lapack.cxx index 0f68023b..1c56ef2d 100644 --- a/tests/test_lapack.cxx +++ b/tests/test_lapack.cxx @@ -21,7 +21,10 @@ gt::gtensor A0_LU{{4.0, 1.0, 0.25}, {4.0, 2.0, 0.5 }, {2.0, 2.0, 0.5 }}; -gt::gtensor A0_piv{2, 3, 3}; +// Math notation is 2, 3, 1, but BLAS uses procedural notation, i.e. +// on the first step no swap is done so one-based index of third row +// is still 3 (no swapping). +gt::gtensor A0_piv{2, 3, 3}; // A0_nopiv = [1 0 0; 1 2 0; 0 2 3]; gt::gtensor A0_nopiv{{1, 0, 0}, {1, 2, 0}, {0, 2, 3}}; @@ -37,7 +40,7 @@ gt::gtensor, 2> A1_LU{{4.i, - 1.i, 0.25 - 0.25i}, {4. , 10.i, - 0.10i}, {2. , 4. + 2.i, 1.30 + 0.90i}}; -gt::gtensor A1_piv{2, 3, 3}; +gt::gtensor A1_piv{2, 3, 3}; // A1_nopiv gt::gtensor, 2> A1_nopiv{{1., 0., 0.}, {1., 2., 0.}, {0., 2., 3.}}; @@ -60,9 +63,7 @@ void test_getrf_batch_real() gt::gtensor h_info(batch_size); gt::gtensor_device d_info(batch_size); - auto h_A0 = h_A.view(gt::all, gt::all, 0); - - h_A0 = A0; + h_A.view(gt::all, gt::all, 0) = A0; h_Aptr(0) = gt::raw_pointer_cast(d_A.data()); gt::copy(h_A, d_A); @@ -78,30 +79,11 @@ void test_getrf_batch_real() gt::copy(d_p, h_p); gt::copy(d_info, h_info); - // first column factored - EXPECT_EQ(h_A0(0, 0), 4.0); - EXPECT_EQ(h_A0(1, 0), 1.0); - EXPECT_EQ(h_A0(2, 0), 0.25); - // second column factored - EXPECT_EQ(h_A0(0, 1), 4.0); - EXPECT_EQ(h_A0(1, 1), 2.0); - EXPECT_EQ(h_A0(2, 1), 0.5); - // third column factored - EXPECT_EQ(h_A0(0, 2), 2.0); - EXPECT_EQ(h_A0(1, 2), 2.0); - EXPECT_EQ(h_A0(2, 2), 0.5); - - // Math notation is 2, 3, 1, but BLAS uses procedural notation, i.e. - // on the thirst step no swap is done so one-based index of third row - // is still 3 (no swapping). 
- EXPECT_EQ(h_p(0, 0), 2); - EXPECT_EQ(h_p(1, 0), 3); - EXPECT_EQ(h_p(2, 0), 3); + auto h_A0 = h_A.view(gt::all, gt::all, 0); + EXPECT_NEAR(gt::sum_squares(h_A0 - A0_LU), 0., 1e-14); + EXPECT_EQ(h_p.view(gt::all, 0), A0_piv); - for (int b = 0; b < batch_size; b++) { - // A_i factored successfully - EXPECT_EQ(h_info(b), 0); - } + EXPECT_EQ(h_info, gt::zeros({batch_size})); } TEST(lapack, sgetrf_batch) @@ -140,13 +122,9 @@ void test_getrs_batch_real() // set up two col vectors to solve in first (and only) batch // first RHS, col vector [11; 18; 28] - h_B(0, 0, 0) = 11; - h_B(1, 0, 0) = 18; - h_B(2, 0, 0) = 28; + h_B.view(gt::all, 0, 0) = gt::gtensor{11., 18., 28.}; // second RHS, col vector [73; 78; 154] - h_B(0, 1, 0) = 73; - h_B(1, 1, 0) = 78; - h_B(2, 1, 0) = 154; + h_B.view(gt::all, 1, 0) = gt::gtensor{73., 78., 154.}; h_Aptr(0) = gt::raw_pointer_cast(d_A.data()); h_Bptr(0) = gt::raw_pointer_cast(d_B.data()); @@ -166,13 +144,9 @@ void test_getrs_batch_real() gt::copy(d_B, h_B); // solution vector [1; 2; 3] - EXPECT_EQ(h_B(0, 0, 0), 1.0); - EXPECT_EQ(h_B(1, 0, 0), 2.0); - EXPECT_EQ(h_B(2, 0, 0), 3.0); + EXPECT_EQ(h_B.view(gt::all, 0, 0), (gt::gtensor{1., 2., 3.})); // solution vector [-3; 7; 31] - EXPECT_EQ(h_B(0, 1, 0), -3.0); - EXPECT_EQ(h_B(1, 1, 0), 7.0); - EXPECT_EQ(h_B(2, 1, 0), 31.0); + EXPECT_EQ(h_B.view(gt::all, 1, 0), (gt::gtensor{-3., 7., 31.})); } TEST(lapack, sgetrs_batch) @@ -225,51 +199,16 @@ void test_getrf_batch_complex() gt::copy(d_info, h_info); // first batch matrix result - // first column factored - expect_complex_near(h_A(0, 0, 0), 4.0); - expect_complex_near(h_A(1, 0, 0), 1.0); - expect_complex_near(h_A(2, 0, 0), 0.25); - // second column factored - expect_complex_near(h_A(0, 1, 0), 4.0); - expect_complex_near(h_A(1, 1, 0), 2.0); - expect_complex_near(h_A(2, 1, 0), 0.5); - // third column factored - expect_complex_near(h_A(0, 2, 0), 2.0); - expect_complex_near(h_A(1, 2, 0), 2.0); - expect_complex_near(h_A(2, 2, 0), 0.5); - - // Math 
notation is 2, 3, 1, but BLAS uses procedural notation, i.e. - // on the thirst step no swap is done so one-based index of third row - // is still 3 (no swapping). - EXPECT_EQ(h_p(0, 0), 2); - EXPECT_EQ(h_p(1, 0), 3); - EXPECT_EQ(h_p(2, 0), 3); + gt::gtensor h_A0 = h_A.view(gt::all, gt::all, 0); + EXPECT_NEAR(gt::sum_squares(h_A0 - gt::gtensor(A0_LU)), 0., 1e-14); + EXPECT_EQ(h_p.view(gt::all, 0), A0_piv); // second batch matrix result - // first column factored - expect_complex_near(h_A(0, 0, 1), T(0, 4)); - expect_complex_near(h_A(1, 0, 1), T(0, -1)); - expect_complex_near(h_A(2, 0, 1), T(0.25, -0.25)); - // second column factored - expect_complex_near(h_A(0, 1, 1), T(4, 0)); - expect_complex_near(h_A(1, 1, 1), T(0, 10)); - expect_complex_near(h_A(2, 1, 1), T(0, -0.1)); - // third column factored - expect_complex_near(h_A(0, 2, 1), T(2, 0)); - expect_complex_near(h_A(1, 2, 1), T(4, 2)); - expect_complex_near(h_A(2, 2, 1), T(1.3, 0.9)); + gt::gtensor h_A1 = h_A.view(gt::all, gt::all, 1); + EXPECT_NEAR(gt::sum_squares(h_A1 - gt::gtensor(A1_LU)), 0., 1e-14); + EXPECT_EQ(h_p.view(gt::all, 1), A1_piv); - // Math notation is 2, 3, 1, but BLAS uses procedural notation, i.e. - // on the thirst step no swap is done so one-based index of third row - // is still 3 (no swapping). - EXPECT_EQ(h_p(0, 1), 2); - EXPECT_EQ(h_p(1, 1), 3); - EXPECT_EQ(h_p(2, 1), 3); - - for (int b = 0; b < batch_size; b++) { - // A_i factored successfully - EXPECT_EQ(h_info(b), 0); - } + EXPECT_EQ(h_info, gt::zeros({batch_size})); } TEST(lapack, cgetrf_batch) @@ -298,12 +237,12 @@ void test_getrf_npvt_batch_complex() // setup first batch matrix input h_A.view(gt::all, gt::all, 0) = A0_nopiv; - h_Aptr(0) = gt::raw_pointer_cast(d_A.data()); - // setup second batch matrix input - set_A1_complex_nopiv(h_A.view(gt::all, gt::all, 1)); + h_A.view(gt::all, gt::all, 1) = A1_nopiv; + // TODO: better notation for this, i.e. 
the ability to get a pointer from a // view if it is wrapping a gcontainer or gtensor_span? + h_Aptr(0) = gt::raw_pointer_cast(d_A.data()); h_Aptr(1) = h_Aptr(0) + N * N; gt::copy(h_A, d_A); @@ -414,21 +353,15 @@ void test_getrs_batch_complex() h_Aptr[1] = h_Aptr(0) + N * N; // first batch, first rhs col vector (11; 18; 28) - h_B(0, 0, 0) = 11; - h_B(1, 0, 0) = 18; - h_B(2, 0, 0) = 28; + h_B.view(gt::all, 0, 0) = gt::gtensor{11., 18., 28.}; // first batch, second rhs col vector (73; 78; 154) - h_B(0, 1, 0) = 73; - h_B(1, 1, 0) = 78; - h_B(2, 1, 0) = 154; + h_B.view(gt::all, 1, 0) = gt::gtensor{73., 78., 154.}; // second batch, first rhs col vector (73; 78; 154) - h_B(0, 0, 1) = T(11, -1); - h_B(1, 0, 1) = T(14, 4); - h_B(2, 0, 1) = T(16, 12); + h_B.view(gt::all, 0, 1) = + gt::gtensor{T(11., -1.), T(14., 4.), T(16., 12.)}; // second batch, second rhs col vector (73-10i; 90-12i; 112 + 42i) - h_B(0, 1, 1) = T(73, -10); - h_B(1, 1, 1) = T(90, -12); - h_B(2, 1, 1) = T(112, 42); + h_B.view(gt::all, 1, 1) = + gt::gtensor{T(73., -10.), T(90., -12.), T(112., 42.)}; h_Bptr(0) = gt::raw_pointer_cast(d_B.data()); h_Bptr(1) = h_Bptr(0) + N * NRHS; @@ -449,21 +382,12 @@ void test_getrs_batch_complex() gt::copy(d_B, h_B); // first batch, first solution vector [1; 2; 3] - expect_complex_near(h_B(0, 0, 0), 1.0); - expect_complex_near(h_B(1, 0, 0), 2.0); - expect_complex_near(h_B(2, 0, 0), 3.0); // first batch, second solution vector [-3; 7; 31] - expect_complex_near(h_B(0, 1, 0), -3.0); - expect_complex_near(h_B(1, 1, 0), 7.0); - expect_complex_near(h_B(2, 1, 0), 31.0); // second batch, first solution vector [1; 2; 3] - expect_complex_near(h_B(0, 0, 1), 1.0); - expect_complex_near(h_B(1, 0, 1), 2.0); - expect_complex_near(h_B(2, 0, 1), 3.0); // second batch, second solution vector [-3; 7; 31] - expect_complex_near(h_B(0, 1, 1), -3.0); - expect_complex_near(h_B(1, 1, 1), 7.0); - expect_complex_near(h_B(2, 1, 1), 31.0); + auto expected_B = gt::gtensor( + {{{1., 2., 3.}, {-3., 7., 
31.}}, {{1., 2., 3.}, {-3., 7., 31.}}}); + EXPECT_NEAR(gt::sum_squares(h_B - expected_B), 0., 1e-10); } TEST(lapack, cgetrs_batch) @@ -539,30 +463,15 @@ void test_getri_batch_complex() gt::copy(d_C, h_C); - // first batch, first solution col [1; -2; 2] - expect_complex_near(h_C(0, 0, 0), 1.0); - expect_complex_near(h_C(1, 0, 0), -2.0); - expect_complex_near(h_C(2, 0, 0), 2.0); - // first batch, second solution col [1; -1; 0.5] - expect_complex_near(h_C(0, 1, 0), 1.0); - expect_complex_near(h_C(1, 1, 0), -1.0); - expect_complex_near(h_C(2, 1, 0), 0.5); - // first batch, third solution col [-1; 1.5; -1] - expect_complex_near(h_C(0, 2, 0), -1.0); - expect_complex_near(h_C(1, 2, 0), 1.5); - expect_complex_near(h_C(2, 2, 0), -1.0); - // second batch, first solution col - expect_complex_near(h_C(0, 0, 1), T(-0.1, 0.3)); - expect_complex_near(h_C(1, 0, 1), T(0.04, 0.28)); - expect_complex_near(h_C(2, 0, 1), T(0.52, -0.36)); - // second batch, second solution col - expect_complex_near(h_C(0, 1, 1), T(-0.04, -0.28)); - expect_complex_near(h_C(1, 1, 1), T(0.016, -0.088)); - expect_complex_near(h_C(2, 1, 1), T(-0.092, 0.256)); - // second batch, third solution col - expect_complex_near(h_C(0, 2, 1), T(0.07, -0.01)); - expect_complex_near(h_C(1, 2, 1), T(-0.028, -0.096)); - expect_complex_near(h_C(2, 2, 1), T(0.036, 0.052)); + // clang-format off + auto expected_C = + gt::gtensor({{{ 1.0, -2.0, 2.0}, + { 1.0, -1.0, .5}, + {-1.0, 1.5, -1.0}}, + {{T(-0.1 , 0.3 ), T( 0.04 , 0.28 ), T( 0.52 , -0.36 )}, + {T(-0.04, -0.28), T( 0.016, -0.088), T(-0.092, 0.256)}, + {T( 0.07, -0.01), T(-0.028, -0.096), T( 0.036, 0.052)}}}); + // clang-format on } TEST(lapack, cgetri_batch) From 1bf04a349b7c62288e2891caa1fde34d52a9798c Mon Sep 17 00:00:00 2001 From: Kai Germaschewski Date: Tue, 27 Sep 2022 22:25:50 -0400 Subject: [PATCH 3/3] test/lapack: use gt::launch for checking against expected --- tests/test_lapack.cxx | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) 
diff --git a/tests/test_lapack.cxx b/tests/test_lapack.cxx index 1c56ef2d..6e9d8625 100644 --- a/tests/test_lapack.cxx +++ b/tests/test_lapack.cxx @@ -79,8 +79,11 @@ void test_getrf_batch_real() gt::copy(d_p, h_p); gt::copy(d_info, h_info); - auto h_A0 = h_A.view(gt::all, gt::all, 0); - EXPECT_NEAR(gt::sum_squares(h_A0 - A0_LU), 0., 1e-14); + gt::launch_host<2>( + A0_LU.shape(), GT_LAMBDA(int i, int j) { + EXPECT_NEAR(h_A(i, j, 0), A0_LU(i, j), 1e-14) + << "i = " << i << " j = " << j; + }); EXPECT_EQ(h_p.view(gt::all, 0), A0_piv); EXPECT_EQ(h_info, gt::zeros({batch_size})); @@ -199,13 +202,17 @@ void test_getrf_batch_complex() gt::copy(d_info, h_info); // first batch matrix result - gt::gtensor h_A0 = h_A.view(gt::all, gt::all, 0); - EXPECT_NEAR(gt::sum_squares(h_A0 - gt::gtensor(A0_LU)), 0., 1e-14); + gt::launch_host<2>( + A0_LU.shape(), GT_LAMBDA(int i, int j) { + expect_complex_near(h_A(i, j, 0), T(A0_LU(i, j)), 1e-6); + }); EXPECT_EQ(h_p.view(gt::all, 0), A0_piv); // second batch matrix result - gt::gtensor h_A1 = h_A.view(gt::all, gt::all, 1); - EXPECT_NEAR(gt::sum_squares(h_A1 - gt::gtensor(A1_LU)), 0., 1e-14); + gt::launch_host<2>( + A0_LU.shape(), GT_LAMBDA(int i, int j) { + expect_complex_near(h_A(i, j, 1), T(A1_LU(i, j)), 1e-6); + }); EXPECT_EQ(h_p.view(gt::all, 1), A1_piv); EXPECT_EQ(h_info, gt::zeros({batch_size})); @@ -387,7 +394,10 @@ void test_getrs_batch_complex() // second batch, second solution vector [-3; 7; 31] auto expected_B = gt::gtensor( {{{1., 2., 3.}, {-3., 7., 31.}}, {{1., 2., 3.}, {-3., 7., 31.}}}); - EXPECT_NEAR(gt::sum_squares(h_B - expected_B), 0., 1e-10); + gt::launch_host<3>( + h_B.shape(), GT_LAMBDA(int i, int j, int k) { + expect_complex_near(h_B(i, j, k), expected_B(i, j, k), 1e-6); + }); } TEST(lapack, cgetrs_batch)