From c2bfbbf6424800007888d016f13c0b501b200c68 Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Mon, 17 Nov 2025 22:54:28 +0100 Subject: [PATCH 1/5] Fix too relaxed check on CUDA "fast copy" (can_be_transposed) condition --- ggml/src/ggml-cuda/cpy.cu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ggml/src/ggml-cuda/cpy.cu b/ggml/src/ggml-cuda/cpy.cu index 50612237c8a23..6d9fe5145880e 100644 --- a/ggml/src/ggml-cuda/cpy.cu +++ b/ggml/src/ggml-cuda/cpy.cu @@ -384,7 +384,8 @@ void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, gg char * src1_ddc = (char *) src1->data; const bool contiguous_srcs = ggml_is_contiguous(src0) && ggml_is_contiguous(src1); - const bool can_be_transposed = nb01 == (int64_t)ggml_element_size(src0) && src0->ne[3] == 1; + const bool can_be_transposed = nb01 == (int64_t)ggml_element_size(src0) && + src0->ne[3] == 1 && nb02 == ne00 * ne01 * (int64_t)ggml_element_size(src0); if (src0->type == src1->type && contiguous_srcs) { GGML_ASSERT(ggml_nbytes(src0) == ggml_nbytes(src1)); From 5e7c26f297a57d4feac9811016ac72d94e1fb8a9 Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Mon, 17 Nov 2025 22:59:28 +0100 Subject: [PATCH 2/5] Argh. --- ggml/src/ggml-cuda/cpy.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/src/ggml-cuda/cpy.cu b/ggml/src/ggml-cuda/cpy.cu index 6d9fe5145880e..c1afde9627f09 100644 --- a/ggml/src/ggml-cuda/cpy.cu +++ b/ggml/src/ggml-cuda/cpy.cu @@ -384,7 +384,7 @@ void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, gg char * src1_ddc = (char *) src1->data; const bool contiguous_srcs = ggml_is_contiguous(src0) && ggml_is_contiguous(src1); - const bool can_be_transposed = nb01 == (int64_t)ggml_element_size(src0) && + const bool can_be_transposed = nb01 == (int64_t)ggml_element_size(src0) && src0->ne[3] == 1 && nb02 == ne00 * ne01 * (int64_t)ggml_element_size(src0); if (src0->type == src1->type && contiguous_srcs) { From d51f719607bbdbc4d19b28f2b87e5c776f218300 Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Mon, 17 Nov 2025 23:29:20 +0100 Subject: [PATCH 3/5] Making CISC happy ;) --- tests/test-backend-ops.cpp | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index 267bead8c4ab7..173bc1a04f12a 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -2800,6 +2800,33 @@ struct test_cont : public test_case { } }; +struct test_irregular_cont : public test_case { + const ggml_type type; + const std::array ne; + + std::string vars() override { + return VARS_TO_STR2(type, ne); + } + + test_irregular_cont(ggml_type type = GGML_TYPE_F32, + std::array ne = {1, 4, 2, 1}) + : type(type), ne(ne) {} + + ggml_tensor * build_graph(ggml_context * ctx) override { + ggml_tensor * src = ggml_new_tensor(ctx, type, 4, ne.data()); + ggml_set_param(src); + ggml_set_name(src, "src"); + + ggml_tensor * view = ggml_view_4d(ctx, src, src->ne[0], 1, src->ne[2], src->ne[3], + src->nb[1], src->nb[2], src->nb[3], src->nb[0] * (src->ne[1] - 1)); + + ggml_tensor * out = ggml_cont(ctx, view); + ggml_set_name(out, "out"); + + return out; + } +}; + // GGML_OP_ADD // GGML_OP_SUB // GGML_OP_MUL @@ -6956,6 +6983,11 @@ static std::vector> make_test_cases_eval() { test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, {2, 1, 3 ,5})); test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, {2, 3, 5 ,7})); + test_cases.emplace_back(new test_irregular_cont()); + test_cases.emplace_back(new test_irregular_cont(GGML_TYPE_F32, {1, 8, 17, 1})); + test_cases.emplace_back(new test_irregular_cont(GGML_TYPE_BF16, {1, 4, 2, 1})); + test_cases.emplace_back(new test_irregular_cont(GGML_TYPE_BF16, {1, 8, 17, 1})); + auto add_test_bin_bcast = [&](ggml_type type, std::array ne, std::array nr) { for (auto op : {ggml_add, ggml_sub, ggml_mul, ggml_div}) { test_cases.emplace_back(new test_bin_bcast(op, type, ne, nr)); From f378da999b17f9aa024164b97a3b1077b09cbedf Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Tue, 18 Nov 2025 13:40:16 +0100 Subject: [PATCH 4/5] Integrate CONT tests --- tests/test-backend-ops.cpp | 67 ++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 36 deletions(-) diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index 173bc1a04f12a..7f01ac959f714 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -2776,51 +2776,34 @@ struct test_cpy : public test_case { struct test_cont : public test_case { const ggml_type type; const std::array ne; + bool use_view_slice; std::string vars() override { - return VARS_TO_STR2(type, ne); + return VARS_TO_STR3(type, ne, use_view_slice); } test_cont(ggml_type type = GGML_TYPE_F32, - std::array ne = {10, 10, 10, 1}) - : type(type), ne(ne) {} + std::array ne = {10, 10, 10, 1}, + bool use_view_slice = false) + : type(type), ne(ne), use_view_slice(use_view_slice) {} ggml_tensor * build_graph(ggml_context * ctx) override { ggml_tensor * src = ggml_new_tensor(ctx, type, 4, ne.data()); ggml_set_param(src); ggml_set_name(src, "src"); - src = ggml_transpose(ctx, src); - ggml_set_name(src, "src_transposed"); - ggml_tensor * out = ggml_cont(ctx, src); - ggml_set_name(out, "out"); - - return out; - } -}; - -struct test_irregular_cont : public test_case { - const ggml_type type; - const std::array ne; - - std::string vars() override { - return VARS_TO_STR2(type, ne); - } - - test_irregular_cont(ggml_type type = GGML_TYPE_F32, - std::array ne = {1, 4, 2, 1}) - : type(type), ne(ne) {} - - ggml_tensor * build_graph(ggml_context * ctx) override { - ggml_tensor * src = ggml_new_tensor(ctx, type, 4, ne.data()); - ggml_set_param(src); - ggml_set_name(src, "src"); - - ggml_tensor * view = ggml_view_4d(ctx, src, src->ne[0], 1, src->ne[2], src->ne[3], - src->nb[1], src->nb[2], src->nb[3], src->nb[0] * (src->ne[1] - 1)); + ggml_tensor * dst; + if (use_view_slice) { + dst = ggml_view_4d(ctx, src, src->ne[0], 1, src->ne[2], src->ne[3], + src->nb[1], src->nb[2], src->nb[3], src->nb[0] * (src->ne[1] - 1)); + ggml_set_name(dst, "src_view_slice"); + } else { + dst = ggml_transpose(ctx, src); + ggml_set_name(dst, "src_transposed"); + } - ggml_tensor * out = ggml_cont(ctx, view); + ggml_tensor * out = ggml_cont(ctx, dst); ggml_set_name(out, "out"); return out; @@ -6983,10 +6966,22 @@ static std::vector> make_test_cases_eval() { test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, {2, 1, 3 ,5})); test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, {2, 3, 5 ,7})); - test_cases.emplace_back(new test_irregular_cont()); - test_cases.emplace_back(new test_irregular_cont(GGML_TYPE_F32, {1, 8, 17, 1})); - test_cases.emplace_back(new test_irregular_cont(GGML_TYPE_BF16, {1, 4, 2, 1})); - test_cases.emplace_back(new test_irregular_cont(GGML_TYPE_BF16, {1, 8, 17, 1})); + test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {2, 1, 1 ,1}, true)); + test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {2, 1, 3 ,5}, true)); + test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {2, 3, 5 ,7}, true)); + test_cases.emplace_back(new test_cont(GGML_TYPE_F16, {2, 1, 1 ,1}, true)); + test_cases.emplace_back(new test_cont(GGML_TYPE_F16, {2, 1, 3 ,5}, true)); + test_cases.emplace_back(new test_cont(GGML_TYPE_F16, {2, 3, 5 ,7}, true)); + test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, {2, 1, 1 ,1}, true)); + test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, {2, 1, 3 ,5}, true)); + test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, {2, 3, 5 ,7}, true)); + + test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {1, 4, 2, 1}, true)); + test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {1, 8, 17, 1}, true)); + test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, {1, 4, 2, 1}, true)); + test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, {1, 8, 17, 1}, true)); + test_cases.emplace_back(new test_cont(GGML_TYPE_F16, {1, 4, 2, 1}, true)); + test_cases.emplace_back(new test_cont(GGML_TYPE_F16, {1, 8, 17, 1}, true)); auto add_test_bin_bcast = [&](ggml_type type, std::array ne, std::array nr) { for (auto op : {ggml_add, ggml_sub, ggml_mul, ggml_div}) { From ccd0dfaaab5249e5f89cc756b82790dccad35056 Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Tue, 18 Nov 2025 14:02:55 +0100 Subject: [PATCH 5/5] Use loopy loop --- tests/test-backend-ops.cpp | 35 ++++++++--------------------------- 1 file changed, 8 insertions(+), 27 deletions(-) diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index 7f01ac959f714..d58f1e6889dbe 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -6955,33 +6955,14 @@ static std::vector> make_test_cases_eval() { test_cases.emplace_back(new test_cpy(GGML_TYPE_BF16, GGML_TYPE_BF16, {256, 4, 1, 1}, {0, 0, 0, 0}, {0, 0, 0, 0}, true)); test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_F32, {256, 1, 4, 1}, {1, 2, 0, 3}, {0, 0, 0, 0})); - test_cases.emplace_back(new test_cont()); - test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {2, 1, 1 ,1})); - test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {2, 1, 3 ,5})); - test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {2, 3, 5 ,7})); - test_cases.emplace_back(new test_cont(GGML_TYPE_F16, {2, 1, 1 ,1})); - test_cases.emplace_back(new test_cont(GGML_TYPE_F16, {2, 1, 3 ,5})); - test_cases.emplace_back(new test_cont(GGML_TYPE_F16, {2, 3, 5 ,7})); - test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, {2, 1, 1 ,1})); - test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, {2, 1, 3 ,5})); - test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, {2, 3, 5 ,7})); - - test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {2, 1, 1 ,1}, true)); - test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {2, 1, 3 ,5}, true)); - test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {2, 3, 5 ,7}, true)); - test_cases.emplace_back(new test_cont(GGML_TYPE_F16, {2, 1, 1 ,1}, true)); - test_cases.emplace_back(new test_cont(GGML_TYPE_F16, {2, 1, 3 ,5}, true)); - test_cases.emplace_back(new test_cont(GGML_TYPE_F16, {2, 3, 5 ,7}, true)); - test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, {2, 1, 1 ,1}, true)); - test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, {2, 1, 3 ,5}, true)); - test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, {2, 3, 5 ,7}, true)); - - test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {1, 4, 2, 1}, true)); - test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {1, 8, 17, 1}, true)); - test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, {1, 4, 2, 1}, true)); - test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, {1, 8, 17, 1}, true)); - test_cases.emplace_back(new test_cont(GGML_TYPE_F16, {1, 4, 2, 1}, true)); - test_cases.emplace_back(new test_cont(GGML_TYPE_F16, {1, 8, 17, 1}, true)); + for (ggml_type type_dst : { GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_BF16 }) { + for (bool use_view_slice : { true, false }) { + for (std::array ne : std::initializer_list>{ {2, 1, 1, 1}, {2, 1, 3, 5}, + {2, 3, 5, 7}, {1, 4, 2, 1}, {1, 8, 17, 1}, {10, 10, 10, 1} }) { + test_cases.emplace_back(new test_cont(type_dst, ne, use_view_slice)); + } + } + } auto add_test_bin_bcast = [&](ggml_type type, std::array ne, std::array nr) { for (auto op : {ggml_add, ggml_sub, ggml_mul, ggml_div}) {