From 8220b6d4f936f92afbeaf1aa64d54fc3a5fed54c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?= Date: Mon, 17 Nov 2025 17:31:12 +0100 Subject: [PATCH 1/2] support non-contiguous i32 to i32 copy --- ggml/src/ggml-cuda/cpy.cu | 6 ++++++ ggml/src/ggml-cuda/ggml-cuda.cu | 3 +++ 2 files changed, 9 insertions(+) diff --git a/ggml/src/ggml-cuda/cpy.cu b/ggml/src/ggml-cuda/cpy.cu index 50612237c8a23..5424b122b82aa 100644 --- a/ggml/src/ggml-cuda/cpy.cu +++ b/ggml/src/ggml-cuda/cpy.cu @@ -475,6 +475,12 @@ void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, gg } else { ggml_cpy_flt_cuda (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream); } + } else if (src0->type == GGML_TYPE_I32 && src1->type == GGML_TYPE_I32) { + if (can_be_transposed) { + ggml_cpy_flt_cuda (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream); + } else { + ggml_cpy_flt_cuda (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream); + } } else if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_I32) { if (contiguous_srcs) { ggml_cpy_flt_contiguous_cuda (src0_ddc, src1_ddc, ne, main_stream); diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index 7d792e60cf9c5..c3500ca6eaf5b 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -4011,6 +4011,9 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g if (src0_type == GGML_TYPE_I32 && src1_type == GGML_TYPE_F32) { return true; } + if (src0_type == GGML_TYPE_I32 && src1_type == GGML_TYPE_I32) { + return true; + } if (src0_type == src1_type && ggml_is_contiguous(op->src[0]) && ggml_is_contiguous(op->src[1])) { return true; } From 2ba2b583c2c639cd9ab5db54686d3934347f07f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?= Date: Mon, 17 Nov 2025 17:31:47 +0100 Subject: [PATCH 2/2] add tests --- tests/test-backend-ops.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index 267bead8c4ab7..3e300e6470751 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -6943,12 +6943,17 @@ static std::vector> make_test_cases_eval() { test_cases.emplace_back(new test_cpy(GGML_TYPE_F16, GGML_TYPE_F16, {256, 4, 1, 1}, {0, 0, 0, 0}, {0, 0, 0, 0}, true)); test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_F32, {256, 4, 1, 1}, {0, 0, 0, 0}, {0, 0, 0, 0}, true)); test_cases.emplace_back(new test_cpy(GGML_TYPE_BF16, GGML_TYPE_BF16, {256, 4, 1, 1}, {0, 0, 0, 0}, {0, 0, 0, 0}, true)); + test_cases.emplace_back(new test_cpy(GGML_TYPE_I32, GGML_TYPE_I32, {256, 4, 1, 1}, {0, 0, 0, 0}, {0, 0, 0, 0}, true)); + test_cases.emplace_back(new test_cpy(GGML_TYPE_I32, GGML_TYPE_I32, {256, 1, 4, 1}, {1, 2, 0, 3}, {0, 0, 0, 0})); test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_F32, {256, 1, 4, 1}, {1, 2, 0, 3}, {0, 0, 0, 0})); test_cases.emplace_back(new test_cont()); test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {2, 1, 1 ,1})); test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {2, 1, 3 ,5})); test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {2, 3, 5 ,7})); + test_cases.emplace_back(new test_cont(GGML_TYPE_I32, {2, 1, 1 ,1})); + test_cases.emplace_back(new test_cont(GGML_TYPE_I32, {2, 1, 3 ,5})); + test_cases.emplace_back(new test_cont(GGML_TYPE_I32, {2, 3, 5 ,7})); test_cases.emplace_back(new test_cont(GGML_TYPE_F16, {2, 1, 1 ,1})); test_cases.emplace_back(new test_cont(GGML_TYPE_F16, {2, 1, 3 ,5})); test_cases.emplace_back(new test_cont(GGML_TYPE_F16, {2, 3, 5 ,7}));