From ac11a2f2e04ffff5afd4df8a043f3ded59044406 Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Fri, 21 Nov 2025 21:32:27 -0400 Subject: [PATCH] `shfl_down` intrinsics Co-Authored-By: Anton Smirnov --- src/intrinsics.jl | 26 ++++++++++++++++++++++++++ test/intrinsics.jl | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/src/intrinsics.jl b/src/intrinsics.jl index 80ccf6f0..da617398 100644 --- a/src/intrinsics.jl +++ b/src/intrinsics.jl @@ -116,6 +116,32 @@ Declare memory that is local to a workgroup. """ localmemory(::Type{T}, dims) where {T} = localmemory(T, Val(dims)) +""" + shfl_down(val::T, offset::Integer)::T where T + +Read `val` from a lane with higher id given by `offset`. + +!!! note + Backend implementations **must** implement: + ``` + @device_override shfl_down(val::T, offset::Integer)::T where T + ``` + As well as the on-device functionality. +""" +function shfl_down end + +""" + shfl_down_types(::Backend)::Vector{DataType} + +Returns a vector of `DataType`s supported on `backend` + +!!! note + Backend implementations **must** implement this function + only if they support `shfl_down` for any types. +""" +shfl_down_types(::Backend) = DataType[] + + """ barrier() diff --git a/test/intrinsics.jl b/test/intrinsics.jl index 97548c47..68fa9e48 100644 --- a/test/intrinsics.jl +++ b/test/intrinsics.jl @@ -20,6 +20,32 @@ function test_intrinsics_kernel(results) return end +# Do NOT use this kernel as an example for your code. +# It was written assuming one workgroup of size 32 and +# is only valid for those +function shfl_down_test_kernel(a, b) + # This is not valid + idx = KI.get_local_id().x + + temp = KI.localmemory(eltype(b), 32) + temp[idx] = a[idx] + + KI.barrier() + + if idx == 1 + value = temp[idx] + + value = value + KI.shfl_down(value, 16) + value = value + KI.shfl_down(value, 8) + value = value + KI.shfl_down(value, 4) + value = value + KI.shfl_down(value, 2) + value = value + KI.shfl_down(value, 1) + + b[idx] = value + end + return +end + function intrinsics_testsuite(backend, AT) @testset "KernelIntrinsics Tests" begin @testset "Launch parameters" begin @@ -119,6 +145,18 @@ function intrinsics_testsuite(backend, AT) @test local_id_x == expected_local end end + @testset "shfl_down(::$T)" for T in KI.shfl_down_types(backend()) + a = zeros(T, 32) + rand!(a, (1:4)) + + dev_a = AT(a) + dev_b = AT(zeros(T, 32)) + + KI.@kernel backend() workgroupsize=32 shfl_down_test_kernel(dev_a, dev_b) + + b = Array(dev_b) + @test sum(a) ≈ b[1] + end end return nothing end