diff --git a/clang/lib/CodeGen/TargetBuiltins/NVPTX.cpp b/clang/lib/CodeGen/TargetBuiltins/NVPTX.cpp index 8a1cab3417d98..a4486965a851a 100644 --- a/clang/lib/CodeGen/TargetBuiltins/NVPTX.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/NVPTX.cpp @@ -1197,6 +1197,27 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall( CGM.getIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count), {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))}); + case NVPTX::BI__nvvm_bar0_and: + return Builder.CreateZExt( + Builder.CreateIntrinsic( + Intrinsic::nvvm_barrier_cta_red_and_aligned_all, {}, + {Builder.getInt32(0), + Builder.CreateICmpNE(EmitScalarExpr(E->getArg(0)), + Builder.getInt32(0))}), + Builder.getInt32Ty()); + case NVPTX::BI__nvvm_bar0_or: + return Builder.CreateZExt( + Builder.CreateIntrinsic( + Intrinsic::nvvm_barrier_cta_red_or_aligned_all, {}, + {Builder.getInt32(0), + Builder.CreateICmpNE(EmitScalarExpr(E->getArg(0)), + Builder.getInt32(0))}), + Builder.getInt32Ty()); + case NVPTX::BI__nvvm_bar0_popc: + return Builder.CreateIntrinsic( + Intrinsic::nvvm_barrier_cta_red_popc_aligned_all, {}, + {Builder.getInt32(0), Builder.CreateICmpNE(EmitScalarExpr(E->getArg(0)), + Builder.getInt32(0))}); default: return nullptr; } diff --git a/clang/test/CodeGen/builtins-nvptx.c b/clang/test/CodeGen/builtins-nvptx.c index 7a19fc8e24419..cd1447374d000 100644 --- a/clang/test/CodeGen/builtins-nvptx.c +++ b/clang/test/CodeGen/builtins-nvptx.c @@ -272,6 +272,27 @@ __device__ void nvvm_math(float f1, float f2, double d1, double d2) { __syncthreads(); } +__device__ int nvvm_bar0_reductions(int i) { + // CHECK-LABEL: nvvm_bar0_reductions + + int ret = 0; + // CHECK: %[[NE:[0-9]+]] = icmp ne i32 %{{[0-9]+}}, 0 + // CHECK: %[[AND:[0-9]+]] = call i1 @llvm.nvvm.barrier.cta.red.and.aligned.all(i32 0, i1 %[[NE]]) + // CHECK: zext i1 %[[AND]] to i32 + ret += __nvvm_bar0_and(i); + + // CHECK: %[[NE:[0-9]+]] = icmp ne i32 %{{[0-9]+}}, 0 + // CHECK: %[[OR:[0-9]+]] = call i1 @llvm.nvvm.barrier.cta.red.or.aligned.all(i32 0, i1 %[[NE]]) + // CHECK: zext i1 %[[OR]] to i32 + ret += __nvvm_bar0_or(i); + + // CHECK: %[[NE:[0-9]+]] = icmp ne i32 %{{[0-9]+}}, 0 + // CHECK: %[[POPC:[0-9]+]] = call i32 @llvm.nvvm.barrier.cta.red.popc.aligned.all(i32 0, i1 %[[NE]]) + ret += __nvvm_bar0_popc(i); + + return ret; +} + __device__ int di; __shared__ int si; __device__ long dl;