diff --git a/llvm/include/llvm/CodeGen/ValueTypes.td b/llvm/include/llvm/CodeGen/ValueTypes.td
index 74ea86774a8ee..3ef87dc38a85d 100644
--- a/llvm/include/llvm/CodeGen/ValueTypes.td
+++ b/llvm/include/llvm/CodeGen/ValueTypes.td
@@ -113,6 +113,8 @@ def v4096i1 : VTVec<4096, i1>; // 4096 x i1 vector value
 def v128i2 : VTVec<128, i2>; // 128 x i2 vector value
 def v256i2 : VTVec<256, i2>; // 256 x i2 vector value
 
+def v16i4 : VTVec<16, i4>; // 16 x i4 vector value
+def v32i4 : VTVec<32, i4>; // 32 x i4 vector value
 def v64i4 : VTVec<64, i4>; // 64 x i4 vector value
 def v128i4 : VTVec<128, i4>; // 128 x i4 vector value
 
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 102ca92856bae..03d4f9c09dc2a 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -876,7 +876,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
                      {MVT::v4i16, MVT::v4f16, MVT::v4bf16, MVT::v2i8, MVT::v4i8,
                       MVT::v8i8, MVT::v8i16, MVT::v8f16, MVT::v8bf16,
                       MVT::v16i16, MVT::v16f16, MVT::v16bf16, MVT::v32i16,
-                      MVT::v32f16, MVT::v32bf16},
+                      MVT::v32f16, MVT::v32bf16, MVT::v16i4},
                      Custom);
 
   setOperationAction({ISD::SMULO, ISD::UMULO}, MVT::i64, Custom);
diff --git a/llvm/test/CodeGen/AMDGPU/select-vectors.ll b/llvm/test/CodeGen/AMDGPU/select-vectors.ll
index e754f665c5f43..5e52b2fca32c8 100644
--- a/llvm/test/CodeGen/AMDGPU/select-vectors.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-vectors.ll
@@ -65,6 +65,21 @@ define amdgpu_kernel void @v_select_v16i8(ptr addrspace(1) %out, ptr addrspace(1
   ret void
 }
 
+; GCN-LABEL: {{^}}v_select_v32i4:
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN-NOT: cndmask
+define amdgpu_kernel void @v_select_v32i4(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(4) %b.ptr, i32 %c) #0 {
+  %a = load <32 x i4>, ptr addrspace(1) %a.ptr, align 2
+  %b = load <32 x i4>, ptr addrspace(4) %b.ptr, align 2
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, <32 x i4> %a, <32 x i4> %b
+  store <32 x i4> %select, ptr addrspace(1) %out, align 2
+  ret void
+}
+
 ; GCN-LABEL: {{^}}select_v4i8:
 ; GFX89: s_cselect_b32
 ; GFX89-NOT: s_cselect_b32