Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions llvm/include/llvm/CodeGen/ValueTypes.td
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,8 @@ def v4096i1 : VTVec<4096, i1>; // 4096 x i1 vector value
def v128i2 : VTVec<128, i2>; // 128 x i2 vector value
def v256i2 : VTVec<256, i2>; // 256 x i2 vector value

// Vector value types whose elements are 4-bit integers (i4).
def v16i4 : VTVec<16, i4>; // 16 x i4 vector value
def v32i4 : VTVec<32, i4>; // 32 x i4 vector value
def v64i4 : VTVec<64, i4>; // 64 x i4 vector value
def v128i4 : VTVec<128, i4>; // 128 x i4 vector value

Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -876,7 +876,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
{MVT::v4i16, MVT::v4f16, MVT::v4bf16, MVT::v2i8, MVT::v4i8,
MVT::v8i8, MVT::v8i16, MVT::v8f16, MVT::v8bf16,
MVT::v16i16, MVT::v16f16, MVT::v16bf16, MVT::v32i16,
MVT::v32f16, MVT::v32bf16},
MVT::v32f16, MVT::v32bf16, MVT::v16i4},
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You could do this combine without adding the MVT

Custom);

setOperationAction({ISD::SMULO, ISD::UMULO}, MVT::i64, Custom);
Expand Down
15 changes: 15 additions & 0 deletions llvm/test/CodeGen/AMDGPU/select-vectors.ll
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,21 @@ define amdgpu_kernel void @v_select_v16i8(ptr addrspace(1) %out, ptr addrspace(1
ret void
}

; Selects between two <32 x i4> vectors using one scalar i1 condition
; (true when %c equals 0) and stores the chosen vector to %out.
; A <32 x i4> value is 128 bits wide, so the checks below expect four
; v_cndmask_b32_e32 instructions (presumably one per 32-bit piece) and
; no further cndmask after them.
; GCN-LABEL: {{^}}v_select_v32i4:
; GCN: v_cndmask_b32_e32
; GCN: v_cndmask_b32_e32
; GCN: v_cndmask_b32_e32
; GCN: v_cndmask_b32_e32
; GCN-NOT: cndmask
define amdgpu_kernel void @v_select_v32i4(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(4) %b.ptr, i32 %c) #0 {
; Both operands are loaded with 2-byte alignment, %a from addrspace(1)
; and %b from addrspace(4).
%a = load <32 x i4>, ptr addrspace(1) %a.ptr, align 2
%b = load <32 x i4>, ptr addrspace(4) %b.ptr, align 2
; The condition is a single i1, so the select picks a whole vector
; rather than choosing lane by lane.
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <32 x i4> %a, <32 x i4> %b
store <32 x i4> %select, ptr addrspace(1) %out, align 2
ret void
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This requires a lot more test coverage (i.e., just about every possible IR operation)


; GCN-LABEL: {{^}}select_v4i8:
; GFX89: s_cselect_b32
; GFX89-NOT: s_cselect_b32
Expand Down
Loading