Skip to content

Commit cca2a9f

Browse files
esukhov authored and igcbot committed
IGCVectorizer now supports I32 PHI
IGCVectorizer now supports I32 Phi instructions.
1 parent 0998b31 commit cca2a9f

File tree

5 files changed

+239
-2
lines changed

5 files changed

+239
-2
lines changed

IGC/Compiler/CISACodeGen/IGCVectorizer.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,11 @@ bool isFloatTyped(Instruction *I) {
259259
return I->getType()->isFloatTy();
260260
}
261261

262+
// Returns true when the result type of I is one the vectorizer accepts:
// either float (see isFloatTyped), or a 32-bit integer when the
// VectorizerAllowI32 regkey is set.
bool isAllowedType(Instruction *I) {
263+
return isFloatTyped(I) ||
264+
(IGC_GET_FLAG_VALUE(VectorizerAllowI32) && I->getType()->isIntegerTy(32));
265+
}
266+
262267
bool isIntrinsicSafe(Instruction *I) {
263268
bool Result = false;
264269
IntrinsicInst *IntrinsicI = llvm::dyn_cast<IntrinsicInst>(I);
@@ -297,14 +302,14 @@ bool isSafeToVectorize(Instruction *I) {
297302

298303
// the only typed instructions we add to slices => Insert elements
299304
bool IsVectorTyped = I->getType()->isVectorTy();
300-
bool IsFloat = isFloatTyped(I);
305+
bool IsAllowedType = isAllowedType(I);
301306

302307
bool Result =
303308
isPHISafe(I) || IsExtract ||
304309
isBinarySafe(I) || isIntrinsicSafe(I) || isAllowedStub(I);
305310

306311
// all allowed instructions that have an allowed scalar type (float, or i32 when VectorizerAllowI32 is set) and are not vectors
307-
Result = (Result && IsFloat && !IsVectorTyped);
312+
Result = (Result && IsAllowedType && !IsVectorTyped);
308313
// always allowed
309314
Result |= IsFpTrunc;
310315
// only Float insert elements are allowed
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
; UNSUPPORTED: system-windows
2+
; REQUIRES: regkeys
3+
4+
; RUN: igc_opt -S -dce -platformpvc -rev-id B -has-emulated-64-bit-insts -igc-emit-visa --regkey=DumpVISAASMToConsole=1 -simd-mode 16 < %s | FileCheck %s
5+
6+
; CHECK: .decl vectorized_phi v_type=G type=d num_elts=128 align=wordx32
7+
8+
; CHECK: mov (M1, 16) vectorized_phi(0,0)<1> 0x0:d
9+
; CHECK: mov (M1, 16) vectorized_phi(1,0)<1> 0x0:d
10+
; CHECK: mov (M1, 16) vectorized_phi(2,0)<1> 0x0:d
11+
; CHECK: mov (M1, 16) vectorized_phi(3,0)<1> 0x0:d
12+
; CHECK: mov (M1, 16) vectorized_phi(4,0)<1> 0x0:d
13+
; CHECK: mov (M1, 16) vectorized_phi(5,0)<1> 0x0:d
14+
; CHECK: mov (M1, 16) vectorized_phi(6,0)<1> 0x0:d
15+
; CHECK: mov (M1, 16) vectorized_phi(7,0)<1> 0x0:d
16+
17+
; CHECK: dpas.?.?.0.0 (M1, 16) vectorized_phi.0 vectorized_phi.0
18+
19+
20+
; ModuleID = 'vectorizer-vector-emission-fmad.ll'
21+
source_filename = "vectorizer-vector-emission-fmad.ll"
22+
23+
define spir_kernel void @_attn_fwd(half addrspace(1)* %0, half addrspace(1)* %1, half addrspace(1)* %2, float %3, i8 addrspace(1)* %4, float addrspace(1)* %5, <8 x i32> %r0, <8 x i32> %payloadHeader, i8* %privateBase, i32 %bufferOffset, i32 %bufferOffset1, i32 %bufferOffset2, i32 %bufferOffset3, i32 %bufferOffset4) {
24+
br label %._crit_edge
25+
26+
._crit_edge: ; preds = %._crit_edge.._crit_edge_crit_edge, %6
27+
%7 = phi float [ 0.000000e+00, %6 ], [ %7, %._crit_edge.._crit_edge_crit_edge ]
28+
%vectorized_phi = phi <8 x i32> [ zeroinitializer, %6 ], [ %8, %._crit_edge.._crit_edge_crit_edge ]
29+
%8 = call <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x i32> %vectorized_phi, <8 x i16> zeroinitializer, <8 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0, i1 false)
30+
br label %._crit_edge.._crit_edge_crit_edge
31+
32+
._crit_edge.._crit_edge_crit_edge: ; preds = %._crit_edge
33+
br label %._crit_edge
34+
}
35+
36+
declare <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x i32>, <8 x i16>, <8 x i32>, i32, i32, i32, i32, i1)
37+
38+
!igc.functions = !{!0}
39+
!IGCMetadata = !{!4}
40+
41+
!0 = distinct !{void (half addrspace(1)*, half addrspace(1)*, half addrspace(1)*, float, i8 addrspace(1)*, float addrspace(1)*, <8 x i32>, <8 x i32>, i8*, i32, i32, i32, i32, i32)* @_attn_fwd, !1}
42+
!1 = distinct !{!2, !3}
43+
!2 = distinct !{!"function_type", i32 0}
44+
!3 = distinct !{!"sub_group_size", i32 16}
45+
!4 = distinct !{!"ModuleMD", !5}
46+
!5 = distinct !{!"FuncMD", !6, !7}
47+
!6 = distinct !{!"FuncMDMap[0]", void (half addrspace(1)*, half addrspace(1)*, half addrspace(1)*, float, i8 addrspace(1)*, float addrspace(1)*, <8 x i32>, <8 x i32>, i8*, i32, i32, i32, i32, i32)* @_attn_fwd}
48+
!7 = distinct !{!"FuncMDValue[0]", !8}
49+
!8 = distinct !{!"resAllocMD", !9}
50+
!9 = distinct !{!"argAllocMDList", !10}
51+
!10 = distinct !{!"argAllocMDListVec[0]"}
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
; REQUIRES: regkeys
2+
; RUN: igc_opt -S --igc-vectorizer -dce --regkey=VectorizerLog=1 --regkey=VectorizerLogToErr=1 < %s 2>&1 | FileCheck %s
3+
4+
; CHECK: Start: %25 = insertelement <8 x i32> zeroinitializer, i32 %17, i64 0
5+
; CHECK: Operand [1]: First: %17 = mul i32 %9, %1
6+
; CHECK: Not safe to vectorize
7+
8+
; CHECK: some elements weren't even vectorized
9+
10+
; CHECK: %1 = phi i32 [ 0, %0 ], [ %35, %._crit_edge ]
11+
; CHECK: %2 = phi i32 [ 1, %0 ], [ %36, %._crit_edge ]
12+
; CHECK: %3 = phi i32 [ 2, %0 ], [ %37, %._crit_edge ]
13+
; CHECK: %4 = phi i32 [ 3, %0 ], [ %38, %._crit_edge ]
14+
; CHECK: %5 = phi i32 [ 4, %0 ], [ %39, %._crit_edge ]
15+
; CHECK: %6 = phi i32 [ 5, %0 ], [ %40, %._crit_edge ]
16+
; CHECK: %7 = phi i32 [ 6, %0 ], [ %41, %._crit_edge ]
17+
; CHECK: %8 = phi i32 [ 7, %0 ], [ %42, %._crit_edge ]
18+
; CHECK-NOT: %vectorized_phi
19+
20+
21+
; ModuleID = 'reduced.ll'
22+
source_filename = "initial_test.ll"
23+
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-n8:16:32"
24+
target triple = "spir64-unknown-unknown"
25+
26+
; Function Attrs: convergent nounwind
27+
define spir_kernel void @quux() {
28+
br label %._crit_edge
29+
30+
._crit_edge: ; preds = %._crit_edge, %0
31+
%1 = phi i32 [ 0, %0 ], [ %35, %._crit_edge ]
32+
%2 = phi i32 [ 1, %0 ], [ %36, %._crit_edge ]
33+
%3 = phi i32 [ 2, %0 ], [ %37, %._crit_edge ]
34+
%4 = phi i32 [ 3, %0 ], [ %38, %._crit_edge ]
35+
%5 = phi i32 [ 4, %0 ], [ %39, %._crit_edge ]
36+
%6 = phi i32 [ 5, %0 ], [ %40, %._crit_edge ]
37+
%7 = phi i32 [ 6, %0 ], [ %41, %._crit_edge ]
38+
%8 = phi i32 [ 7, %0 ], [ %42, %._crit_edge ]
39+
%9 = add i32 %1, 1
40+
%10 = add i32 %2, 2
41+
%11 = add i32 %3, 3
42+
%12 = add i32 %4, 4
43+
%13 = add i32 %5, 5
44+
%14 = add i32 %6, 6
45+
%15 = add i32 %7, 7
46+
%16 = add i32 %8, 8
47+
%17 = mul i32 %9, %1
48+
%18 = mul i32 %10, %2
49+
%19 = mul i32 %11, %3
50+
%20 = mul i32 %12, %4
51+
%21 = mul i32 %13, %5
52+
%22 = mul i32 %14, %6
53+
%23 = mul i32 %15, %7
54+
%24 = mul i32 %16, %8
55+
%25 = insertelement <8 x i32> zeroinitializer, i32 %17, i64 0
56+
%26 = insertelement <8 x i32> %25, i32 %18, i64 1
57+
%27 = insertelement <8 x i32> %26, i32 %19, i64 2
58+
%28 = insertelement <8 x i32> %27, i32 %20, i64 3
59+
%29 = insertelement <8 x i32> %28, i32 %21, i64 4
60+
%30 = insertelement <8 x i32> %29, i32 %22, i64 5
61+
%31 = insertelement <8 x i32> %30, i32 %23, i64 6
62+
%32 = insertelement <8 x i32> %31, i32 %24, i64 7
63+
%33 = call <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x i32> %32, <8 x i16> zeroinitializer, <8 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0, i1 false)
64+
%34 = call <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x i32> %33, <8 x i16> zeroinitializer, <8 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0, i1 false)
65+
%35 = extractelement <8 x i32> %34, i64 0
66+
%36 = extractelement <8 x i32> %34, i64 1
67+
%37 = extractelement <8 x i32> %34, i64 2
68+
%38 = extractelement <8 x i32> %34, i64 3
69+
%39 = extractelement <8 x i32> %34, i64 4
70+
%40 = extractelement <8 x i32> %34, i64 5
71+
%41 = extractelement <8 x i32> %34, i64 6
72+
%42 = extractelement <8 x i32> %34, i64 7
73+
br label %._crit_edge
74+
}
75+
76+
; Function Attrs: convergent nounwind readnone willreturn
77+
declare <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x i32>, <8 x i16>, <8 x i32>, i32, i32, i32, i32, i1) #1
78+
79+
; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
80+
declare i32 @llvm.exp2.i32(i32) #2
81+
82+
attributes #0 = { convergent nounwind }
83+
attributes #1 = { convergent nounwind readnone willreturn }
84+
attributes #2 = { nofree nosync nounwind readnone speculatable willreturn }
85+
86+
!igc.functions = !{!0}
87+
!0 = !{void ()* @quux, !1}
88+
!1 = !{!2, !3}
89+
!2 = !{!"function_type", i32 0}
90+
!3 = !{!"sub_group_size", i32 16}
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
; RUN: igc_opt --igc-vectorizer -S -dce < %s 2>&1 | FileCheck %s
2+
3+
define spir_kernel void @quux() {
4+
; CHECK-LABEL: @quux(
5+
; CHECK-NEXT: bb43:
6+
; CHECK-NEXT: br label [[BB123:%.*]]
7+
; CHECK: bb60:
8+
; CHECK-NEXT: br label [[BB88:%.*]]
9+
; CHECK: bb88:
10+
; CHECK-NEXT: [[VECTORIZED_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[BB60:%.*]] ], [ [[TMP113:%.*]], [[BB88]] ]
11+
; CHECK-NEXT: [[TMP112:%.*]] = call <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x i32> [[VECTORIZED_PHI]], <8 x i16> zeroinitializer, <8 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0, i1 false)
12+
; CHECK-NEXT: [[TMP113]] = call <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x i32> zeroinitializer, <8 x i16> zeroinitializer, <8 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0, i1 false)
13+
; CHECK-NEXT: br i1 false, label [[BB88]], label [[BB123]]
14+
; CHECK: bb123:
15+
; CHECK-NEXT: [[VECTORIZED_PHI1:%.*]] = phi <8 x i32> [ zeroinitializer, [[BB43:%.*]] ], [ [[TMP113]], [[BB88]] ]
16+
; CHECK-NEXT: [[TMP151:%.*]] = bitcast <8 x i32> [[VECTORIZED_PHI1]] to <8 x i32>
17+
; CHECK-NEXT: call void @llvm.genx.GenISA.LSC2DBlockWrite.v8i32(i64 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 false, i1 false, i32 0, <8 x i32> [[TMP151]])
18+
; CHECK-NEXT: ret void
19+
;
20+
bb43:
21+
br label %bb123
22+
23+
bb60: ; No predecessors!
24+
br label %bb88
25+
26+
bb88: ; preds = %bb88, %bb60
27+
%tmp90 = phi i32 [ 0, %bb60 ], [ %tmp114, %bb88 ]
28+
%tmp91 = phi i32 [ 0, %bb60 ], [ %tmp115, %bb88 ]
29+
%tmp92 = phi i32 [ 0, %bb60 ], [ %tmp116, %bb88 ]
30+
%tmp93 = phi i32 [ 0, %bb60 ], [ %tmp117, %bb88 ]
31+
%tmp94 = phi i32 [ 0, %bb60 ], [ %tmp118, %bb88 ]
32+
%tmp95 = phi i32 [ 0, %bb60 ], [ %tmp119, %bb88 ]
33+
%tmp96 = phi i32 [ 0, %bb60 ], [ %tmp120, %bb88 ]
34+
%tmp97 = phi i32 [ 0, %bb60 ], [ %tmp121, %bb88 ]
35+
%tmp104 = insertelement <8 x i32> zeroinitializer, i32 %tmp90, i64 0
36+
%tmp105 = insertelement <8 x i32> %tmp104, i32 %tmp91, i64 1
37+
%tmp106 = insertelement <8 x i32> %tmp105, i32 %tmp92, i64 2
38+
%tmp107 = insertelement <8 x i32> %tmp106, i32 %tmp93, i64 3
39+
%tmp108 = insertelement <8 x i32> %tmp107, i32 %tmp94, i64 4
40+
%tmp109 = insertelement <8 x i32> %tmp108, i32 %tmp95, i64 5
41+
%tmp110 = insertelement <8 x i32> %tmp109, i32 %tmp96, i64 6
42+
%tmp111 = insertelement <8 x i32> %tmp110, i32 %tmp97, i64 7
43+
%tmp112 = call <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x i32> %tmp111, <8 x i16> zeroinitializer, <8 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0, i1 false)
44+
%tmp113 = call <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x i32> zeroinitializer, <8 x i16> zeroinitializer, <8 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0, i1 false)
45+
%tmp114 = extractelement <8 x i32> %tmp113, i64 0
46+
%tmp115 = extractelement <8 x i32> %tmp113, i64 1
47+
%tmp116 = extractelement <8 x i32> %tmp113, i64 2
48+
%tmp117 = extractelement <8 x i32> %tmp113, i64 3
49+
%tmp118 = extractelement <8 x i32> %tmp113, i64 4
50+
%tmp119 = extractelement <8 x i32> %tmp113, i64 5
51+
%tmp120 = extractelement <8 x i32> %tmp113, i64 6
52+
%tmp121 = extractelement <8 x i32> %tmp113, i64 7
53+
br i1 false, label %bb88, label %bb123
54+
55+
bb123: ; preds = %bb88, %bb43
56+
%tmp133 = phi i32 [ 0, %bb43 ], [ %tmp114, %bb88 ]
57+
%tmp134 = phi i32 [ 0, %bb43 ], [ %tmp115, %bb88 ]
58+
%tmp135 = phi i32 [ 0, %bb43 ], [ %tmp116, %bb88 ]
59+
%tmp136 = phi i32 [ 0, %bb43 ], [ %tmp117, %bb88 ]
60+
%tmp137 = phi i32 [ 0, %bb43 ], [ %tmp118, %bb88 ]
61+
%tmp138 = phi i32 [ 0, %bb43 ], [ %tmp119, %bb88 ]
62+
%tmp139 = phi i32 [ 0, %bb43 ], [ %tmp120, %bb88 ]
63+
%tmp140 = phi i32 [ 0, %bb43 ], [ %tmp121, %bb88 ]
64+
%tmp143 = insertelement <8 x i32> zeroinitializer, i32 %tmp133, i64 0
65+
%tmp144 = insertelement <8 x i32> %tmp143, i32 %tmp134, i64 1
66+
%tmp145 = insertelement <8 x i32> %tmp144, i32 %tmp135, i64 2
67+
%tmp146 = insertelement <8 x i32> %tmp145, i32 %tmp136, i64 3
68+
%tmp147 = insertelement <8 x i32> %tmp146, i32 %tmp137, i64 4
69+
%tmp148 = insertelement <8 x i32> %tmp147, i32 %tmp138, i64 5
70+
%tmp149 = insertelement <8 x i32> %tmp148, i32 %tmp139, i64 6
71+
%tmp150 = insertelement <8 x i32> %tmp149, i32 %tmp140, i64 7
72+
%tmp151 = bitcast <8 x i32> %tmp150 to <8 x i32>
73+
call void @llvm.genx.GenISA.LSC2DBlockWrite.v8i32(i64 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 false, i1 false, i32 0, <8 x i32> %tmp151)
74+
ret void
75+
}
76+
77+
declare <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x i32>, <8 x i16>, <8 x i32>, i32, i32, i32, i32, i1)
78+
79+
declare <8 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v8i16(i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i1, i1, i32)
80+
81+
declare <8 x i32> @llvm.genx.GenISA.LSC2DBlockRead.v8i32(i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i1, i1, i32)
82+
83+
declare void @llvm.genx.GenISA.LSC2DBlockWrite.v8i32(i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i1, i1, i32, <8 x i32>)
84+
85+
!igc.functions = !{!0}
86+
!0 = !{void ()* @quux, !1}
87+
!1 = !{!2, !3}
88+
!2 = !{!"function_type", i32 0}
89+
!3 = !{!"sub_group_size", i32 16}
90+

IGC/common/igc_flags.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -963,6 +963,7 @@ DECLARE_IGC_REGKEY(DWORD, VectorizerDepWindowMultiplier, 2,
963963
DECLARE_IGC_REGKEY(bool, VectorizerCheckScalarizer, false, "Add scalariser after vectorizer to check performance", true)
964964
DECLARE_IGC_REGKEY(DWORD, VectorizerList, -1, "Vectorize only one seed instruction with the provided number", true)
965965
DECLARE_IGC_REGKEY(bool, EnableVectorEmitter, true, "Enable Vector Emission for a vectorizer", true)
966+
DECLARE_IGC_REGKEY(bool, VectorizerAllowI32, true, "Allow I32 versions of instructions inside vectorizer", true)
966967
DECLARE_IGC_REGKEY(bool, VectorizerAllowFPTRUNC, true, "Allow FPTRUNC instructions inside vectorizer", true)
967968
DECLARE_IGC_REGKEY(bool, VectorizerAllowFDIV, true, "Allow FDIV instructions inside vectorizer", true)
968969
DECLARE_IGC_REGKEY(bool, VectorizerAllowFMUL, true, "Allow FMUL instructions inside vectorizer", true)

0 commit comments

Comments
 (0)