Skip to content

Commit 2f2d265

Browse files
committed
[AMDGPU][NFC] Mark GEPs in flat offset folding tests as inbounds
This is in preparation for a patch that will only fold offsets into flat instructions if their addition is inbounds. Marking the GEPs inbounds here means that the generated output of these tests won't change with the later patch. This is basically a retry of the very similar PR #131994, as part of an updated stack of PRs. For SWDEV-516125.
1 parent 8ea447b commit 2f2d265

21 files changed

+1236
-1236
lines changed

llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll

Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
6262
; GFX90A-NEXT: successors: %bb.35(0x40000000), %bb.5(0x40000000)
6363
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr4_vgpr5:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41
6464
; GFX90A-NEXT: {{ $}}
65-
; GFX90A-NEXT: renamable $vgpr0_vgpr1 = V_LSHLREV_B64_e64 2, $vgpr4_vgpr5, implicit $exec
65+
; GFX90A-NEXT: renamable $vgpr0_vgpr1 = nsw V_LSHLREV_B64_e64 2, $vgpr4_vgpr5, implicit $exec
6666
; GFX90A-NEXT: renamable $vgpr2 = COPY renamable $sgpr25, implicit $exec
6767
; GFX90A-NEXT: renamable $vgpr46, renamable $vcc = V_ADD_CO_U32_e64 $sgpr24, $vgpr0, 0, implicit $exec
6868
; GFX90A-NEXT: renamable $vgpr47, dead renamable $vcc = V_ADDC_U32_e64 killed $vgpr2, killed $vgpr1, killed $vcc, 0, implicit $exec
@@ -959,7 +959,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
959959
; GFX90A-NEXT: successors: %bb.71(0x80000000)
960960
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr12, $vgpr17, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000C, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr52_vgpr53:0x0000000000000003, $vgpr54_vgpr55:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
961961
; GFX90A-NEXT: {{ $}}
962-
; GFX90A-NEXT: renamable $vgpr4_vgpr5 = V_LSHLREV_B64_e64 3, killed $vgpr4_vgpr5, implicit $exec
962+
; GFX90A-NEXT: renamable $vgpr4_vgpr5 = nsw V_LSHLREV_B64_e64 3, killed $vgpr4_vgpr5, implicit $exec
963963
; GFX90A-NEXT: renamable $vgpr2 = COPY renamable $sgpr27, implicit $exec
964964
; GFX90A-NEXT: renamable $vgpr4, renamable $vcc = V_ADD_CO_U32_e64 killed $sgpr26, $vgpr4, 0, implicit $exec
965965
; GFX90A-NEXT: renamable $vgpr2, dead renamable $vcc = V_ADDC_U32_e64 killed $vgpr2, killed $vgpr5, killed $vcc, 0, implicit $exec
@@ -1007,12 +1007,12 @@ bb:
10071007
%i11 = icmp eq i32 %i, 0
10081008
%i12 = load i32, ptr addrspace(3) null, align 8
10091009
%i13 = zext i32 %i12 to i64
1010-
%i14 = getelementptr i32, ptr addrspace(1) %arg, i64 %i13
1010+
%i14 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %i13
10111011
br i1 %arg3, label %bb15, label %bb103
10121012

10131013
bb15:
10141014
%i16 = zext i32 %i to i64
1015-
%i17 = getelementptr i32, ptr addrspace(1) %i14, i64 %i16
1015+
%i17 = getelementptr inbounds i32, ptr addrspace(1) %i14, i64 %i16
10161016
%i18 = ptrtoint ptr addrspace(1) %i17 to i64
10171017
br i1 %arg4, label %bb19, label %bb20
10181018

@@ -1021,7 +1021,7 @@ bb19:
10211021
unreachable
10221022

10231023
bb20:
1024-
%i21 = getelementptr i32, ptr addrspace(1) %i17, i64 256
1024+
%i21 = getelementptr inbounds i32, ptr addrspace(1) %i17, i64 256
10251025
%i22 = ptrtoint ptr addrspace(1) %i21 to i64
10261026
%i23 = inttoptr i64 %i22 to ptr
10271027
%i24 = load i8, ptr %i23, align 1
@@ -1033,7 +1033,7 @@ bb26:
10331033
unreachable
10341034

10351035
bb27:
1036-
%i28 = getelementptr i32, ptr addrspace(1) %i17, i64 512
1036+
%i28 = getelementptr inbounds i32, ptr addrspace(1) %i17, i64 512
10371037
%i29 = ptrtoint ptr addrspace(1) %i28 to i64
10381038
%i30 = inttoptr i64 %i29 to ptr
10391039
%i31 = load i8, ptr %i30, align 1
@@ -1045,7 +1045,7 @@ bb33:
10451045
unreachable
10461046

10471047
bb34:
1048-
%i35 = getelementptr i32, ptr addrspace(1) %i17, i64 768
1048+
%i35 = getelementptr inbounds i32, ptr addrspace(1) %i17, i64 768
10491049
%i36 = ptrtoint ptr addrspace(1) %i35 to i64
10501050
%i37 = inttoptr i64 %i36 to ptr
10511051
%i38 = load i8, ptr %i37, align 1
@@ -1057,7 +1057,7 @@ bb40:
10571057
unreachable
10581058

10591059
bb41:
1060-
%i42 = getelementptr i32, ptr addrspace(1) %i17, i64 1024
1060+
%i42 = getelementptr inbounds i32, ptr addrspace(1) %i17, i64 1024
10611061
%i43 = ptrtoint ptr addrspace(1) %i42 to i64
10621062
%i44 = inttoptr i64 %i43 to ptr
10631063
%i45 = load i8, ptr %i44, align 1
@@ -1069,7 +1069,7 @@ bb47:
10691069
unreachable
10701070

10711071
bb48:
1072-
%i49 = getelementptr i32, ptr addrspace(1) %i17, i64 1280
1072+
%i49 = getelementptr inbounds i32, ptr addrspace(1) %i17, i64 1280
10731073
%i50 = ptrtoint ptr addrspace(1) %i49 to i64
10741074
%i51 = inttoptr i64 %i50 to ptr
10751075
%i52 = load i8, ptr %i51, align 1
@@ -1081,7 +1081,7 @@ bb54:
10811081
unreachable
10821082

10831083
bb55:
1084-
%i56 = getelementptr i32, ptr addrspace(1) %i17, i64 1536
1084+
%i56 = getelementptr inbounds i32, ptr addrspace(1) %i17, i64 1536
10851085
%i57 = ptrtoint ptr addrspace(1) %i56 to i64
10861086
%i58 = or i64 %i57, 1
10871087
%i59 = inttoptr i64 %i58 to ptr
@@ -1113,7 +1113,7 @@ bb67:
11131113

11141114
bb68:
11151115
%i69 = zext i1 %arg5 to i8
1116-
%i70 = getelementptr [2 x i32], ptr addrspace(1) null, i64 %i16
1116+
%i70 = getelementptr inbounds [2 x i32], ptr addrspace(1) null, i64 %i16
11171117
%i71 = ptrtoint ptr addrspace(1) %i70 to i64
11181118
br i1 %arg5, label %bb72, label %bb73
11191119

@@ -1122,7 +1122,7 @@ bb72:
11221122
unreachable
11231123

11241124
bb73:
1125-
%i74 = getelementptr [2 x i32], ptr addrspace(1) %i70, i64 256
1125+
%i74 = getelementptr inbounds [2 x i32], ptr addrspace(1) %i70, i64 256
11261126
%i75 = ptrtoint ptr addrspace(1) %i74 to i64
11271127
%i76 = inttoptr i64 %i75 to ptr
11281128
%i77 = load i8, ptr %i76, align 1
@@ -1134,7 +1134,7 @@ bb79:
11341134
unreachable
11351135

11361136
bb80:
1137-
%i81 = getelementptr [2 x i32], ptr addrspace(1) %i70, i64 512
1137+
%i81 = getelementptr inbounds [2 x i32], ptr addrspace(1) %i70, i64 512
11381138
%i82 = ptrtoint ptr addrspace(1) %i81 to i64
11391139
%i83 = or i64 %i82, 1
11401140
br i1 %arg6, label %bb84, label %bb85
@@ -1269,7 +1269,7 @@ bb174:
12691269
%i182 = select i1 %arg3, i32 %i181, i32 0
12701270
%i183 = or i32 %i182, %i154
12711271
%i184 = or i32 %i183, %i156
1272-
%i185 = getelementptr [2 x i32], ptr addrspace(1) %arg1, i64 %i13
1272+
%i185 = getelementptr inbounds [2 x i32], ptr addrspace(1) %arg1, i64 %i13
12731273
br i1 %arg3, label %bb186, label %bb196
12741274

12751275
bb186:

0 commit comments

Comments
 (0)