Skip to content

Commit a29088c

Browse files
obrotowy authored and igcbot committed
ConstCoalescing SEXT/ZEXT Fix
Fix a bug where the ConstantCoalescing pass treated zero-extended values as sign-extended values in an add instruction.
1 parent d9eda71 commit a29088c

File tree

3 files changed

+74
-6
lines changed

3 files changed

+74
-6
lines changed

IGC/Compiler/CISACodeGen/ConstantCoalescing.cpp

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -845,7 +845,8 @@ void ConstantCoalescing::MergeUniformLoad(Instruction *load, Value *bufIdxV, uin
845845
auto shouldMerge = [&](const BufChunk *cur_chunk) {
846846
if (CompareBufferBase(cur_chunk->bufIdxV, cur_chunk->addrSpace, bufIdxV, addrSpace) &&
847847
cur_chunk->baseIdxV == eltIdxV && cur_chunk->chunkIO->getType()->getScalarType() == loadEltTy &&
848-
CompareMetadata(cur_chunk->chunkIO, load) && !CheckForAliasingWrites(addrSpace, cur_chunk->chunkIO, load)) {
848+
CompareMetadata(cur_chunk->chunkIO, load) && !CheckForAliasingWrites(addrSpace, cur_chunk->chunkIO, load) &&
849+
cur_chunk->extensionKind == Extension) {
849850
uint lb = std::min(eltid, cur_chunk->chunkStart);
850851
uint ub = std::max(eltid + maxEltPlus, cur_chunk->chunkStart + cur_chunk->chunkSize);
851852
if (profitableChunkSize(ub, lb, scalarSizeInBytes) && (isDwordAligned || eltid >= cur_chunk->chunkStart)) {
@@ -893,6 +894,7 @@ void ConstantCoalescing::MergeUniformLoad(Instruction *load, Value *bufIdxV, uin
893894
cov_chunk->elementSize = scalarSizeInBytes;
894895
cov_chunk->chunkStart = eltid;
895896
cov_chunk->chunkSize = RoundChunkSize(maxEltPlus);
897+
cov_chunk->extensionKind = Extension;
896898
const alignment_t chunkAlignment = std::max<alignment_t>(alignment, 4);
897899
cov_chunk->chunkIO = CreateChunkLoad(load, cov_chunk, eltid, chunkAlignment, Extension);
898900
chunk_vec.push_back(cov_chunk);

IGC/Compiler/CISACodeGen/ConstantCoalescing.hpp

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,12 @@ using namespace llvm;
3434

3535
namespace IGC {
3636

37+
// Kind of integer extension tracked for a coalesced chunk. BufChunk stores one
// of these in its `extensionKind` member, and MergeUniformLoad only merges a
// load into an existing chunk when the chunk's stored kind equals the kind of
// the incoming load.
// NOTE(review): presumably this records how the narrower address offset was
// widened (not at all / sign-extended / zero-extended) -- confirm against the
// code that computes the `Extension` argument.
enum ExtensionKind {
38+
EK_NotExtended,
39+
EK_SignExt,
40+
EK_ZeroExt,
41+
};
42+
3743
struct BufChunk {
3844
llvm::Value *bufIdxV; // buffer index when it is indirect
3945
llvm::Value *baseIdxV; // base-address index when it is indirect
@@ -43,6 +49,7 @@ struct BufChunk {
4349
uint chunkSize; // chunk size in elements
4450
llvm::Instruction *chunkIO; // coalesced load
4551
uint loadOrder; // direct CB used order.
52+
ExtensionKind extensionKind;
4653
};
4754

4855
class ConstantCoalescing : public llvm::FunctionPass {
@@ -70,11 +77,6 @@ class ConstantCoalescing : public llvm::FunctionPass {
7077
virtual StringRef getPassName() const override { return IGCOpts::ConstantCoalescingPass; }
7178

7279
private:
73-
enum ExtensionKind {
74-
EK_NotExtended,
75-
EK_SignExt,
76-
EK_ZeroExt,
77-
};
7880

7981
class IRBuilderWrapper : protected llvm::IGCIRBuilder<> {
8082

Lines changed: 64 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,64 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2025 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
;
9+
; RUN: igc_opt --typed-pointers --igc-constant-coalescing -dce -S < %s | FileCheck %s
10+
11+
12+
; Negative test: %a1 is addressed through a sign-extended %offset, while %a2 is
; addressed through a zero-extended %offset plus 4. The CHECK lines expect the
; two loads to remain separate <1 x i32> loads instead of being merged.
define void @test_sext_zext(i32 addrspace(2)* %ca, i16 %offset) {
13+
; CHECK-LABEL: @test_sext_zext
14+
; CHECK: [[S1:%.*]] = sext i16 %offset to i32
15+
; CHECK: [[Z1:%.*]] = zext i16 %offset to i32
16+
; CHECK: [[Z2:%.*]] = add i32 [[Z1]], 4
17+
; CHECK: [[PTR:%.*]] = ptrtoint i32 addrspace(2)* %ca to i32
18+
; CHECK: [[I1:%.*]] = add i32 [[PTR]], [[S1]]
19+
; CHECK: [[I2:%.*]] = add i32 [[PTR]], [[Z2]]
20+
; CHECK: [[PTR1:%.*]] = inttoptr i32 {{%.*}} to <1 x i32> addrspace(2)*
21+
; CHECK: {{%.*}} = load <1 x i32>, <1 x i32> addrspace(2)* [[PTR1]], align 4
22+
; CHECK: [[PTR2:%.*]] = inttoptr i32 {{%.*}} to <1 x i32> addrspace(2)*
23+
; CHECK: {{%.*}} = load <1 x i32>, <1 x i32> addrspace(2)* [[PTR2]], align 4
24+
%s1 = sext i16 %offset to i32
25+
%z1 = zext i16 %offset to i32
26+
%z2 = add i32 %z1, 4
27+
%p1 = ptrtoint i32 addrspace(2)* %ca to i32
28+
%i1 = add i32 %p1, %s1
29+
%i2 = add i32 %p1, %z2
30+
%a1 = inttoptr i32 %i1 to i32 addrspace(2)*
31+
%a2 = inttoptr i32 %i2 to i32 addrspace(2)*
32+
%1 = load i32, i32 addrspace(2)* %a2, align 4
33+
%2 = load i32, i32 addrspace(2)* %a1, align 4
34+
call void @use.i32(i32 %1)
35+
call void @use.i32(i32 %2)
36+
ret void
37+
}
38+
39+
; Positive test: both addresses are built from the same zero-extended %offset
; (%p1 + %z1 and %p1 + %z1 + 4). The CHECK lines expect a single <2 x i32>
; load and no further load after it.
define void @test_zext_coalescing(i32 addrspace(2)* %ca, i16 %offset) {
40+
; CHECK-LABEL: @test_zext_coalescing
41+
; CHECK: {{%.*}} = load <2 x i32>, <2 x i32> addrspace(2)* {{%.*}}, align 4
42+
; CHECK-NOT: load
43+
%z1 = zext i16 %offset to i32
44+
%z2 = add i32 %z1, 4
45+
%p1 = ptrtoint i32 addrspace(2)* %ca to i32
46+
%i1 = add i32 %p1, %z1
47+
%i2 = add i32 %p1, %z2
48+
%a1 = inttoptr i32 %i1 to i32 addrspace(2)*
49+
%a2 = inttoptr i32 %i2 to i32 addrspace(2)*
50+
%1 = load i32, i32 addrspace(2)* %a2, align 4
51+
%2 = load i32, i32 addrspace(2)* %a1, align 4
52+
call void @use.i32(i32 %1)
53+
call void @use.i32(i32 %2)
54+
ret void
55+
}
56+
57+
declare void @use.i32(i32)
58+
59+
!igc.functions = !{!0, !4}
60+
!0 = !{void (i32 addrspace(2)*, i16)* @test_sext_zext, !1}
61+
!1 = !{!2, !3}
62+
!2 = !{!"function_type", i32 0}
63+
!3 = !{!"implicit_arg_desc"}
64+
!4 = !{void (i32 addrspace(2)*, i16)* @test_zext_coalescing, !1}

0 commit comments

Comments
 (0)