From 31cb22e7608876613b3388b5cde1def498327ffc Mon Sep 17 00:00:00 2001 From: quic-asaravan Date: Wed, 16 Apr 2025 00:18:52 -0700 Subject: [PATCH 01/14] [HEXAGON] [MachinePipeliner] Fix the DAG in case of dependent phis. %3:intregs = PHI %21:intregs, %bb.6, %7:intregs, %bb.1 - SU0 %7:intregs = PHI %21:intregs, %bb.6, %13:intregs, %bb.1 - SU1 %27:intregs = A2_zxtb %3:intregs - SU2 %13:intregs = C2_muxri %45:predregs, 0, %46:intreg If we have dependent phis, SU0 should be the successor of SU1 not the other way around. --- llvm/lib/CodeGen/MachinePipeliner.cpp | 20 +++- .../CodeGen/Hexagon/swp-dependent-phis.ll | 96 +++++++++++++++++++ llvm/test/CodeGen/Hexagon/swp-epilog-phi9.ll | 6 +- 3 files changed, 119 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/Hexagon/swp-dependent-phis.ll diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp index 6cb0299a30d7a..de10402fe7f48 100644 --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -962,8 +962,26 @@ void SwingSchedulerDAG::updatePhiDependences() { HasPhiDef = Reg; // Add a chain edge to a dependent Phi that isn't an existing // predecessor. + + // %3:intregs = PHI %21:intregs, %bb.6, %7:intregs, %bb.1 - SU0 + // %7:intregs = PHI %21:intregs, %bb.6, %13:intregs, %bb.1 - SU1 + // %27:intregs = A2_zxtb %3:intregs - SU2 + // %13:intregs = C2_muxri %45:predregs, 0, %46:intreg + // If we have dependent phis, SU0 should be the successor of SU1 + // not the other way around. (it used to be SU1 is the successor + // of SU0). In some cases, SU0 is scheduled earlier than SU1 + // resulting in bad IR as we do not have a value that can be used + // by SU2. + + // Reachability check is to ensure that we do not violate DAG. 
+ // %1:intregs = PHI %10:intregs, %bb.0, %3:intregs, %bb.1 - SU0 + // %2:intregs = PHI %10:intregs, %bb.0, %1:intregs, %bb.1 - SU1 + // %3:intregs = PHI %11:intregs, %bb.0, %2:intregs, %bb.1 - SU2 + // S2_storerb_io %0:intregs, 0, %2:intregs + // Make sure we do not create an edge between SU2 and SU0. + if (SU->NodeNum < I.NodeNum && !I.isPred(SU)) - I.addPred(SDep(SU, SDep::Barrier)); + SU->addPred(SDep(&I, SDep::Barrier)); } } } diff --git a/llvm/test/CodeGen/Hexagon/swp-dependent-phis.ll b/llvm/test/CodeGen/Hexagon/swp-dependent-phis.ll new file mode 100644 index 0000000000000..bff89f764710a --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/swp-dependent-phis.ll @@ -0,0 +1,96 @@ +;RUN: llc -march=hexagon -mv71t -O2 < %s -o - 2>&1 > /dev/null + +; Validate that we do not crash while running this test. +;%3:intregs = PHI %21:intregs, %bb.6, %7:intregs, %bb.1 - SU0 +;%7:intregs = PHI %21:intregs, %bb.6, %13:intregs, %bb.1 - SU1 +;%27:intregs = A2_zxtb %3:intregs - SU2 +;%13:intregs = C2_muxri %45:predregs, 0, %46:intreg +;If we have dependent phis, SU0 should be the successor of SU1 not +;the other way around. (it used to be SU1 is the successor of SU0). +;In some cases, SU0 is scheduled earlier than SU1 resulting in bad +;IR as we do not have a value that can be used by SU2. 
+ +@global = common dso_local local_unnamed_addr global ptr null, align 4 +@global.1 = common dso_local local_unnamed_addr global i32 0, align 4 +@global.2 = common dso_local local_unnamed_addr global i16 0, align 2 +@global.3 = common dso_local local_unnamed_addr global i16 0, align 2 +@global.4 = common dso_local local_unnamed_addr global i32 0, align 4 + +; Function Attrs: nofree norecurse nosync nounwind +define dso_local i32 @wombat(i8 zeroext %arg) local_unnamed_addr #0 { +bb: + %load = load ptr, ptr @global, align 4 + %load1 = load i32, ptr @global.1, align 4 + %add2 = add nsw i32 %load1, -1 + store i32 %add2, ptr @global.1, align 4 + %icmp = icmp eq i32 %load1, 0 + br i1 %icmp, label %bb36, label %bb3 + +bb3: ; preds = %bb3, %bb + %phi = phi i32 [ %add30, %bb3 ], [ %add2, %bb ] + %phi4 = phi i8 [ %phi8, %bb3 ], [ %arg, %bb ] + %phi5 = phi i16 [ %select23, %bb3 ], [ undef, %bb ] + %phi6 = phi i16 [ %select26, %bb3 ], [ undef, %bb ] + %phi7 = phi i16 [ %select, %bb3 ], [ undef, %bb ] + %phi8 = phi i8 [ %select29, %bb3 ], [ %arg, %bb ] + %zext = zext i8 %phi4 to i32 + %getelementptr = getelementptr inbounds i32, ptr %load, i32 %zext + %getelementptr9 = getelementptr inbounds i32, ptr %getelementptr, i32 2 + %ptrtoint = ptrtoint ptr %getelementptr9 to i32 + %trunc = trunc i32 %ptrtoint to i16 + %sext10 = sext i16 %phi7 to i32 + %shl11 = shl i32 %ptrtoint, 16 + %ashr = ashr exact i32 %shl11, 16 + %icmp12 = icmp slt i32 %ashr, %sext10 + %select = select i1 %icmp12, i16 %trunc, i16 %phi7 + %getelementptr13 = getelementptr inbounds i32, ptr %getelementptr, i32 3 + %load14 = load i32, ptr %getelementptr13, align 4 + %shl = shl i32 %load14, 8 + %getelementptr15 = getelementptr inbounds i32, ptr %getelementptr, i32 1 + %load16 = load i32, ptr %getelementptr15, align 4 + %shl17 = shl i32 %load16, 16 + %ashr18 = ashr exact i32 %shl17, 16 + %add = add nsw i32 %ashr18, %load14 + %lshr = lshr i32 %add, 8 + %or = or i32 %lshr, %shl + %sub = sub i32 %or, %load16 + %trunc19 = 
trunc i32 %sub to i16 + %sext = sext i16 %phi5 to i32 + %shl20 = shl i32 %sub, 16 + %ashr21 = ashr exact i32 %shl20, 16 + %icmp22 = icmp sgt i32 %ashr21, %sext + %select23 = select i1 %icmp22, i16 %trunc19, i16 %phi5 + %sext24 = sext i16 %phi6 to i32 + %icmp25 = icmp slt i32 %ashr21, %sext24 + %select26 = select i1 %icmp25, i16 %trunc19, i16 %phi6 + %icmp27 = icmp eq i8 %phi8, 0 + %add28 = add i8 %phi8, -1 + %select29 = select i1 %icmp27, i8 0, i8 %add28 + %add30 = add nsw i32 %phi, -1 + %icmp31 = icmp eq i32 %phi, 0 + br i1 %icmp31, label %bb32, label %bb3 + +bb32: ; preds = %bb3 + store i16 %trunc, ptr @global.2, align 2 + store i16 %trunc19, ptr @global.3, align 2 + store i32 -1, ptr @global.1, align 4 + %sext33 = sext i16 %select to i32 + %sext34 = sext i16 %select23 to i32 + %sext35 = sext i16 %select26 to i32 + br label %bb36 + +bb36: ; preds = %bb32, %bb + %phi37 = phi i32 [ %sext33, %bb32 ], [ 0, %bb ] + %phi38 = phi i32 [ %sext35, %bb32 ], [ 0, %bb ] + %phi39 = phi i32 [ %sext34, %bb32 ], [ 0, %bb ] + %sub40 = sub nsw i32 %phi39, %phi38 + %icmp41 = icmp slt i32 %sub40, %phi37 + br i1 %icmp41, label %bb43, label %bb42 + +bb42: ; preds = %bb36 + store i32 0, ptr @global.4, align 4 + br label %bb43 + +bb43: ; preds = %bb42, %bb36 + ret i32 undef +} diff --git a/llvm/test/CodeGen/Hexagon/swp-epilog-phi9.ll b/llvm/test/CodeGen/Hexagon/swp-epilog-phi9.ll index af1b848a8cf2d..07b129ffeb3e9 100644 --- a/llvm/test/CodeGen/Hexagon/swp-epilog-phi9.ll +++ b/llvm/test/CodeGen/Hexagon/swp-epilog-phi9.ll @@ -5,11 +5,13 @@ ; 2, so the computation for the number of Phis needs to be adjusted when ; the incoming prolog block is from prolog 0 or prolog 1. ; Note: the pipeliner no longer generates a 3 stage pipeline for this test. +; Note: the pipeliner has been generating a 4-stage pipelined loop. 
; CHECK: loop0 ; CHECK: [[REG0:r([0-9]+)]] = add(r{{[0-8]+}},#8) +; CHECK: r{{[0-9]+}} = [[REG0]] ; CHECK: endloop0 -; CHECK: [[REG0]] = add(r{{[0-9]+}},#8) +; CHECK: r{{[0-9]+}} = add(r{{[0-9]+}},#8) ; Function Attrs: nounwind define void @f0(ptr nocapture readonly %a0, i32 %a1) #0 { @@ -50,6 +52,6 @@ declare i32 @llvm.hexagon.M2.mpy.ll.s0(i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.hexagon.M2.mpy.acc.sat.ll.s0(i32, i32, i32) #1 -attributes #0 = { nounwind "target-cpu"="hexagonv60" } +attributes #0 = { nounwind "target-cpu"="hexagonv68" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind } From a1804327804add4ca8ccc4a3911e95dc16e4f578 Mon Sep 17 00:00:00 2001 From: quic-asaravan Date: Wed, 16 Apr 2025 00:18:52 -0700 Subject: [PATCH 02/14] [HEXAGON] [MachinePipeliner] Fix the DAG in case of dependent phis. %3:intregs = PHI %21:intregs, %bb.6, %7:intregs, %bb.1 - SU0 %7:intregs = PHI %21:intregs, %bb.6, %13:intregs, %bb.1 - SU1 %27:intregs = A2_zxtb %3:intregs - SU2 %13:intregs = C2_muxri %45:predregs, 0, %46:intreg If we have dependent phis, SU0 should be the successor of SU1 not the other way around. Co-authored-by: Sumanth Gundapaneni --- llvm/lib/CodeGen/MachinePipeliner.cpp | 20 +++- .../CodeGen/Hexagon/swp-dependent-phis.ll | 96 +++++++++++++++++++ llvm/test/CodeGen/Hexagon/swp-epilog-phi9.ll | 6 +- 3 files changed, 119 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/Hexagon/swp-dependent-phis.ll diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp index 6cb0299a30d7a..de10402fe7f48 100644 --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -962,8 +962,26 @@ void SwingSchedulerDAG::updatePhiDependences() { HasPhiDef = Reg; // Add a chain edge to a dependent Phi that isn't an existing // predecessor. 
+ + // %3:intregs = PHI %21:intregs, %bb.6, %7:intregs, %bb.1 - SU0 + // %7:intregs = PHI %21:intregs, %bb.6, %13:intregs, %bb.1 - SU1 + // %27:intregs = A2_zxtb %3:intregs - SU2 + // %13:intregs = C2_muxri %45:predregs, 0, %46:intreg + // If we have dependent phis, SU0 should be the successor of SU1 + // not the other way around. (it used to be SU1 is the successor + // of SU0). In some cases, SU0 is scheduled earlier than SU1 + // resulting in bad IR as we do not have a value that can be used + // by SU2. + + // Reachability check is to ensure that we do not violate DAG. + // %1:intregs = PHI %10:intregs, %bb.0, %3:intregs, %bb.1 - SU0 + // %2:intregs = PHI %10:intregs, %bb.0, %1:intregs, %bb.1 - SU1 + // %3:intregs = PHI %11:intregs, %bb.0, %2:intregs, %bb.1 - SU2 + // S2_storerb_io %0:intregs, 0, %2:intregs + // Make sure we do not create an edge between SU2 and SU0. + if (SU->NodeNum < I.NodeNum && !I.isPred(SU)) - I.addPred(SDep(SU, SDep::Barrier)); + SU->addPred(SDep(&I, SDep::Barrier)); } } } diff --git a/llvm/test/CodeGen/Hexagon/swp-dependent-phis.ll b/llvm/test/CodeGen/Hexagon/swp-dependent-phis.ll new file mode 100644 index 0000000000000..bff89f764710a --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/swp-dependent-phis.ll @@ -0,0 +1,96 @@ +;RUN: llc -march=hexagon -mv71t -O2 < %s -o - 2>&1 > /dev/null + +; Validate that we do not crash while running this test. +;%3:intregs = PHI %21:intregs, %bb.6, %7:intregs, %bb.1 - SU0 +;%7:intregs = PHI %21:intregs, %bb.6, %13:intregs, %bb.1 - SU1 +;%27:intregs = A2_zxtb %3:intregs - SU2 +;%13:intregs = C2_muxri %45:predregs, 0, %46:intreg +;If we have dependent phis, SU0 should be the successor of SU1 not +;the other way around. (it used to be SU1 is the successor of SU0). +;In some cases, SU0 is scheduled earlier than SU1 resulting in bad +;IR as we do not have a value that can be used by SU2. 
+ +@global = common dso_local local_unnamed_addr global ptr null, align 4 +@global.1 = common dso_local local_unnamed_addr global i32 0, align 4 +@global.2 = common dso_local local_unnamed_addr global i16 0, align 2 +@global.3 = common dso_local local_unnamed_addr global i16 0, align 2 +@global.4 = common dso_local local_unnamed_addr global i32 0, align 4 + +; Function Attrs: nofree norecurse nosync nounwind +define dso_local i32 @wombat(i8 zeroext %arg) local_unnamed_addr #0 { +bb: + %load = load ptr, ptr @global, align 4 + %load1 = load i32, ptr @global.1, align 4 + %add2 = add nsw i32 %load1, -1 + store i32 %add2, ptr @global.1, align 4 + %icmp = icmp eq i32 %load1, 0 + br i1 %icmp, label %bb36, label %bb3 + +bb3: ; preds = %bb3, %bb + %phi = phi i32 [ %add30, %bb3 ], [ %add2, %bb ] + %phi4 = phi i8 [ %phi8, %bb3 ], [ %arg, %bb ] + %phi5 = phi i16 [ %select23, %bb3 ], [ undef, %bb ] + %phi6 = phi i16 [ %select26, %bb3 ], [ undef, %bb ] + %phi7 = phi i16 [ %select, %bb3 ], [ undef, %bb ] + %phi8 = phi i8 [ %select29, %bb3 ], [ %arg, %bb ] + %zext = zext i8 %phi4 to i32 + %getelementptr = getelementptr inbounds i32, ptr %load, i32 %zext + %getelementptr9 = getelementptr inbounds i32, ptr %getelementptr, i32 2 + %ptrtoint = ptrtoint ptr %getelementptr9 to i32 + %trunc = trunc i32 %ptrtoint to i16 + %sext10 = sext i16 %phi7 to i32 + %shl11 = shl i32 %ptrtoint, 16 + %ashr = ashr exact i32 %shl11, 16 + %icmp12 = icmp slt i32 %ashr, %sext10 + %select = select i1 %icmp12, i16 %trunc, i16 %phi7 + %getelementptr13 = getelementptr inbounds i32, ptr %getelementptr, i32 3 + %load14 = load i32, ptr %getelementptr13, align 4 + %shl = shl i32 %load14, 8 + %getelementptr15 = getelementptr inbounds i32, ptr %getelementptr, i32 1 + %load16 = load i32, ptr %getelementptr15, align 4 + %shl17 = shl i32 %load16, 16 + %ashr18 = ashr exact i32 %shl17, 16 + %add = add nsw i32 %ashr18, %load14 + %lshr = lshr i32 %add, 8 + %or = or i32 %lshr, %shl + %sub = sub i32 %or, %load16 + %trunc19 = 
trunc i32 %sub to i16 + %sext = sext i16 %phi5 to i32 + %shl20 = shl i32 %sub, 16 + %ashr21 = ashr exact i32 %shl20, 16 + %icmp22 = icmp sgt i32 %ashr21, %sext + %select23 = select i1 %icmp22, i16 %trunc19, i16 %phi5 + %sext24 = sext i16 %phi6 to i32 + %icmp25 = icmp slt i32 %ashr21, %sext24 + %select26 = select i1 %icmp25, i16 %trunc19, i16 %phi6 + %icmp27 = icmp eq i8 %phi8, 0 + %add28 = add i8 %phi8, -1 + %select29 = select i1 %icmp27, i8 0, i8 %add28 + %add30 = add nsw i32 %phi, -1 + %icmp31 = icmp eq i32 %phi, 0 + br i1 %icmp31, label %bb32, label %bb3 + +bb32: ; preds = %bb3 + store i16 %trunc, ptr @global.2, align 2 + store i16 %trunc19, ptr @global.3, align 2 + store i32 -1, ptr @global.1, align 4 + %sext33 = sext i16 %select to i32 + %sext34 = sext i16 %select23 to i32 + %sext35 = sext i16 %select26 to i32 + br label %bb36 + +bb36: ; preds = %bb32, %bb + %phi37 = phi i32 [ %sext33, %bb32 ], [ 0, %bb ] + %phi38 = phi i32 [ %sext35, %bb32 ], [ 0, %bb ] + %phi39 = phi i32 [ %sext34, %bb32 ], [ 0, %bb ] + %sub40 = sub nsw i32 %phi39, %phi38 + %icmp41 = icmp slt i32 %sub40, %phi37 + br i1 %icmp41, label %bb43, label %bb42 + +bb42: ; preds = %bb36 + store i32 0, ptr @global.4, align 4 + br label %bb43 + +bb43: ; preds = %bb42, %bb36 + ret i32 undef +} diff --git a/llvm/test/CodeGen/Hexagon/swp-epilog-phi9.ll b/llvm/test/CodeGen/Hexagon/swp-epilog-phi9.ll index af1b848a8cf2d..07b129ffeb3e9 100644 --- a/llvm/test/CodeGen/Hexagon/swp-epilog-phi9.ll +++ b/llvm/test/CodeGen/Hexagon/swp-epilog-phi9.ll @@ -5,11 +5,13 @@ ; 2, so the computation for the number of Phis needs to be adjusted when ; the incoming prolog block is from prolog 0 or prolog 1. ; Note: the pipeliner no longer generates a 3 stage pipeline for this test. +; Note: the pipeliner has been generating a 4-stage pipelined loop. 
; CHECK: loop0 ; CHECK: [[REG0:r([0-9]+)]] = add(r{{[0-8]+}},#8) +; CHECK: r{{[0-9]+}} = [[REG0]] ; CHECK: endloop0 -; CHECK: [[REG0]] = add(r{{[0-9]+}},#8) +; CHECK: r{{[0-9]+}} = add(r{{[0-9]+}},#8) ; Function Attrs: nounwind define void @f0(ptr nocapture readonly %a0, i32 %a1) #0 { @@ -50,6 +52,6 @@ declare i32 @llvm.hexagon.M2.mpy.ll.s0(i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.hexagon.M2.mpy.acc.sat.ll.s0(i32, i32, i32) #1 -attributes #0 = { nounwind "target-cpu"="hexagonv60" } +attributes #0 = { nounwind "target-cpu"="hexagonv68" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind } From 50ad52cd557bb602c7511c134f528d66db3d24a3 Mon Sep 17 00:00:00 2001 From: quic-asaravan Date: Wed, 16 Apr 2025 19:42:53 -0700 Subject: [PATCH 03/14] Fixing the undef deprecator issues --- llvm/test/CodeGen/Hexagon/swp-dependent-phis.ll | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/test/CodeGen/Hexagon/swp-dependent-phis.ll b/llvm/test/CodeGen/Hexagon/swp-dependent-phis.ll index bff89f764710a..542df508a2f71 100644 --- a/llvm/test/CodeGen/Hexagon/swp-dependent-phis.ll +++ b/llvm/test/CodeGen/Hexagon/swp-dependent-phis.ll @@ -17,7 +17,7 @@ @global.4 = common dso_local local_unnamed_addr global i32 0, align 4 ; Function Attrs: nofree norecurse nosync nounwind -define dso_local i32 @wombat(i8 zeroext %arg) local_unnamed_addr #0 { +define dso_local i32 @wombat(i8 zeroext %arg, i16 %dummy) local_unnamed_addr #0 { bb: %load = load ptr, ptr @global, align 4 %load1 = load i32, ptr @global.1, align 4 @@ -29,9 +29,9 @@ bb: bb3: ; preds = %bb3, %bb %phi = phi i32 [ %add30, %bb3 ], [ %add2, %bb ] %phi4 = phi i8 [ %phi8, %bb3 ], [ %arg, %bb ] - %phi5 = phi i16 [ %select23, %bb3 ], [ undef, %bb ] - %phi6 = phi i16 [ %select26, %bb3 ], [ undef, %bb ] - %phi7 = phi i16 [ %select, %bb3 ], [ undef, %bb ] + %phi5 = phi i16 [ %select23, %bb3 ], [ %dummy, %bb ] + %phi6 = phi i16 [ %select26, %bb3 ], [ %dummy, %bb ] + %phi7 = 
phi i16 [ %select, %bb3 ], [ %dummy, %bb ] %phi8 = phi i8 [ %select29, %bb3 ], [ %arg, %bb ] %zext = zext i8 %phi4 to i32 %getelementptr = getelementptr inbounds i32, ptr %load, i32 %zext @@ -92,5 +92,5 @@ bb42: ; preds = %bb36 br label %bb43 bb43: ; preds = %bb42, %bb36 - ret i32 undef + ret i32 %dummy } From a0abb4370cbe09b5a9e978950afc66c8f73c7bd4 Mon Sep 17 00:00:00 2001 From: quic-asaravan Date: Fri, 18 Apr 2025 21:08:20 -0700 Subject: [PATCH 04/14] Fixing the type of the dummy variable --- llvm/test/CodeGen/Hexagon/swp-dependent-phis.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/Hexagon/swp-dependent-phis.ll b/llvm/test/CodeGen/Hexagon/swp-dependent-phis.ll index 542df508a2f71..7d4e626fcf80b 100644 --- a/llvm/test/CodeGen/Hexagon/swp-dependent-phis.ll +++ b/llvm/test/CodeGen/Hexagon/swp-dependent-phis.ll @@ -17,7 +17,7 @@ @global.4 = common dso_local local_unnamed_addr global i32 0, align 4 ; Function Attrs: nofree norecurse nosync nounwind -define dso_local i32 @wombat(i8 zeroext %arg, i16 %dummy) local_unnamed_addr #0 { +define dso_local i32 @wombat(i8 zeroext %arg, i32 %dummy) local_unnamed_addr #0 { bb: %load = load ptr, ptr @global, align 4 %load1 = load i32, ptr @global.1, align 4 From 6f0fe175aa6552dcad0dc7b8075a7bca5f84f43c Mon Sep 17 00:00:00 2001 From: quic-asaravan Date: Tue, 22 Apr 2025 02:39:10 -0700 Subject: [PATCH 05/14] Fixing swp-dependent-phis failure --- llvm/test/CodeGen/Hexagon/swp-dependent-phis.ll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/Hexagon/swp-dependent-phis.ll b/llvm/test/CodeGen/Hexagon/swp-dependent-phis.ll index 7d4e626fcf80b..6d324029966d7 100644 --- a/llvm/test/CodeGen/Hexagon/swp-dependent-phis.ll +++ b/llvm/test/CodeGen/Hexagon/swp-dependent-phis.ll @@ -17,7 +17,7 @@ @global.4 = common dso_local local_unnamed_addr global i32 0, align 4 ; Function Attrs: nofree norecurse nosync nounwind -define dso_local i32 @wombat(i8 zeroext %arg, i32 
%dummy) local_unnamed_addr #0 { +define dso_local i16 @wombat(i8 zeroext %arg, i16 %dummy) local_unnamed_addr #0 { bb: %load = load ptr, ptr @global, align 4 %load1 = load i32, ptr @global.1, align 4 @@ -92,5 +92,5 @@ bb42: ; preds = %bb36 br label %bb43 bb43: ; preds = %bb42, %bb36 - ret i32 %dummy + ret i16 %dummy } From db4ba4984d14f8e07548f2ca4177a5a03b6c2109 Mon Sep 17 00:00:00 2001 From: quic-asaravan Date: Mon, 26 May 2025 09:32:43 -0700 Subject: [PATCH 06/14] Adding the reachability check --- llvm/lib/CodeGen/MachinePipeliner.cpp | 3 +- llvm/test/CodeGen/Hexagon/swp-invalid-dag.ll | 32 ++++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/Hexagon/swp-invalid-dag.ll diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp index de10402fe7f48..33dc0ece78a26 100644 --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -980,7 +980,8 @@ void SwingSchedulerDAG::updatePhiDependences() { // S2_storerb_io %0:intregs, 0, %2:intregs // Make sure we do not create an edge between SU2 and SU0. - if (SU->NodeNum < I.NodeNum && !I.isPred(SU)) + if (SU->NodeNum < I.NodeNum && !I.isPred(SU) && + !IsReachable(&I, SU)) SU->addPred(SDep(&I, SDep::Barrier)); } } diff --git a/llvm/test/CodeGen/Hexagon/swp-invalid-dag.ll b/llvm/test/CodeGen/Hexagon/swp-invalid-dag.ll new file mode 100644 index 0000000000000..aa3956508e3bd --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/swp-invalid-dag.ll @@ -0,0 +1,32 @@ +; RUN: llc -march=hexagon -mv71 -O2 < %s -o - 2>&1 > /dev/null +; Ensure we do not invalidate a DAG by forming a circuit. +; If we form a circuit, this test crashes while creating the DAG +; with topological sorting. 
+ +%struct.quux = type { i16, i16, i8, i8, i8, i8, i8, i8, i8, i8, [2 x i8], %struct.ham, %struct.bar, i8 } +%struct.ham = type { i8, [2 x i8], [2 x i8], [2 x i8] } +%struct.bar = type { [2 x i8], [2 x i8], [2 x i8] } + +define dso_local void @blam(i32 %arg) local_unnamed_addr #0 { +bb: + br label %bb1 + +bb1: ; preds = %bb1, %bb + %phi = phi i8 [ %phi6, %bb1 ], [ undef, %bb ] + %phi2 = phi i8 [ %phi, %bb1 ], [ undef, %bb ] + %phi3 = phi i8 [ %phi2, %bb1 ], [ 0, %bb ] + %phi4 = phi i8 [ %phi3, %bb1 ], [ undef, %bb ] + %phi5 = phi i8 [ %phi4, %bb1 ], [ undef, %bb ] + %phi6 = phi i8 [ %phi5, %bb1 ], [ undef, %bb ] + %phi7 = phi i32 [ %add, %bb1 ], [ undef, %bb ] + %getelementptr = getelementptr inbounds %struct.quux, ptr null, i32 %arg, i32 12, i32 1, i32 undef + store i8 %phi4, ptr %getelementptr, align 1 + %add = add i32 %phi7, -1 + %icmp = icmp eq i32 %add, 0 + br i1 %icmp, label %bb8, label %bb1 + +bb8: ; preds = %bb1 + ret void +} + +attributes #0 = { "target-features"="+v71,-long-calls,-small-data" } From bfe355aaad29e93e7b3444920296bec1e0ccf739 Mon Sep 17 00:00:00 2001 From: quic-asaravan Date: Mon, 26 May 2025 09:44:49 -0700 Subject: [PATCH 07/14] Removing irrelevant comment and attributes --- llvm/test/CodeGen/Hexagon/swp-epilog-phi9.ll | 5 ----- 1 file changed, 5 deletions(-) diff --git a/llvm/test/CodeGen/Hexagon/swp-epilog-phi9.ll b/llvm/test/CodeGen/Hexagon/swp-epilog-phi9.ll index 1bad7c8b09d52..7b3d0812845ee 100644 --- a/llvm/test/CodeGen/Hexagon/swp-epilog-phi9.ll +++ b/llvm/test/CodeGen/Hexagon/swp-epilog-phi9.ll @@ -4,7 +4,6 @@ ; blocks, when there are 3 epilog blocks. The Phi was scheduled in stage ; 2, so the computation for the number of Phis needs to be adjusted when ; the incoming prolog block is from prolog 0 or prolog 1. -; Note: the pipeliner no longer generates a 3 stage pipeline for this test. ; Note: the pipeliner has been generating a 4-stage pipelined loop. 
; CHECK: loop0 @@ -52,7 +51,3 @@ declare i32 @llvm.hexagon.M2.mpy.ll.s0(i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.hexagon.M2.mpy.acc.sat.ll.s0(i32, i32, i32) #1 - -attributes #0 = { nounwind "target-cpu"="hexagonv68" } -attributes #1 = { nounwind readnone } -attributes #2 = { nounwind } From 22d31a8f25beb770f3e8c7e6ddb97e466022a0b0 Mon Sep 17 00:00:00 2001 From: quic-asaravan Date: Mon, 26 May 2025 10:06:44 -0700 Subject: [PATCH 08/14] Fixing the undef deprecator issues --- llvm/test/CodeGen/Hexagon/swp-invalid-dag.ll | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/llvm/test/CodeGen/Hexagon/swp-invalid-dag.ll b/llvm/test/CodeGen/Hexagon/swp-invalid-dag.ll index aa3956508e3bd..c66c6f2f605d1 100644 --- a/llvm/test/CodeGen/Hexagon/swp-invalid-dag.ll +++ b/llvm/test/CodeGen/Hexagon/swp-invalid-dag.ll @@ -7,19 +7,19 @@ %struct.ham = type { i8, [2 x i8], [2 x i8], [2 x i8] } %struct.bar = type { [2 x i8], [2 x i8], [2 x i8] } -define dso_local void @blam(i32 %arg) local_unnamed_addr #0 { +define dso_local void @blam(i32 %arg, i8 %dummy, i32 %tmp) local_unnamed_addr #0 { bb: br label %bb1 bb1: ; preds = %bb1, %bb - %phi = phi i8 [ %phi6, %bb1 ], [ undef, %bb ] - %phi2 = phi i8 [ %phi, %bb1 ], [ undef, %bb ] + %phi = phi i8 [ %phi6, %bb1 ], [ %dummy, %bb ] + %phi2 = phi i8 [ %phi, %bb1 ], [ %dummy, %bb ] %phi3 = phi i8 [ %phi2, %bb1 ], [ 0, %bb ] - %phi4 = phi i8 [ %phi3, %bb1 ], [ undef, %bb ] - %phi5 = phi i8 [ %phi4, %bb1 ], [ undef, %bb ] - %phi6 = phi i8 [ %phi5, %bb1 ], [ undef, %bb ] - %phi7 = phi i32 [ %add, %bb1 ], [ undef, %bb ] - %getelementptr = getelementptr inbounds %struct.quux, ptr null, i32 %arg, i32 12, i32 1, i32 undef + %phi4 = phi i8 [ %phi3, %bb1 ], [ %dummy, %bb ] + %phi5 = phi i8 [ %phi4, %bb1 ], [ %dummy, %bb ] + %phi6 = phi i8 [ %phi5, %bb1 ], [ %dummy, %bb ] + %phi7 = phi i32 [ %add, %bb1 ], [ %tmp, %bb ] + %getelementptr = getelementptr inbounds %struct.quux, ptr null, i32 %arg, i32 12, 
i32 1, i8 %dummy store i8 %phi4, ptr %getelementptr, align 1 %add = add i32 %phi7, -1 %icmp = icmp eq i32 %add, 0 From a292569d3aaa93df2b2c9b25cd6f5e3659a96572 Mon Sep 17 00:00:00 2001 From: quic-asaravan Date: Tue, 21 Oct 2025 08:57:13 -0700 Subject: [PATCH 09/14] XFAIL the irrelevant test --- llvm/test/CodeGen/Hexagon/swp-epilog-phi9.ll | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/test/CodeGen/Hexagon/swp-epilog-phi9.ll b/llvm/test/CodeGen/Hexagon/swp-epilog-phi9.ll index 7b3d0812845ee..2c9e941bdabe4 100644 --- a/llvm/test/CodeGen/Hexagon/swp-epilog-phi9.ll +++ b/llvm/test/CodeGen/Hexagon/swp-epilog-phi9.ll @@ -1,3 +1,4 @@ +; XFAIL: hexagon ; RUN: llc -mtriple=hexagon -hexagon-initial-cfg-cleanup=0 -disable-cgp-delete-phis < %s | FileCheck %s ; Test that we generate the correct Phi name in the last couple of epilog From 49243dcb011888d4ec6abfdcf054560beba56cab Mon Sep 17 00:00:00 2001 From: quic-asaravan Date: Mon, 17 Nov 2025 06:15:59 -0800 Subject: [PATCH 10/14] Removing reachability check as PHI-PHI dependency cycles are handled early on - https://github.com/llvm/llvm-project/pull/167095 --- llvm/lib/CodeGen/MachinePipeliner.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp index 5e63308a67006..680e36750d61c 100644 --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -1232,8 +1232,7 @@ void SwingSchedulerDAG::updatePhiDependences() { // S2_storerb_io %0:intregs, 0, %2:intregs // Make sure we do not create an edge between SU2 and SU0. 
- if (SU->NodeNum < I.NodeNum && !I.isPred(SU) && - !IsReachable(&I, SU)) + if (SU->NodeNum < I.NodeNum && !I.isPred(SU)) SU->addPred(SDep(&I, SDep::Barrier)); } } From 93f8351972f5f30193d96f720842113b9937f894 Mon Sep 17 00:00:00 2001 From: Abinaya Saravanan Date: Wed, 19 Nov 2025 21:35:12 +0530 Subject: [PATCH 11/14] Update llvm/test/CodeGen/Hexagon/swp-epilog-phi9.ll Co-authored-by: Ryotaro Kasuga --- llvm/test/CodeGen/Hexagon/swp-epilog-phi9.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/Hexagon/swp-epilog-phi9.ll b/llvm/test/CodeGen/Hexagon/swp-epilog-phi9.ll index 2c9e941bdabe4..5f3f3dcd2b171 100644 --- a/llvm/test/CodeGen/Hexagon/swp-epilog-phi9.ll +++ b/llvm/test/CodeGen/Hexagon/swp-epilog-phi9.ll @@ -1,4 +1,4 @@ -; XFAIL: hexagon +; XFAIL: * ; RUN: llc -mtriple=hexagon -hexagon-initial-cfg-cleanup=0 -disable-cgp-delete-phis < %s | FileCheck %s ; Test that we generate the correct Phi name in the last couple of epilog From 8a925e2540a355ba7d9a1e9740d6a3a227544e52 Mon Sep 17 00:00:00 2001 From: quic-asaravan Date: Wed, 19 Nov 2025 08:26:42 -0800 Subject: [PATCH 12/14] Removing the irrelevant test and comments --- llvm/lib/CodeGen/MachinePipeliner.cpp | 7 ----- llvm/test/CodeGen/Hexagon/swp-invalid-dag.ll | 32 -------------------- 2 files changed, 39 deletions(-) delete mode 100644 llvm/test/CodeGen/Hexagon/swp-invalid-dag.ll diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp index 680e36750d61c..19a968d04766a 100644 --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -1225,13 +1225,6 @@ void SwingSchedulerDAG::updatePhiDependences() { // resulting in bad IR as we do not have a value that can be used // by SU2. - // Reachability check is to ensure that we do not violate DAG. 
- // %1:intregs = PHI %10:intregs, %bb.0, %3:intregs, %bb.1 - SU0 - // %2:intregs = PHI %10:intregs, %bb.0, %1:intregs, %bb.1 - SU1 - // %3:intregs = PHI %11:intregs, %bb.0, %2:intregs, %bb.1 - SU2 - // S2_storerb_io %0:intregs, 0, %2:intregs - // Make sure we do not create an edge between SU2 and SU0. - if (SU->NodeNum < I.NodeNum && !I.isPred(SU)) SU->addPred(SDep(&I, SDep::Barrier)); } diff --git a/llvm/test/CodeGen/Hexagon/swp-invalid-dag.ll b/llvm/test/CodeGen/Hexagon/swp-invalid-dag.ll deleted file mode 100644 index c66c6f2f605d1..0000000000000 --- a/llvm/test/CodeGen/Hexagon/swp-invalid-dag.ll +++ /dev/null @@ -1,32 +0,0 @@ -; RUN: llc -march=hexagon -mv71 -O2 < %s -o - 2>&1 > /dev/null -; Ensure we do not invalidate a DAG by forming a circuit. -; If we form a circuit, this test crashes while creating the DAG -; with topological sorting. - -%struct.quux = type { i16, i16, i8, i8, i8, i8, i8, i8, i8, i8, [2 x i8], %struct.ham, %struct.bar, i8 } -%struct.ham = type { i8, [2 x i8], [2 x i8], [2 x i8] } -%struct.bar = type { [2 x i8], [2 x i8], [2 x i8] } - -define dso_local void @blam(i32 %arg, i8 %dummy, i32 %tmp) local_unnamed_addr #0 { -bb: - br label %bb1 - -bb1: ; preds = %bb1, %bb - %phi = phi i8 [ %phi6, %bb1 ], [ %dummy, %bb ] - %phi2 = phi i8 [ %phi, %bb1 ], [ %dummy, %bb ] - %phi3 = phi i8 [ %phi2, %bb1 ], [ 0, %bb ] - %phi4 = phi i8 [ %phi3, %bb1 ], [ %dummy, %bb ] - %phi5 = phi i8 [ %phi4, %bb1 ], [ %dummy, %bb ] - %phi6 = phi i8 [ %phi5, %bb1 ], [ %dummy, %bb ] - %phi7 = phi i32 [ %add, %bb1 ], [ %tmp, %bb ] - %getelementptr = getelementptr inbounds %struct.quux, ptr null, i32 %arg, i32 12, i32 1, i8 %dummy - store i8 %phi4, ptr %getelementptr, align 1 - %add = add i32 %phi7, -1 - %icmp = icmp eq i32 %add, 0 - br i1 %icmp, label %bb8, label %bb1 - -bb8: ; preds = %bb1 - ret void -} - -attributes #0 = { "target-features"="+v71,-long-calls,-small-data" } From 373972e8eeadc5f4e4a46aec1a98c466e0ced578 Mon Sep 17 00:00:00 2001 From: Abinaya Saravanan 
Date: Mon, 24 Nov 2025 15:28:57 +0530 Subject: [PATCH 13/14] Update llvm/lib/CodeGen/MachinePipeliner.cpp Co-authored-by: Ryotaro Kasuga --- llvm/lib/CodeGen/MachinePipeliner.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp index 3e523af525179..59ead74da6fb6 100644 --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -1285,7 +1285,7 @@ void SwingSchedulerDAG::updatePhiDependences() { // resulting in bad IR as we do not have a value that can be used // by SU2. - if (SU->NodeNum < I.NodeNum && !I.isPred(SU)) + if (SU->NodeNum < I.NodeNum && !SU->isPred(&I)) SU->addPred(SDep(&I, SDep::Barrier)); } } From cddbcd433327c32909c30589bb4a5c40cd6bc830 Mon Sep 17 00:00:00 2001 From: quic-asaravan Date: Mon, 1 Dec 2025 23:32:51 -0800 Subject: [PATCH 14/14] XFAIL swp-epilog-phi9.ll only for Hexagon target --- llvm/test/CodeGen/Hexagon/swp-epilog-phi9.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/Hexagon/swp-epilog-phi9.ll b/llvm/test/CodeGen/Hexagon/swp-epilog-phi9.ll index 5f3f3dcd2b171..549428643ccaa 100644 --- a/llvm/test/CodeGen/Hexagon/swp-epilog-phi9.ll +++ b/llvm/test/CodeGen/Hexagon/swp-epilog-phi9.ll @@ -1,4 +1,4 @@ -; XFAIL: * +; XFAIL: Hexagon ; RUN: llc -mtriple=hexagon -hexagon-initial-cfg-cleanup=0 -disable-cgp-delete-phis < %s | FileCheck %s ; Test that we generate the correct Phi name in the last couple of epilog