From 6d35135d82710d8388bd0de38b916a9d38a24d74 Mon Sep 17 00:00:00 2001 From: Carlo Bertolli Date: Tue, 28 Oct 2025 15:46:37 -0500 Subject: [PATCH] [AMDGPU] Enable expensive unroll trip count. This patch enables unrolling of the innermost loop of the PyTorch reduce function in Normalization.cuh. --- llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 03d16fdd54c42..100c66f77c557 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -271,6 +271,8 @@ void AMDGPUTTIImpl::getUnrollingPreferences( if (L->isInnermost() && BB->size() < UnrollMaxBlockToAnalyze) UP.MaxIterationsCountToAnalyze = 32; } + + UP.AllowExpensiveTripCount = true; } void AMDGPUTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,