Skip to content

Commit 74c3bcc

Browse files
jaladreips authored and igcbot committed
Implement cross block load vectorization for inline raytracing
We do not yet have performance parity with the old implementation. One of the reasons is suboptimal loading from the RT stack (rtstack). This change should coalesce those loads for trivial rayquery usages.
1 parent 3275c8a commit 74c3bcc

File tree

3 files changed

+38
-4
lines changed

3 files changed

+38
-4
lines changed

IGC/AdaptorCommon/RayTracing/NewTraceRayInlineLoweringPass.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -522,6 +522,7 @@ void InlineRaytracing::LowerIntrinsics(Function &F) {
522522
RQI->replaceAllUsesWith(getPackedData(IRB, rqObject).CandidateType);
523523
break;
524524
case GenISAIntrinsic::GenISA_TraceRayInlineRayInfo: {
525+
525526
auto *I = cast<RayQueryInfoIntrinsic>(RQI);
526527
auto data = getPackedData(IRB, rqObject);
527528
auto *loadCommittedFromPotential = IRB.CreateICmpEQ(data.CommittedDataLocation, IRB.getInt32(PotentialHit),
@@ -532,7 +533,7 @@ void InlineRaytracing::LowerIntrinsics(Function &F) {
532533

533534
switch (I->getInfoKind()) {
534535
default:
535-
I->replaceAllUsesWith(IRB.lowerRayInfo(getStackPtr(IRB, rqObject), I, shaderTy, std::nullopt));
536+
I->replaceAllUsesWith(IRB.lowerRayInfo(getStackPtr(IRB, rqObject, true), I, shaderTy, std::nullopt));
536537
break;
537538
// leave this in for now, until we prove we don't need the hack anymore
538539
case GEOMETRY_INDEX: {
@@ -541,9 +542,9 @@ void InlineRaytracing::LowerIntrinsics(Function &F) {
541542
specialPattern = forceShortCurcuitingOR_CommittedGeomIdx(IRB, I);
542543
}
543544

544-
Value *leafType = IRB.getLeafType(getStackPtr(IRB, rqObject), IRB.getInt1(I->isCommitted()));
545+
Value *leafType = IRB.getLeafType(getStackPtr(IRB, rqObject, true), IRB.getInt1(I->isCommitted()));
545546
Value *geoIndex = IRB.getGeometryIndex(
546-
getStackPtr(IRB, rqObject), I, leafType,
547+
getStackPtr(IRB, rqObject, true), I, leafType,
547548
IRB.getInt32(I->isCommitted() ? CallableShaderTypeMD::ClosestHit : CallableShaderTypeMD::AnyHit),
548549
!specialPattern);
549550
IGC_ASSERT_MESSAGE(I->getType()->isIntegerTy(), "Invalid geometryIndex type!");

IGC/AdaptorCommon/RayTracing/NewTraceRayInlineLoweringPass.h

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ class InlineRaytracing : public AllocationLivenessAnalyzer {
3838
CodeGenContext *m_pCGCtx = nullptr;
3939
llvm::StructType *m_RQObjectType = nullptr;
4040
uint32_t m_numSlotsUsed = 0;
41+
llvm::DenseMap<std::pair<llvm::BasicBlock *, llvm::Value *>, llvm::AllocaInst *> m_CrossBlockVectorizationStacks;
4142

4243
void LowerIntrinsics(llvm::Function &F);
4344
bool LowerAllocations(llvm::Function &F);
@@ -142,7 +143,37 @@ class InlineRaytracing : public AllocationLivenessAnalyzer {
142143
IRB.CreateStore(packedData, getAtIndexFromRayQueryObject(IRB, rqObject, 1));
143144
}
144145

145-
llvm::RTBuilder::SyncStackPointerVal *getStackPtr(llvm::RTBuilder &IRB, llvm::Value *rqObject) {
146+
// Returns the stack pointer value to use when reading ray query state for `rqObject`.
//
// Default path: emits a call to m_Functions[GET_STACK_POINTER_FROM_RQ_OBJECT] with `rqObject`
// at the builder's current insertion point and returns the call result.
//
// Cross-block path: taken only when `allowXBlockVectorize` is true, the
// UseCrossBlockLoadVectorizationForInlineRaytracing flag is enabled, and the builder's current
// insert block contains no ContinuationHLIntrinsic. In that case the function returns an alloca
// that is cached per (insert block, rqObject) pair in m_CrossBlockVectorizationStacks and is
// filled by a memcpy from the regular stack pointer, emitted at the top of that block.
//
// Parameters:
//   IRB                  - builder used to emit the instructions; its insert block is part of the cache key.
//   rqObject             - ray query object value passed to the stack-pointer helper.
//   allowXBlockVectorize - caller opt-in for the cross-block path (default: false).
llvm::RTBuilder::SyncStackPointerVal *getStackPtr(llvm::RTBuilder &IRB, llvm::Value *rqObject,
147+
bool allowXBlockVectorize = false) {
148+
149+
// The cross-block path needs both the caller's opt-in and the regkey to be enabled.
bool doXBlockVectorize =
150+
allowXBlockVectorize && IGC_IS_FLAG_ENABLED(UseCrossBlockLoadVectorizationForInlineRaytracing);
151+
152+
// Scan the current insert block for continuation intrinsics; if any is present, fall back to the
// default path, because we don't want to contribute to the raytracing SW stack.
// NOTE(review): the loop keeps iterating after the first match; a `break` would be equivalent.
153+
if (doXBlockVectorize) {
154+
for (auto &I : *IRB.GetInsertBlock())
155+
if (llvm::isa<llvm::ContinuationHLIntrinsic>(&I))
156+
doXBlockVectorize = false;
157+
}
158+
159+
if (doXBlockVectorize) {
160+
// One cached copy per (basic block, ray query object) pair.
auto key = std::make_pair(IRB.GetInsertBlock(), rqObject);
161+
if (m_CrossBlockVectorizationStacks.find(key) == m_CrossBlockVectorizationStacks.end()) {
162+
163+
// Scoped guard around the insertion-point changes below
// (presumably restores the caller's insertion point on scope exit, as llvm::IRBuilder's guard does - TODO confirm).
llvm::RTBuilder::InsertPointGuard g(IRB);
164+
// The alloca is emitted at the first non-PHI instruction of the function's entry block.
IRB.SetInsertPoint(key.first->getParent()->getEntryBlock().getFirstNonPHI());
165+
auto *SMStack =
166+
IRB.CreateAlloca(IRB.getRTStack2Ty(), nullptr,
167+
VALUE_NAME("CrossBlockLoadSMStackForBlock"));
168+
// The copy is emitted at the first non-PHI instruction of the block that requested it.
// NOTE(review): this assumes `rqObject` is already available at the top of that block - confirm.
IRB.SetInsertPoint(key.first->getFirstNonPHI());
169+
// The source is the regular (non-vectorized) stack pointer: the recursive call uses the default
// allowXBlockVectorize = false. The last argument is presumably the alignment - TODO confirm
// against this builder's CreateMemCpy overload.
IRB.CreateMemCpy(SMStack, getStackPtr(IRB, rqObject), IRB.getSyncRTStackSize(),
170+
RayDispatchGlobalData::StackChunkSize);
171+
m_CrossBlockVectorizationStacks[key] = SMStack;
172+
}
173+
174+
// NOTE(review): the map is looked up again here (after find() and the insertion above);
// keeping the iterator would avoid the repeated hashing.
return static_cast<llvm::RTBuilder::SyncStackPointerVal *>(llvm::cast<llvm::Value>(m_CrossBlockVectorizationStacks[key]));
175+
}
176+
146177
// Default path: ask the helper function for the stack pointer of this ray query object.
return static_cast<llvm::RTBuilder::SyncStackPointerVal *>(
147178
llvm::cast<llvm::Value>(IRB.CreateCall(m_Functions[GET_STACK_POINTER_FROM_RQ_OBJECT], rqObject)));
148179
}

IGC/common/igc_flags.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1837,6 +1837,8 @@ DECLARE_IGC_REGKEY_BITMASK(UseNewInlineRaytracing, 4, "Use the new rayquery impl
18371837
NEW_INLINE_RAYTRACING_MASK, true)
18381838
DECLARE_IGC_REGKEY(DWORD, AddDummySlotsForNewInlineRaytracing, 0,
18391839
"Add dummy rayquery slots when doing new inline raytracing", true)
1840+
DECLARE_IGC_REGKEY(bool, UseCrossBlockLoadVectorizationForInlineRaytracing, false,
1841+
"If enabled, will try to vectorize loads that are not adjacent to each other. May increase GRF pressure", true)
18401842
DECLARE_IGC_REGKEY(bool, OverrideRayQueryThrottling, false,
18411843
"Force rayquery throttling (dynamic ray management) to be enabled or disabled. Default value of "
18421844
"this key is ignored",

0 commit comments

Comments
 (0)