@@ -38,6 +38,7 @@ class InlineRaytracing : public AllocationLivenessAnalyzer {
38
38
// Code generation context pointer; null until assigned (the assignment is not visible in this excerpt).
CodeGenContext *m_pCGCtx = nullptr ;
39
39
// LLVM struct type pointer, null by default.
// NOTE(review): presumably the layout of the ray query object -- confirm against where it is set.
llvm::StructType *m_RQObjectType = nullptr ;
40
40
// Slot counter, starts at 0. What a "slot" is and who increments it is not visible in this excerpt.
uint32_t m_numSlotsUsed = 0 ;
41
// Cache used by getStackPtr(): maps (basic block, ray query object value) to the alloca that
// getStackPtr() created for that pair, so the alloca and the memcpy into it are emitted once per key.
+ llvm::DenseMap<std::pair<llvm::BasicBlock *, llvm::Value *>, llvm::AllocaInst *> m_CrossBlockVectorizationStacks;
41
42
42
43
// Declared here, defined elsewhere; takes the function to process.
void LowerIntrinsics (llvm::Function &F);
43
44
// Declared here, defined elsewhere; takes the function to process.
// NOTE(review): the bool result presumably reports whether anything was changed -- confirm at the definition.
bool LowerAllocations (llvm::Function &F);
@@ -142,7 +143,37 @@ class InlineRaytracing : public AllocationLivenessAnalyzer {
142
143
IRB.CreateStore (packedData, getAtIndexFromRayQueryObject (IRB, rqObject, 1 ));
143
144
}
144
145
145
- llvm::RTBuilder::SyncStackPointerVal *getStackPtr (llvm::RTBuilder &IRB, llvm::Value *rqObject) {
146
+ llvm::RTBuilder::SyncStackPointerVal *getStackPtr (llvm::RTBuilder &IRB, llvm::Value *rqObject,
147
+ bool allowXBlockVectorize = false ) {
148
+
149
+ bool doXBlockVectorize =
150
+ allowXBlockVectorize && IGC_IS_FLAG_ENABLED (UseCrossBlockLoadVectorizationForInlineRaytracing);
151
+
152
+ // scan the basic block for continuation intrinsics. we don't want to contribute to raytracing swstack
153
+ if (doXBlockVectorize) {
154
+ for (auto &I : *IRB.GetInsertBlock ())
155
+ if (llvm::isa<llvm::ContinuationHLIntrinsic>(&I))
156
+ doXBlockVectorize = false ;
157
+ }
158
+
159
+ if (doXBlockVectorize) {
160
+ auto key = std::make_pair (IRB.GetInsertBlock (), rqObject);
161
+ if (m_CrossBlockVectorizationStacks.find (key) == m_CrossBlockVectorizationStacks.end ()) {
162
+
163
+ llvm::RTBuilder::InsertPointGuard g (IRB);
164
+ IRB.SetInsertPoint (key.first ->getParent ()->getEntryBlock ().getFirstNonPHI ());
165
+ auto *SMStack =
166
+ IRB.CreateAlloca (IRB.getRTStack2Ty (), nullptr ,
167
+ VALUE_NAME (" CrossBlockLoadSMStackForBlock" ));
168
+ IRB.SetInsertPoint (key.first ->getFirstNonPHI ());
169
+ IRB.CreateMemCpy (SMStack, getStackPtr (IRB, rqObject), IRB.getSyncRTStackSize (),
170
+ RayDispatchGlobalData::StackChunkSize);
171
+ m_CrossBlockVectorizationStacks[key] = SMStack;
172
+ }
173
+
174
+ return static_cast <llvm::RTBuilder::SyncStackPointerVal *>(llvm::cast<llvm::Value>(m_CrossBlockVectorizationStacks[key]));
175
+ }
176
+
146
177
return static_cast <llvm::RTBuilder::SyncStackPointerVal *>(
147
178
llvm::cast<llvm::Value>(IRB.CreateCall (m_Functions[GET_STACK_POINTER_FROM_RQ_OBJECT], rqObject)));
148
179
}
0 commit comments