@@ -103,6 +103,7 @@ void FinalLowerGC::lowerNewGCFrame(CallInst *target, Function &F)
103103 builder.CreateMemSet (gcframe, Constant::getNullValue (Type::getInt8Ty (F.getContext ())), ptrsize * (nRoots + 2 ), Align (16 ), tbaa_gcframe);
104104
105105 target->replaceAllUsesWith (gcframe);
106+ target->eraseFromParent ();
106107}
107108
108109void FinalLowerGC::lowerPushGCFrame (CallInst *target, Function &F)
@@ -130,6 +131,7 @@ void FinalLowerGC::lowerPushGCFrame(CallInst *target, Function &F)
130131 gcframe,
131132 pgcstack,
132133 Align (sizeof (void *)));
134+ target->eraseFromParent ();
133135}
134136
135137void FinalLowerGC::lowerPopGCFrame (CallInst *target, Function &F)
@@ -148,6 +150,7 @@ void FinalLowerGC::lowerPopGCFrame(CallInst *target, Function &F)
148150 pgcstack,
149151 Align (sizeof (void *)));
150152 inst->setMetadata (LLVMContext::MD_tbaa, tbaa_gcframe);
153+ target->eraseFromParent ();
151154}
152155
153156void FinalLowerGC::lowerGetGCFrameSlot (CallInst *target, Function &F)
@@ -167,6 +170,7 @@ void FinalLowerGC::lowerGetGCFrameSlot(CallInst *target, Function &F)
167170 auto gep = builder.CreateInBoundsGEP (T_prjlvalue, gcframe, index);
168171 gep->takeName (target);
169172 target->replaceAllUsesWith (gep);
173+ target->eraseFromParent ();
170174}
171175
172176void FinalLowerGC::lowerQueueGCRoot (CallInst *target, Function &F)
@@ -183,6 +187,7 @@ void FinalLowerGC::lowerSafepoint(CallInst *target, Function &F)
183187 IRBuilder<> builder (target);
184188 Value* signal_page = target->getOperand (0 );
185189 builder.CreateLoad (T_size, signal_page, true );
190+ target->eraseFromParent ();
186191}
187192
188193#ifdef MMTK_GC
@@ -209,7 +214,6 @@ void FinalLowerGC::lowerWriteBarrier2Slow(CallInst *target, Function &F)
209214 assert (target->arg_size () == 2 );
210215 target->setCalledFunction (writeBarrier2SlowFunc);
211216}
212-
213217#endif
214218
215219void FinalLowerGC::lowerGCAllocBytes (CallInst *target, Function &F)
@@ -235,112 +239,26 @@ void FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
235239 derefBytes = sz;
236240 }
237241 else {
238- #ifndef MMTK_GC
239242 auto pool_offs = ConstantInt::get (Type::getInt32Ty (F.getContext ()), offset);
240243 auto pool_osize = ConstantInt::get (Type::getInt32Ty (F.getContext ()), osize);
241244 newI = builder.CreateCall (poolAllocFunc, { ptls, pool_offs, pool_osize, type });
242245 if (sz > 0 )
243246 derefBytes = sz;
244- #else // MMTK_GC
245- auto pool_osize_i32 = ConstantInt::get (Type::getInt32Ty (F.getContext ()), osize);
246- auto pool_osize = ConstantInt::get (Type::getInt64Ty (F.getContext ()), osize);
247-
248- // Should we generate fastpath allocation sequence here? We should always generate fastpath here for MMTk.
249- // Setting this to false will increase allocation overhead a lot, and should only be used for debugging.
250- const bool INLINE_FASTPATH_ALLOCATION = true ;
251-
252- if (INLINE_FASTPATH_ALLOCATION) {
253- // Assuming we use the first immix allocator.
254- // FIXME: We should get the allocator index and type from MMTk.
255- auto allocator_offset = offsetof (jl_tls_states_t , mmtk_mutator) + offsetof (MMTkMutatorContext, allocators) + offsetof (Allocators, immix);
256-
257- auto cursor_pos = ConstantInt::get (Type::getInt64Ty (target->getContext ()), allocator_offset + offsetof (ImmixAllocator, cursor));
258- auto limit_pos = ConstantInt::get (Type::getInt64Ty (target->getContext ()), allocator_offset + offsetof (ImmixAllocator, limit));
259-
260- auto cursor_tls_i8 = builder.CreateGEP (Type::getInt8Ty (target->getContext ()), ptls, cursor_pos);
261- auto cursor_ptr = builder.CreateBitCast (cursor_tls_i8, PointerType::get (Type::getInt64Ty (target->getContext ()), 0 ), " cursor_ptr" );
262- auto cursor = builder.CreateLoad (Type::getInt64Ty (target->getContext ()), cursor_ptr, " cursor" );
263-
264- // offset = 8
265- auto delta_offset = builder.CreateNSWSub (ConstantInt::get (Type::getInt64Ty (target->getContext ()), 0 ), ConstantInt::get (Type::getInt64Ty (target->getContext ()), 8 ));
266- auto delta_cursor = builder.CreateNSWSub (ConstantInt::get (Type::getInt64Ty (target->getContext ()), 0 ), cursor);
267- auto delta_op = builder.CreateNSWAdd (delta_offset, delta_cursor);
268- // alignment 16 (15 = 16 - 1)
269- auto delta = builder.CreateAnd (delta_op, ConstantInt::get (Type::getInt64Ty (target->getContext ()), 15 ), " delta" );
270- auto result = builder.CreateNSWAdd (cursor, delta, " result" );
271-
272- auto new_cursor = builder.CreateNSWAdd (result, pool_osize);
273-
274- auto limit_tls_i8 = builder.CreateGEP (Type::getInt8Ty (target->getContext ()), ptls, limit_pos);
275- auto limit_ptr = builder.CreateBitCast (limit_tls_i8, PointerType::get (Type::getInt64Ty (target->getContext ()), 0 ), " limit_ptr" );
276- auto limit = builder.CreateLoad (Type::getInt64Ty (target->getContext ()), limit_ptr, " limit" );
277-
278- auto gt_limit = builder.CreateICmpSGT (new_cursor, limit);
279-
280- auto current_block = target->getParent ();
281- builder.SetInsertPoint (target->getNextNode ());
282- auto phiNode = builder.CreatePHI (poolAllocFunc->getReturnType (), 2 , " phi_fast_slow" );
283- auto top_cont = current_block->splitBasicBlock (target->getNextNode (), " top_cont" );
284-
285- auto slowpath = BasicBlock::Create (target->getContext (), " slowpath" , target->getFunction ());
286- auto fastpath = BasicBlock::Create (target->getContext (), " fastpath" , target->getFunction (), top_cont);
287-
288- auto next_br = current_block->getTerminator ();
289- next_br->eraseFromParent ();
290- builder.SetInsertPoint (current_block);
291- builder.CreateCondBr (gt_limit, slowpath, fastpath);
292-
293- // slowpath
294- builder.SetInsertPoint (slowpath);
295- auto pool_offs = ConstantInt::get (Type::getInt32Ty (F.getContext ()), 1 );
296- auto new_call = builder.CreateCall (poolAllocFunc, { ptls, pool_offs, pool_osize_i32, type });
297- new_call->setAttributes (new_call->getCalledFunction ()->getAttributes ());
298- builder.CreateBr (top_cont);
299-
300- // // fastpath
301- builder.SetInsertPoint (fastpath);
302- builder.CreateStore (new_cursor, cursor_ptr);
303-
304- // ptls->gc_num.allocd += osize;
305- auto pool_alloc_pos = ConstantInt::get (Type::getInt64Ty (target->getContext ()), offsetof (jl_tls_states_t , gc_tls) + offsetof (jl_gc_tls_states_t , gc_num));
306- auto pool_alloc_i8 = builder.CreateGEP (Type::getInt8Ty (target->getContext ()), ptls, pool_alloc_pos);
307- auto pool_alloc_tls = builder.CreateBitCast (pool_alloc_i8, PointerType::get (Type::getInt64Ty (target->getContext ()), 0 ), " pool_alloc" );
308- auto pool_allocd = builder.CreateLoad (Type::getInt64Ty (target->getContext ()), pool_alloc_tls);
309- auto pool_allocd_total = builder.CreateAdd (pool_allocd, pool_osize);
310- builder.CreateStore (pool_allocd_total, pool_alloc_tls);
311-
312- auto v_raw = builder.CreateNSWAdd (result, ConstantInt::get (Type::getInt64Ty (target->getContext ()), sizeof (jl_taggedvalue_t )));
313- auto v_as_ptr = builder.CreateIntToPtr (v_raw, poolAllocFunc->getReturnType ());
314- builder.CreateBr (top_cont);
315-
316- phiNode->addIncoming (new_call, slowpath);
317- phiNode->addIncoming (v_as_ptr, fastpath);
318- phiNode->takeName (target);
319-
320- target->replaceAllUsesWith (phiNode);
321- return ;
322- } else {
323- auto pool_offs = ConstantInt::get (Type::getInt32Ty (F.getContext ()), 1 );
324- newI = builder.CreateCall (poolAllocFunc, { ptls, pool_offs, pool_osize_i32, type });
325- if (sz > 0 )
326- derefBytes = sz;
327- }
328- #endif // MMTK_GC
329247 }
330248 } else {
331249 auto size = builder.CreateZExtOrTrunc (target->getArgOperand (1 ), T_size);
332250 // allocTypedFunc does not include the type tag in the allocation size!
333251 newI = builder.CreateCall (allocTypedFunc, { ptls, size, type });
334252 derefBytes = sizeof (void *);
335253 }
336-
337254 newI->setAttributes (newI->getCalledFunction ()->getAttributes ());
338255 unsigned align = std::max ((unsigned )target->getRetAlign ().valueOrOne ().value (), (unsigned )sizeof (void *));
339256 newI->addRetAttr (Attribute::getWithAlignment (F.getContext (), Align (align)));
340257 if (derefBytes > 0 )
341258 newI->addDereferenceableRetAttr (derefBytes);
342259 newI->takeName (target);
343260 target->replaceAllUsesWith (newI);
261+ target->eraseFromParent ();
344262}
345263
346264bool FinalLowerGC::runOnFunction (Function &F)
@@ -362,63 +280,48 @@ bool FinalLowerGC::runOnFunction(Function &F)
362280 poolAllocFunc = getOrDeclare (jl_well_known::GCPoolAlloc);
363281 bigAllocFunc = getOrDeclare (jl_well_known::GCBigAlloc);
364282 allocTypedFunc = getOrDeclare (jl_well_known::GCAllocTyped);
365- T_size = F.getParent ()->getDataLayout ().getIntPtrType (F.getContext ());
366-
367283#ifdef MMTK_GC
368284 writeBarrier1Func = getOrDeclare (jl_well_known::GCWriteBarrier1);
369285 writeBarrier2Func = getOrDeclare (jl_well_known::GCWriteBarrier2);
370286 writeBarrier1SlowFunc = getOrDeclare (jl_well_known::GCWriteBarrier1Slow);
371287 writeBarrier2SlowFunc = getOrDeclare (jl_well_known::GCWriteBarrier2Slow);
372288#endif
289+ T_size = F.getParent ()->getDataLayout ().getIntPtrType (F.getContext ());
373290
374291 // Lower all calls to supported intrinsics.
375292 for (auto &BB : F) {
376- for (auto it = BB.begin (); it != BB.end ();) {
377- auto *CI = dyn_cast<CallInst>(&*it);
378- if (!CI) {
379- ++it;
293+ for (auto &I : make_early_inc_range (BB)) {
294+ auto *CI = dyn_cast<CallInst>(&I);
295+ if (!CI)
380296 continue ;
381- }
382297
383298 Value *callee = CI->getCalledOperand ();
384299 assert (callee);
385300
386301#define LOWER_INTRINSIC (INTRINSIC, LOWER_INTRINSIC_FUNC ) \
387- auto INTRINSIC = getOrNull (jl_intrinsics::INTRINSIC); \
388- if (INTRINSIC == callee) { \
389- LOWER_INTRINSIC_FUNC (CI, F); \
390- it = CI-> eraseFromParent ( ); \
391- continue ; \
392- } \
302+ do { \
303+ auto intrinsic = getOrNull (jl_intrinsics::INTRINSIC); \
304+ if (intrinsic == callee) { \
305+ LOWER_INTRINSIC_FUNC (CI, F ); \
306+ } \
307+ } while ( 0 )
393308
394309 LOWER_INTRINSIC (newGCFrame, lowerNewGCFrame);
395310 LOWER_INTRINSIC (pushGCFrame, lowerPushGCFrame);
396311 LOWER_INTRINSIC (popGCFrame, lowerPopGCFrame);
397312 LOWER_INTRINSIC (getGCFrameSlot, lowerGetGCFrameSlot);
398313 LOWER_INTRINSIC (GCAllocBytes, lowerGCAllocBytes);
314+ LOWER_INTRINSIC (queueGCRoot, lowerQueueGCRoot);
399315 LOWER_INTRINSIC (safepoint, lowerSafepoint);
400316
401- // These lowerings preserve the CI and do not erase them from the parent
402- #define LOWER_WB_INTRINSIC (INTRINSIC, LOWER_INTRINSIC_FUNC ) \
403- auto INTRINSIC = getOrNull (jl_intrinsics::INTRINSIC); \
404- if (INTRINSIC == callee) { \
405- LOWER_INTRINSIC_FUNC (CI, F); \
406- ++it; \
407- continue ; \
408- } \
409-
410- LOWER_WB_INTRINSIC (queueGCRoot, lowerQueueGCRoot);
411-
412317#ifdef MMTK_GC
413- LOWER_WB_INTRINSIC (writeBarrier1, lowerWriteBarrier1 );
414- LOWER_WB_INTRINSIC (writeBarrier2, lowerWriteBarrier2 );
415- LOWER_WB_INTRINSIC (writeBarrier1Slow, lowerWriteBarrier1Slow );
416- LOWER_WB_INTRINSIC (writeBarrier2Slow, lowerWriteBarrier2Slow );
318+ LOWER_INTRINSIC (writeBarrier1, lowerWriteBarrier1 ); // each write barrier goes to its own lowering, not lowerNewGCFrame
319+ LOWER_INTRINSIC (writeBarrier2, lowerWriteBarrier2 );
320+ LOWER_INTRINSIC (writeBarrier1Slow, lowerWriteBarrier1Slow );
321+ LOWER_INTRINSIC (writeBarrier2Slow, lowerWriteBarrier2Slow );
417322#endif
418- ++it;
419323
420324#undef LOWER_INTRINSIC
421- #undef LOWER_WB_INTRINSIC
422325 }
423326 }
424327
0 commit comments