From ec26ddedd9e95309115f0adc575aaa8405bae2bd Mon Sep 17 00:00:00 2001
From: Mark Rousskov
Date: Sun, 29 Jun 2025 12:45:42 -0400
Subject: [PATCH] Remove no-op cleanups on MIR post-monomorphization

Skipping these cleanups reduces the amount of IR we hand to LLVM, which
speeds up LLVM and improves codegen overall. As an example, for cargo
this change cuts ~5% of the LLVM IR lines we generate (measured with
-Cno-prepopulate-passes).
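To illustrate the kind of cleanup this skips (a hypothetical example,
not taken from the cargo measurement above): in a generic function, MIR
must keep a landing pad that drops a local on the unwind path, because
`T` might need drop glue. Only after monomorphization can we see that
for, say, `T = i32` the pad does nothing, letting us emit a plain
`call` instead of an `invoke` with a landing-pad block:

    // Hypothetical illustration only. `f` may unwind, so MIR keeps a
    // cleanup block that drops `v` on the unwind path. For `T = i32`
    // that drop is a no-op post-monomorphization, and with this change
    // the unwind edge and its landing pad are simply not emitted.
    pub fn passthrough<T>(v: T, f: fn()) -> T {
        f();
        v
    }

Note that -Cno-prepopulate-passes measures the IR rustc emits before
LLVM's own passes run, so the ~5% is a reduction in generated IR rather
than in post-optimization output.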
---
 Cargo.lock                                    |   1 +
 compiler/rustc_codegen_ssa/Cargo.toml         |   1 +
 compiler/rustc_codegen_ssa/src/mir/analyze.rs |  12 +-
 compiler/rustc_codegen_ssa/src/mir/block.rs   |  16 +-
 compiler/rustc_codegen_ssa/src/mir/mod.rs     |  56 +++++-
 compiler/rustc_mir_transform/src/lib.rs       |   2 +-
 .../src/remove_noop_landing_pads.rs           | 167 +++++++++++-------
 tests/codegen/mem-replace-big-type.rs         |   2 +-
 8 files changed, 180 insertions(+), 77 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index e1cf17e2c01ca..e5e996ca3239f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3505,6 +3505,7 @@ dependencies = [
  "rustc_macros",
  "rustc_metadata",
  "rustc_middle",
+ "rustc_mir_transform",
  "rustc_query_system",
  "rustc_serialize",
  "rustc_session",
diff --git a/compiler/rustc_codegen_ssa/Cargo.toml b/compiler/rustc_codegen_ssa/Cargo.toml
index cfae1b3ec98e5..1b7cfc2dd38b1 100644
--- a/compiler/rustc_codegen_ssa/Cargo.toml
+++ b/compiler/rustc_codegen_ssa/Cargo.toml
@@ -30,6 +30,7 @@ rustc_index = { path = "../rustc_index" }
 rustc_macros = { path = "../rustc_macros" }
 rustc_metadata = { path = "../rustc_metadata" }
 rustc_middle = { path = "../rustc_middle" }
+rustc_mir_transform = { path = "../rustc_mir_transform" }
 rustc_query_system = { path = "../rustc_query_system" }
 rustc_serialize = { path = "../rustc_serialize" }
 rustc_session = { path = "../rustc_session" }
diff --git a/compiler/rustc_codegen_ssa/src/mir/analyze.rs b/compiler/rustc_codegen_ssa/src/mir/analyze.rs
index 99f35b7920864..edd745e6690dc 100644
--- a/compiler/rustc_codegen_ssa/src/mir/analyze.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/analyze.rs
@@ -279,10 +279,14 @@ impl CleanupKind {
 /// MSVC requires unwinding code to be split into a tree of *funclets*, where each funclet can
 /// only branch to itself or to its parent. Luckily, the code we generate matches this pattern.
 /// Recover that structure in an analyze pass.
-pub(crate) fn cleanup_kinds(mir: &mir::Body<'_>) -> IndexVec<mir::BasicBlock, CleanupKind> {
+pub(crate) fn cleanup_kinds(
+    mir: &mir::Body<'_>,
+    nop_landing_pads: &DenseBitSet<mir::BasicBlock>,
+) -> IndexVec<mir::BasicBlock, CleanupKind> {
     fn discover_masters<'tcx>(
         result: &mut IndexSlice<mir::BasicBlock, CleanupKind>,
         mir: &mir::Body<'tcx>,
+        nop_landing_pads: &DenseBitSet<mir::BasicBlock>,
     ) {
         for (bb, data) in mir.basic_blocks.iter_enumerated() {
             match data.terminator().kind {
@@ -301,7 +305,9 @@ pub(crate) fn cleanup_kinds(mir: &mir::Body<'_>) -> IndexVec<mir::BasicBlock, CleanupKind> {
-                    if let mir::UnwindAction::Cleanup(unwind) = unwind {
+                    if let mir::UnwindAction::Cleanup(unwind) = unwind
+                        && !nop_landing_pads.contains(unwind)
+                    {
                         debug!(
                             "cleanup_kinds: {:?}/{:?} registering {:?} as funclet",
                             bb, data, unwind
@@ -382,7 +388,7 @@ pub(crate) fn cleanup_kinds(mir: &mir::Body<'_>) -> IndexVec<mir::BasicBlock, CleanupKind> {
 
     let mut result = IndexVec::from_elem(CleanupKind::NotCleanup, &mir.basic_blocks);
-    discover_masters(&mut result, mir);
+    discover_masters(&mut result, mir, nop_landing_pads);
     propagate(&mut result, mir);
     debug!("cleanup_kinds: result={:?}", result);
     result
diff --git a/compiler/rustc_codegen_ssa/src/mir/block.rs b/compiler/rustc_codegen_ssa/src/mir/block.rs
--- a/compiler/rustc_codegen_ssa/src/mir/block.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/block.rs
@@ -?,7 +?,13 @@ impl<'a, 'tcx> TerminatorCodegenHelper<'tcx> {
         }
 
         let unwind_block = match unwind {
-            mir::UnwindAction::Cleanup(cleanup) => Some(self.llbb_with_cleanup(fx, cleanup)),
+            mir::UnwindAction::Cleanup(cleanup) => {
+                if !fx.nop_landing_pads.contains(cleanup) {
+                    Some(self.llbb_with_cleanup(fx, cleanup))
+                } else {
+                    None
+                }
+            }
             mir::UnwindAction::Continue => None,
             mir::UnwindAction::Unreachable => None,
             mir::UnwindAction::Terminate(reason) => {
@@ -286,7 +292,13 @@ impl<'a, 'tcx> TerminatorCodegenHelper<'tcx> {
         mergeable_succ: bool,
     ) -> MergingSucc {
         let unwind_target = match unwind {
-            mir::UnwindAction::Cleanup(cleanup) => Some(self.llbb_with_cleanup(fx, cleanup)),
+            mir::UnwindAction::Cleanup(cleanup) => {
+                if !fx.nop_landing_pads.contains(cleanup) {
+                    Some(self.llbb_with_cleanup(fx, cleanup))
+                } else {
+                    None
+                }
+            }
             mir::UnwindAction::Terminate(reason) => Some(fx.terminate_block(reason)),
             mir::UnwindAction::Continue => None,
             mir::UnwindAction::Unreachable => None,
diff --git a/compiler/rustc_codegen_ssa/src/mir/mod.rs b/compiler/rustc_codegen_ssa/src/mir/mod.rs
index 66c4af4c935b5..41b571ced158e 100644
--- a/compiler/rustc_codegen_ssa/src/mir/mod.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/mod.rs
@@ -97,6 +97,8 @@ pub struct FunctionCx<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> {
     /// A cold block is a block that is unlikely to be executed at runtime.
     cold_blocks: IndexVec<mir::BasicBlock, bool>,
 
+    nop_landing_pads: DenseBitSet<mir::BasicBlock>,
+
     /// The location where each MIR arg/var/tmp/ret is stored. This is
     /// usually a `PlaceRef` representing an alloca, but not always:
     /// sometimes we can skip the alloca and just store the value
@@ -176,8 +178,14 @@ pub fn codegen_mir<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
     let mut mir = tcx.instance_mir(instance.def);
 
-    let fn_abi = cx.fn_abi_of_instance(instance, ty::List::empty());
-    debug!("fn_abi: {:?}", fn_abi);
+    let nop_landing_pads = rustc_mir_transform::remove_noop_landing_pads::find_noop_landing_pads(
+        mir,
+        Some(rustc_mir_transform::remove_noop_landing_pads::ExtraInfo {
+            tcx,
+            instance,
+            typing_env: cx.typing_env(),
+        }),
+    );
 
     if tcx.features().ergonomic_clones() {
         let monomorphized_mir = instance.instantiate_mir_and_normalize_erasing_regions(
@@ -188,19 +196,23 @@ pub fn codegen_mir<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
         mir = tcx.arena.alloc(optimize_use_clone::<Bx>(cx, monomorphized_mir));
     }
 
+    let fn_abi = cx.fn_abi_of_instance(instance, ty::List::empty());
+    debug!("fn_abi: {:?}", fn_abi);
+
     let debug_context = cx.create_function_debug_context(instance, fn_abi, llfn, &mir);
 
     let start_llbb = Bx::append_block(cx, llfn, "start");
     let mut start_bx = Bx::build(cx, start_llbb);
 
-    if mir.basic_blocks.iter().any(|bb| {
-        bb.is_cleanup || matches!(bb.terminator().unwind(), Some(mir::UnwindAction::Terminate(_)))
+    if mir::traversal::mono_reachable(&mir, tcx, instance).any(|(bb, block)| {
+        (block.is_cleanup && !nop_landing_pads.contains(bb))
+            || matches!(block.terminator().unwind(), Some(mir::UnwindAction::Terminate(_)))
     }) {
         start_bx.set_personality_fn(cx.eh_personality());
     }
 
-    let cleanup_kinds =
-        base::wants_new_eh_instructions(tcx.sess).then(|| analyze::cleanup_kinds(&mir));
+    let cleanup_kinds = base::wants_new_eh_instructions(tcx.sess)
+        .then(|| analyze::cleanup_kinds(&mir, &nop_landing_pads));
 
     let cached_llbbs: IndexVec<mir::BasicBlock, CachedLlbb<Bx::BasicBlock>> =
         mir.basic_blocks
@@ -228,6 +240,7 @@ pub fn codegen_mir<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
         debug_context,
         per_local_var_debug_info: None,
         caller_location: None,
+        nop_landing_pads,
     };
 
     // It may seem like we should iterate over `required_consts` to ensure they all successfully
@@ -239,7 +252,36 @@ pub fn codegen_mir<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
         fx.compute_per_local_var_debug_info(&mut start_bx).unzip();
     fx.per_local_var_debug_info = per_local_var_debug_info;
 
-    let traversal_order = traversal::mono_reachable_reverse_postorder(mir, tcx, instance);
+    let mut traversal_order = traversal::mono_reachable_reverse_postorder(mir, tcx, instance);
+
+    // Filter out blocks that won't be codegen'd because of the nop_landing_pads optimization.
+    // FIXME: We might want to integrate the nop_landing_pads analysis into mono reachability.
+    {
+        let mut reachable = DenseBitSet::new_empty(mir.basic_blocks.len());
+        let mut to_visit = vec![mir::START_BLOCK];
+        while let Some(next) = to_visit.pop() {
+            if !reachable.insert(next) {
+                continue;
+            }
+
+            let block = &mir.basic_blocks[next];
+            if let Some(mir::UnwindAction::Cleanup(target)) = block.terminator().unwind()
+                && fx.nop_landing_pads.contains(*target)
+            {
+                // This edge will not be followed when we actually codegen, so skip generating it here.
+                //
+                // It's guaranteed that the cleanup block (`target`) occurs only in
+                // `UnwindAction::Cleanup(...)` -- i.e., we can't incorrectly filter too much here --
+                // because cleanup transitions must happen via `UnwindAction::Cleanup`.
+                to_visit.extend(block.terminator().successors().filter(|s| s != target));
+            } else {
+                to_visit.extend(block.terminator().successors());
+            }
+        }
+
+        traversal_order.retain(|bb| reachable.contains(*bb));
+    }
+
     let memory_locals = analyze::non_ssa_locals(&fx, &traversal_order);
 
     // Allocate variable and temp allocas
diff --git a/compiler/rustc_mir_transform/src/lib.rs b/compiler/rustc_mir_transform/src/lib.rs
index 572ad585c8c87..62e3aa46e4053 100644
--- a/compiler/rustc_mir_transform/src/lib.rs
+++ b/compiler/rustc_mir_transform/src/lib.rs
@@ -158,7 +158,7 @@ declare_passes! {
     mod prettify : ReorderBasicBlocks, ReorderLocals;
     mod promote_consts : PromoteTemps;
     mod ref_prop : ReferencePropagation;
-    mod remove_noop_landing_pads : RemoveNoopLandingPads;
+    pub mod remove_noop_landing_pads : RemoveNoopLandingPads;
     mod remove_place_mention : RemovePlaceMention;
     mod remove_storage_markers : RemoveStorageMarkers;
     mod remove_uninit_drops : RemoveUninitDrops;
diff --git a/compiler/rustc_mir_transform/src/remove_noop_landing_pads.rs b/compiler/rustc_mir_transform/src/remove_noop_landing_pads.rs
index 797056ad52d4a..1bea1269bf660 100644
--- a/compiler/rustc_mir_transform/src/remove_noop_landing_pads.rs
+++ b/compiler/rustc_mir_transform/src/remove_noop_landing_pads.rs
@@ -1,6 +1,7 @@
 use rustc_index::bit_set::DenseBitSet;
 use rustc_middle::mir::*;
-use rustc_middle::ty::TyCtxt;
+use rustc_middle::ty;
+use rustc_middle::ty::{Instance, TyCtxt};
 use rustc_target::spec::PanicStrategy;
 use tracing::debug;
@@ -41,14 +42,13 @@ impl<'tcx> crate::MirPass<'tcx> for RemoveNoopLandingPads {
         let mut jumps_folded = 0;
         let mut landing_pads_removed = 0;
-        let mut nop_landing_pads = DenseBitSet::new_empty(body.basic_blocks.len());
+        let nop_landing_pads = find_noop_landing_pads(body, None);
 
         // This is a post-order traversal, so that if A post-dominates B
         // then A will be visited before B.
-        let postorder: Vec<_> = traversal::postorder(body).map(|(bb, _)| bb).collect();
-        for bb in postorder {
+        for (bb, block) in body.basic_blocks_mut().iter_enumerated_mut() {
             debug!("    processing {:?}", bb);
-            if let Some(unwind) = body[bb].terminator_mut().unwind_mut() {
+            if let Some(unwind) = block.terminator_mut().unwind_mut() {
                 if let UnwindAction::Cleanup(unwind_bb) = *unwind {
                     if nop_landing_pads.contains(unwind_bb) {
                         debug!("    removing noop landing pad");
                         landing_pads_removed += 1;
@@ -58,19 +58,13 @@ impl<'tcx> crate::MirPass<'tcx> for RemoveNoopLandingPads {
                 }
             }
 
-            body[bb].terminator_mut().successors_mut(|target| {
+            block.terminator_mut().successors_mut(|target| {
                 if *target != resume_block && nop_landing_pads.contains(*target) {
                     debug!("    folding noop jump to {:?} to resume block", target);
                     *target = resume_block;
                     jumps_folded += 1;
                 }
             });
-
-            let is_nop_landing_pad = self.is_nop_landing_pad(bb, body, &nop_landing_pads);
-            if is_nop_landing_pad {
-                nop_landing_pads.insert(bb);
-            }
-            debug!("    is_nop_landing_pad({:?}) = {}", bb, is_nop_landing_pad);
         }
 
         debug!("removed {:?} jumps and {:?} landing pads", jumps_folded, landing_pads_removed);
@@ -81,65 +75,112 @@ impl<'tcx> crate::MirPass<'tcx> for RemoveNoopLandingPads {
     }
 }
 
-impl RemoveNoopLandingPads {
-    fn is_nop_landing_pad(
-        &self,
-        bb: BasicBlock,
-        body: &Body<'_>,
-        nop_landing_pads: &DenseBitSet<BasicBlock>,
-    ) -> bool {
-        for stmt in &body[bb].statements {
-            match &stmt.kind {
-                StatementKind::FakeRead(..)
-                | StatementKind::StorageLive(_)
-                | StatementKind::StorageDead(_)
-                | StatementKind::PlaceMention(..)
-                | StatementKind::AscribeUserType(..)
-                | StatementKind::Coverage(..)
-                | StatementKind::ConstEvalCounter
-                | StatementKind::BackwardIncompatibleDropHint { .. }
-                | StatementKind::Nop => {
-                    // These are all noops in a landing pad
-                }
+/// This provides extra information that allows further analysis.
+///
+/// Used by rustc_codegen_ssa.
+pub struct ExtraInfo<'tcx> {
+    pub tcx: TyCtxt<'tcx>,
+    pub instance: Instance<'tcx>,
+    pub typing_env: ty::TypingEnv<'tcx>,
+}
-
-                StatementKind::Assign(box (place, Rvalue::Use(_) | Rvalue::Discriminant(_))) => {
-                    if place.as_local().is_some() {
-                        // Writing to a local (e.g., a drop flag) does not
-                        // turn a landing pad to a non-nop
-                    } else {
-                        return false;
-                    }
-                }
+
+pub fn find_noop_landing_pads<'tcx>(
+    body: &Body<'tcx>,
+    extra: Option<ExtraInfo<'tcx>>,
+) -> DenseBitSet<BasicBlock> {
+    let mut nop_landing_pads = DenseBitSet::new_empty(body.basic_blocks.len());
+
+    // This is a post-order traversal, so that if A post-dominates B
+    // then A will be visited before B.
+    let postorder: Vec<_> = traversal::postorder(body).map(|(bb, _)| bb).collect();
+    for bb in postorder {
+        let is_nop_landing_pad = is_nop_landing_pad(bb, body, &nop_landing_pads, extra.as_ref());
+        if is_nop_landing_pad {
+            nop_landing_pads.insert(bb);
+        }
+        debug!("    is_nop_landing_pad({:?}) = {}", bb, is_nop_landing_pad);
+    }
+
+    nop_landing_pads
+}
-
-                StatementKind::Assign { .. }
-                | StatementKind::SetDiscriminant { .. }
-                | StatementKind::Deinit(..)
-                | StatementKind::Intrinsic(..)
-                | StatementKind::Retag { .. } => {
-                    return false;
-                }
-            }
-        }
+
+fn is_nop_landing_pad<'tcx>(
+    bb: BasicBlock,
+    body: &Body<'tcx>,
+    nop_landing_pads: &DenseBitSet<BasicBlock>,
+    extra: Option<&ExtraInfo<'tcx>>,
+) -> bool {
+    for stmt in &body[bb].statements {
+        match &stmt.kind {
+            StatementKind::FakeRead(..)
+            | StatementKind::StorageLive(_)
+            | StatementKind::StorageDead(_)
+            | StatementKind::PlaceMention(..)
+            | StatementKind::AscribeUserType(..)
+            | StatementKind::Coverage(..)
+            | StatementKind::ConstEvalCounter
+            | StatementKind::BackwardIncompatibleDropHint { .. }
+            | StatementKind::Nop => {
+                // These are all noops in a landing pad
+            }
+
+            StatementKind::Assign(box (place, Rvalue::Use(_) | Rvalue::Discriminant(_))) => {
+                if place.as_local().is_some() {
+                    // Writing to a local (e.g., a drop flag) does not
+                    // turn a landing pad to a non-nop
+                } else {
+                    return false;
+                }
+            }
+
+            StatementKind::Assign { .. }
+            | StatementKind::SetDiscriminant { .. }
+            | StatementKind::Deinit(..)
+            | StatementKind::Intrinsic(..)
+            | StatementKind::Retag { .. } => {
+                return false;
+            }
+        }
+    }
+
+    let terminator = body[bb].terminator();
+    match terminator.kind {
+        TerminatorKind::Goto { .. }
+        | TerminatorKind::UnwindResume
+        | TerminatorKind::SwitchInt { .. }
+        | TerminatorKind::FalseEdge { .. }
+        | TerminatorKind::FalseUnwind { .. } => {
+            terminator.successors().all(|succ| nop_landing_pads.contains(succ))
+        }
-
-        let terminator = body[bb].terminator();
-        match terminator.kind {
-            TerminatorKind::Goto { .. }
-            | TerminatorKind::UnwindResume
-            | TerminatorKind::SwitchInt { .. }
-            | TerminatorKind::FalseEdge { .. }
-            | TerminatorKind::FalseUnwind { .. } => {
-                terminator.successors().all(|succ| nop_landing_pads.contains(succ))
-            }
+
+        TerminatorKind::Drop { place, .. } => {
+            if let Some(extra) = extra {
+                let ty = place.ty(body, extra.tcx).ty;
+                debug!("monomorphize: instance={:?}", extra.instance);
+                let ty = extra.instance.instantiate_mir_and_normalize_erasing_regions(
+                    extra.tcx,
+                    extra.typing_env,
+                    ty::EarlyBinder::bind(ty),
+                );
+                let drop_fn = Instance::resolve_drop_in_place(extra.tcx, ty);
+                if let ty::InstanceKind::DropGlue(_, None) = drop_fn.def {
+                    // No need to drop anything; if all of our successors are also no-ops
+                    // then we can be skipped.
+                    return terminator.successors().all(|succ| nop_landing_pads.contains(succ));
+                }
+            }
-            TerminatorKind::CoroutineDrop
-            | TerminatorKind::Yield { .. }
-            | TerminatorKind::Return
-            | TerminatorKind::UnwindTerminate(_)
-            | TerminatorKind::Unreachable
-            | TerminatorKind::Call { .. }
-            | TerminatorKind::TailCall { .. }
-            | TerminatorKind::Assert { .. }
-            | TerminatorKind::Drop { .. }
-            | TerminatorKind::InlineAsm { .. } => false,
-        }
+
+            false
+        }
+
+        TerminatorKind::CoroutineDrop
+        | TerminatorKind::Yield { .. }
+        | TerminatorKind::Return
+        | TerminatorKind::UnwindTerminate(_)
+        | TerminatorKind::Call { .. }
+        | TerminatorKind::TailCall { .. }
+        | TerminatorKind::Unreachable
+        | TerminatorKind::Assert { .. }
+        | TerminatorKind::InlineAsm { .. } => false,
     }
 }
diff --git a/tests/codegen/mem-replace-big-type.rs b/tests/codegen/mem-replace-big-type.rs
index 0b2229ba7d104..b28b505b4e5c9 100644
--- a/tests/codegen/mem-replace-big-type.rs
+++ b/tests/codegen/mem-replace-big-type.rs
@@ -25,7 +25,7 @@ pub fn replace_big(dst: &mut Big, src: Big) -> Big {
 // CHECK-NOT: call void @llvm.memcpy
 
 // For a large type, we expect exactly three `memcpy`s
-// CHECK-LABEL: define internal void @{{.+}}mem{{.+}}replace{{.+}}(ptr
+// CHECK-LABEL: define void @{{.+}}mem{{.+}}replace{{.+}}(ptr
 // CHECK-SAME: sret([56 x i8]){{.+}}[[RESULT:%.+]], ptr{{.+}}%dest, ptr{{.+}}%src)
 // CHECK-NOT: call void @llvm.memcpy
 // CHECK: call void @llvm.memcpy.{{.+}}(ptr align 8 [[RESULT]], ptr align 8 %dest, i{{.*}} 56, i1 false)