diff --git a/src/global_state.rs b/src/global_state.rs index b5a78d9bbe..b270d94924 100644 --- a/src/global_state.rs +++ b/src/global_state.rs @@ -49,6 +49,8 @@ pub struct GlobalState { pub(crate) malloc_bytes: AtomicUsize, /// This stores the live bytes and the used bytes (by pages) for each space in last GC. This counter is only updated in the GC release phase. pub(crate) live_bytes_in_last_gc: AtomicRefCell>, + /// The number of used pages at the end of the last GC. This can be used to estimate how many pages we have allocated since last GC. + pub(crate) used_pages_after_last_gc: AtomicUsize, } impl GlobalState { @@ -184,6 +186,15 @@ impl GlobalState { pub(crate) fn decrease_malloc_bytes_by(&self, size: usize) { self.malloc_bytes.fetch_sub(size, Ordering::SeqCst); } + + pub(crate) fn set_used_pages_after_last_gc(&self, pages: usize) { + self.used_pages_after_last_gc + .store(pages, Ordering::Relaxed); + } + + pub(crate) fn get_used_pages_after_last_gc(&self) -> usize { + self.used_pages_after_last_gc.load(Ordering::Relaxed) + } } impl Default for GlobalState { @@ -206,6 +217,7 @@ impl Default for GlobalState { #[cfg(feature = "malloc_counted_size")] malloc_bytes: AtomicUsize::new(0), live_bytes_in_last_gc: AtomicRefCell::new(HashMap::new()), + used_pages_after_last_gc: AtomicUsize::new(0), } } } diff --git a/src/plan/barriers.rs b/src/plan/barriers.rs index 56c069c982..5a0bf59493 100644 --- a/src/plan/barriers.rs +++ b/src/plan/barriers.rs @@ -19,8 +19,11 @@ use downcast_rs::Downcast; pub enum BarrierSelector { /// No barrier is used. NoBarrier, - /// Object remembering barrier is used. + /// Object remembering post-write barrier is used. ObjectBarrier, + /// Object remembering pre-write barrier with weak reference loading barrier. + // TODO: We might be able to generalize this to object remembering pre-write barrier. + SATBBarrier, } impl BarrierSelector { @@ -43,8 +46,22 @@ impl BarrierSelector { /// As a performance optimization, the binding may also choose to port the fast-path to the VM side, /// and call the slow-path (`object_reference_write_slow`) only if necessary. pub trait Barrier: 'static + Send + Downcast { + /// Flush thread-local states like buffers or remembered sets. fn flush(&mut self) {} + /// Weak reference loading barrier. A mutator should call this when loading from a weak + /// reference field, for example, when executing `java.lang.ref.Reference.get()` in JVM, or + /// loading from a global weak table in CRuby. + /// + /// Note: Merely loading from a field holding weak reference into a local variable will create a + /// strong reference from the stack to the referent, changing its reachablilty from weakly + /// reachable to strongly reachable. Concurrent garbage collectors may need to handle such + /// events specially. See [SATBBarrier::load_weak_reference] for a concrete example. + /// + /// Arguments: + /// * `referent`: The referent object which the weak reference is pointing to. + fn load_weak_reference(&mut self, _referent: ObjectReference) {} + /// Subsuming barrier for object reference write fn object_reference_write( &mut self, @@ -159,6 +176,9 @@ pub trait BarrierSemantics: 'static + Send { /// Object will probably be modified fn object_probable_write_slow(&mut self, _obj: ObjectReference) {} + + /// Loading from a weak reference field + fn load_weak_reference(&mut self, _o: ObjectReference) {} } /// Generic object barrier with a type argument defining it's slow-path behaviour. 
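A binding adopts the new hook by calling it on its weak-reference load path. The sketch below is illustrative only: `on_weak_reference_load` is a hypothetical binding-side helper, and the only API it assumes from this change is `Barrier::load_weak_reference`.

```rust
use mmtk::util::ObjectReference;
use mmtk::vm::VMBinding;
use mmtk::Mutator;

/// Hypothetical binding-side helper, invoked wherever the VM loads from a weak
/// reference field (e.g. when implementing `java.lang.ref.Reference.get()`).
fn on_weak_reference_load<VM: VMBinding>(
    mutator: &mut Mutator<VM>,
    referent: Option<ObjectReference>,
) {
    if let Some(obj) = referent {
        // The referent is about to become strongly reachable from a mutator stack;
        // tell the barrier so a concurrent collector can keep it (and its children) alive.
        mutator.barrier.load_weak_reference(obj);
    }
}
```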
@@ -167,6 +187,7 @@ pub struct ObjectBarrier { } impl ObjectBarrier { + /// Create a new ObjectBarrier with the given semantics. pub fn new(semantics: S) -> Self { Self { semantics } } @@ -250,3 +271,91 @@ impl Barrier for ObjectBarrier { } } } + +/// A SATB (Snapshot-At-The-Beginning) barrier implementation. +/// This barrier is basically a pre-write object barrier with a weak reference loading barrier. +pub struct SATBBarrier { + weak_ref_barrier_enabled: bool, + semantics: S, +} + +impl SATBBarrier { + /// Create a new SATBBarrier with the given semantics. + pub fn new(semantics: S) -> Self { + Self { + weak_ref_barrier_enabled: false, + semantics, + } + } + + pub(crate) fn set_weak_ref_barrier_enabled(&mut self, value: bool) { + self.weak_ref_barrier_enabled = value; + } + + fn object_is_unlogged(&self, object: ObjectReference) -> bool { + S::UNLOG_BIT_SPEC.load_atomic::(object, None, Ordering::SeqCst) != 0 + } +} + +impl Barrier for SATBBarrier { + fn flush(&mut self) { + self.semantics.flush(); + } + + fn load_weak_reference(&mut self, o: ObjectReference) { + if self.weak_ref_barrier_enabled { + self.semantics.load_weak_reference(o) + } + } + + fn object_probable_write(&mut self, obj: ObjectReference) { + self.semantics.object_probable_write_slow(obj); + } + + fn object_reference_write_pre( + &mut self, + src: ObjectReference, + slot: ::VMSlot, + target: Option, + ) { + if self.object_is_unlogged(src) { + self.semantics + .object_reference_write_slow(src, slot, target); + } + } + + fn object_reference_write_post( + &mut self, + _src: ObjectReference, + _slot: ::VMSlot, + _target: Option, + ) { + unimplemented!() + } + + fn object_reference_write_slow( + &mut self, + src: ObjectReference, + slot: ::VMSlot, + target: Option, + ) { + self.semantics + .object_reference_write_slow(src, slot, target); + } + + fn memory_region_copy_pre( + &mut self, + src: ::VMMemorySlice, + dst: ::VMMemorySlice, + ) { + self.semantics.memory_region_copy_slow(src, dst); + } + + fn memory_region_copy_post( + &mut self, + _src: ::VMMemorySlice, + _dst: ::VMMemorySlice, + ) { + unimplemented!() + } +} diff --git a/src/plan/concurrent/barrier.rs b/src/plan/concurrent/barrier.rs new file mode 100644 index 0000000000..0bd8995564 --- /dev/null +++ b/src/plan/concurrent/barrier.rs @@ -0,0 +1,163 @@ +use std::sync::atomic::Ordering; + +use super::{concurrent_marking_work::ProcessModBufSATB, Pause}; +use crate::plan::global::PlanTraceObject; +use crate::policy::gc_work::TraceKind; +use crate::util::VMMutatorThread; +use crate::{ + plan::{barriers::BarrierSemantics, concurrent::global::ConcurrentPlan, VectorQueue}, + scheduler::WorkBucketStage, + util::ObjectReference, + vm::{ + slot::{MemorySlice, Slot}, + VMBinding, + }, + MMTK, +}; + +pub struct SATBBarrierSemantics< + VM: VMBinding, + P: ConcurrentPlan + PlanTraceObject, + const KIND: TraceKind, +> { + mmtk: &'static MMTK, + tls: VMMutatorThread, + satb: VectorQueue, + refs: VectorQueue, + plan: &'static P, +} + +impl + PlanTraceObject, const KIND: TraceKind> + SATBBarrierSemantics +{ + pub fn new(mmtk: &'static MMTK, tls: VMMutatorThread) -> Self { + Self { + mmtk, + tls, + satb: VectorQueue::default(), + refs: VectorQueue::default(), + plan: mmtk.get_plan().downcast_ref::
<P>
().unwrap(), + } + } + + fn slow(&mut self, _src: Option, _slot: VM::VMSlot, old: ObjectReference) { + self.satb.push(old); + if self.satb.is_full() { + self.flush_satb(); + } + } + + fn enqueue_node( + &mut self, + src: Option, + slot: VM::VMSlot, + _new: Option, + ) -> bool { + if let Some(old) = slot.load() { + self.slow(src, slot, old); + } + true + } + + /// Attempt to atomically log an object. + /// Returns true if the object is not logged previously. + fn log_object(&self, object: ObjectReference) -> bool { + Self::UNLOG_BIT_SPEC.store_atomic::(object, 0, None, Ordering::SeqCst); + true + } + + fn flush_satb(&mut self) { + if !self.satb.is_empty() { + if self.should_create_satb_packets() { + let satb = self.satb.take(); + let bucket = if self.plan.concurrent_work_in_progress() { + WorkBucketStage::Concurrent + } else { + debug_assert_ne!(self.plan.current_pause(), Some(Pause::InitialMark)); + WorkBucketStage::Closure + }; + self.mmtk.scheduler.work_buckets[bucket] + .add(ProcessModBufSATB::::new(satb)); + } else { + let _ = self.satb.take(); + }; + } + } + + #[cold] + fn flush_weak_refs(&mut self) { + if !self.refs.is_empty() { + let nodes = self.refs.take(); + let bucket = if self.plan.concurrent_work_in_progress() { + WorkBucketStage::Concurrent + } else { + debug_assert_ne!(self.plan.current_pause(), Some(Pause::InitialMark)); + WorkBucketStage::Closure + }; + self.mmtk.scheduler.work_buckets[bucket] + .add(ProcessModBufSATB::::new(nodes)); + } + } + + fn should_create_satb_packets(&self) -> bool { + self.plan.concurrent_work_in_progress() + || self.plan.current_pause() == Some(Pause::FinalMark) + } +} + +impl + PlanTraceObject, const KIND: TraceKind> + BarrierSemantics for SATBBarrierSemantics +{ + type VM = VM; + + #[cold] + fn flush(&mut self) { + self.flush_satb(); + self.flush_weak_refs(); + } + + fn object_reference_write_slow( + &mut self, + src: ObjectReference, + _slot: ::VMSlot, + _target: Option, + ) { + self.object_probable_write_slow(src); + self.log_object(src); + } + + fn memory_region_copy_slow( + &mut self, + _src: ::VMMemorySlice, + dst: ::VMMemorySlice, + ) { + for s in dst.iter_slots() { + self.enqueue_node(None, s, None); + } + } + + /// Enqueue the referent during concurrent marking. + /// + /// Note: During concurrent marking, a collector based on snapshot-at-the-beginning (SATB) will + /// not reach objects that were weakly reachable at the time of `InitialMark`. But if a mutator + /// loads from a weak reference field during concurrent marking, it will make the referent + /// strongly reachable, yet the referent is still not part of the SATB. We must conservatively + /// enqueue the referent even though its reachability has not yet been established, otherwise it + /// (and its children) may be treated as garbage if it happened to be weakly reachable at the + /// time of `InitialMark`. 
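Stripped of MMTk types, the invariant maintained by `slow`/`enqueue_node` above is the classic SATB deletion barrier: while marking is active, record the old value of a slot before it is overwritten. A minimal, self-contained sketch (not MMTk API):

```rust
/// Minimal SATB deletion-barrier sketch: `slot` stands in for an object field and
/// `satb_buffer` for the per-mutator buffer that is later flushed to the tracer.
fn write_with_satb_barrier<T: Copy>(
    marking_active: bool,
    satb_buffer: &mut Vec<T>,
    slot: &mut Option<T>,
    new_value: Option<T>,
) {
    if marking_active {
        if let Some(old) = *slot {
            // Keep the edge that existed at the snapshot alive, even though the
            // mutator is about to delete it.
            satb_buffer.push(old);
        }
    }
    *slot = new_value;
}
```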
+ fn load_weak_reference(&mut self, o: ObjectReference) { + if !self.plan.concurrent_work_in_progress() { + return; + } + self.refs.push(o); + if self.refs.is_full() { + self.flush_weak_refs(); + } + } + + fn object_probable_write_slow(&mut self, obj: ObjectReference) { + crate::plan::tracing::SlotIterator::::iterate_fields(obj, self.tls.0, |s| { + self.enqueue_node(Some(obj), s, None); + }); + } +} diff --git a/src/plan/concurrent/concurrent_marking_work.rs b/src/plan/concurrent/concurrent_marking_work.rs new file mode 100644 index 0000000000..fca994a7bc --- /dev/null +++ b/src/plan/concurrent/concurrent_marking_work.rs @@ -0,0 +1,298 @@ +use crate::plan::concurrent::global::ConcurrentPlan; +use crate::plan::concurrent::Pause; +use crate::plan::PlanTraceObject; +use crate::plan::VectorQueue; +use crate::policy::gc_work::TraceKind; +use crate::scheduler::gc_work::{ScanObjects, SlotOf}; +use crate::util::ObjectReference; +use crate::vm::slot::Slot; +use crate::{ + plan::ObjectQueue, + scheduler::{gc_work::ProcessEdgesBase, GCWork, GCWorker, ProcessEdgesWork, WorkBucketStage}, + vm::*, + MMTK, +}; +use std::ops::{Deref, DerefMut}; + +pub struct ConcurrentTraceObjects< + VM: VMBinding, + P: ConcurrentPlan + PlanTraceObject, + const KIND: TraceKind, +> { + plan: &'static P, + // objects to mark and scan + objects: Option>, + // recursively generated objects + next_objects: VectorQueue, + worker: *mut GCWorker, +} + +impl + PlanTraceObject, const KIND: TraceKind> + ConcurrentTraceObjects +{ + const SATB_BUFFER_SIZE: usize = 8192; + + pub fn new(objects: Vec, mmtk: &'static MMTK) -> Self { + let plan = mmtk.get_plan().downcast_ref::
<P>
().unwrap(); + + Self { + plan, + objects: Some(objects), + next_objects: VectorQueue::default(), + worker: std::ptr::null_mut(), + } + } + + pub fn worker(&self) -> &'static mut GCWorker { + debug_assert_ne!(self.worker, std::ptr::null_mut()); + unsafe { &mut *self.worker } + } + + #[cold] + fn flush(&mut self) { + if !self.next_objects.is_empty() { + let objects = self.next_objects.take(); + let worker = self.worker(); + let w = Self::new(objects, worker.mmtk); + worker.add_work(WorkBucketStage::Concurrent, w); + } + } + + fn trace_object(&mut self, object: ObjectReference) -> ObjectReference { + let new_object = self + .plan + .trace_object::(self, object, self.worker()); + // No copying should happen. + debug_assert_eq!(object, new_object); + object + } + + fn trace_objects(&mut self, objects: &[ObjectReference]) { + for o in objects.iter() { + self.trace_object(*o); + } + } + + fn scan_and_enqueue(&mut self, object: ObjectReference) { + crate::plan::tracing::SlotIterator::::iterate_fields( + object, + self.worker().tls.0, + |s| { + let Some(t) = s.load() else { + return; + }; + + self.next_objects.push(t); + if self.next_objects.len() > Self::SATB_BUFFER_SIZE { + self.flush(); + } + }, + ); + self.plan.post_scan_object(object); + } +} + +impl + PlanTraceObject, const KIND: TraceKind> + ObjectQueue for ConcurrentTraceObjects +{ + fn enqueue(&mut self, object: ObjectReference) { + debug_assert!( + object.to_raw_address().is_mapped(), + "Invalid obj {:?}: address is not mapped", + object + ); + self.scan_and_enqueue(object); + } +} + +unsafe impl + PlanTraceObject, const KIND: TraceKind> + Send for ConcurrentTraceObjects +{ +} + +impl + PlanTraceObject, const KIND: TraceKind> + GCWork for ConcurrentTraceObjects +{ + fn do_work(&mut self, worker: &mut GCWorker, _mmtk: &'static MMTK) { + self.worker = worker; + let mut num_objects = 0; + let mut num_next_objects = 0; + let mut iterations = 0; + // mark objects + if let Some(objects) = self.objects.take() { + self.trace_objects(&objects); + num_objects = objects.len(); + } + let pause_opt = self.plan.current_pause(); + if pause_opt == Some(Pause::FinalMark) || pause_opt.is_none() { + while !self.next_objects.is_empty() { + let pause_opt = self.plan.current_pause(); + if !(pause_opt == Some(Pause::FinalMark) || pause_opt.is_none()) { + break; + } + let next_objects = self.next_objects.take(); + self.trace_objects(&next_objects); + num_next_objects += next_objects.len(); + iterations += 1; + } + } + probe!( + mmtk, + concurrent_trace_objects, + num_objects, + num_next_objects, + iterations + ); + self.flush(); + } +} + +pub struct ProcessModBufSATB< + VM: VMBinding, + P: ConcurrentPlan + PlanTraceObject, + const KIND: TraceKind, +> { + nodes: Option>, + _p: std::marker::PhantomData<(VM, P)>, +} + +unsafe impl + PlanTraceObject, const KIND: TraceKind> + Send for ProcessModBufSATB +{ +} + +impl + PlanTraceObject, const KIND: TraceKind> + ProcessModBufSATB +{ + pub fn new(nodes: Vec) -> Self { + Self { + nodes: Some(nodes), + _p: std::marker::PhantomData, + } + } +} + +impl + PlanTraceObject, const KIND: TraceKind> + GCWork for ProcessModBufSATB +{ + fn do_work(&mut self, worker: &mut GCWorker, mmtk: &'static MMTK) { + let mut w = if let Some(nodes) = self.nodes.take() { + if nodes.is_empty() { + return; + } + + ConcurrentTraceObjects::::new(nodes, mmtk) + } else { + return; + }; + GCWork::do_work(&mut w, worker, mmtk); + } +} + +pub struct ProcessRootSlots< + VM: VMBinding, + P: ConcurrentPlan + PlanTraceObject, + const KIND: TraceKind, +> { + 
base: ProcessEdgesBase<VM>, _p: std::marker::PhantomData<P>
, +} + +unsafe impl + PlanTraceObject, const KIND: TraceKind> + Send for ProcessRootSlots +{ +} + +impl + PlanTraceObject, const KIND: TraceKind> + ProcessRootSlots +{ + fn create_and_schedule_concurrent_trace_objects_work(&self, objects: Vec) { + let worker = self.worker(); + let mmtk = self.mmtk(); + let w = ConcurrentTraceObjects::::new(objects.clone(), mmtk); + + worker.scheduler().work_buckets[WorkBucketStage::Concurrent].add_no_notify(w); + } +} + +impl + PlanTraceObject, const KIND: TraceKind> + ProcessEdgesWork for ProcessRootSlots +{ + type VM = VM; + type ScanObjectsWorkType = ScanObjects; + const OVERWRITE_REFERENCE: bool = false; + const SCAN_OBJECTS_IMMEDIATELY: bool = true; + + fn new( + slots: Vec>, + roots: bool, + mmtk: &'static MMTK, + bucket: WorkBucketStage, + ) -> Self { + debug_assert!(roots); + let base = ProcessEdgesBase::new(slots, roots, mmtk, bucket); + Self { + base, + _p: std::marker::PhantomData, + } + } + + fn flush(&mut self) {} + + fn trace_object(&mut self, _object: ObjectReference) -> ObjectReference { + unreachable!() + } + + fn process_slots(&mut self) { + let pause = self + .base + .plan() + .concurrent() + .unwrap() + .current_pause() + .unwrap(); + // No need to scan roots in the final mark + if pause == Pause::FinalMark { + return; + } + debug_assert_eq!(pause, Pause::InitialMark); + let mut root_objects = Vec::with_capacity(Self::CAPACITY); + if !self.slots.is_empty() { + let slots = std::mem::take(&mut self.slots); + for slot in slots { + if let Some(object) = slot.load() { + root_objects.push(object); + if root_objects.len() == Self::CAPACITY { + let mut buffer = Vec::with_capacity(Self::CAPACITY); + std::mem::swap(&mut buffer, &mut root_objects); + self.create_and_schedule_concurrent_trace_objects_work(buffer); + } + } + } + if !root_objects.is_empty() { + self.create_and_schedule_concurrent_trace_objects_work(root_objects); + } + } + } + + fn create_scan_work(&self, _nodes: Vec) -> Self::ScanObjectsWorkType { + unimplemented!() + } +} + +impl + PlanTraceObject, const KIND: TraceKind> Deref + for ProcessRootSlots +{ + type Target = ProcessEdgesBase; + fn deref(&self) -> &Self::Target { + &self.base + } +} + +impl + PlanTraceObject, const KIND: TraceKind> + DerefMut for ProcessRootSlots +{ + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.base + } +} diff --git a/src/plan/concurrent/global.rs b/src/plan/concurrent/global.rs new file mode 100644 index 0000000000..bcfbb40d2d --- /dev/null +++ b/src/plan/concurrent/global.rs @@ -0,0 +1,10 @@ +use crate::plan::concurrent::Pause; +use crate::plan::Plan; + +/// Trait for a concurrent plan. +pub trait ConcurrentPlan: Plan { + /// Return `true`` if concurrent work (such as concurrent marking) is in progress. + fn concurrent_work_in_progress(&self) -> bool; + /// Return the current pause kind. `None` if not in a pause. 
+ fn current_pause(&self) -> Option; +} diff --git a/src/plan/concurrent/immix/gc_work.rs b/src/plan/concurrent/immix/gc_work.rs new file mode 100644 index 0000000000..6372105313 --- /dev/null +++ b/src/plan/concurrent/immix/gc_work.rs @@ -0,0 +1,25 @@ +use crate::plan::concurrent::immix::global::ConcurrentImmix; +use crate::policy::gc_work::{TraceKind, TRACE_KIND_TRANSITIVE_PIN}; +use crate::scheduler::gc_work::{PlanProcessEdges, UnsupportedProcessEdges}; +use crate::scheduler::ProcessEdgesWork; +use crate::vm::VMBinding; + +pub(super) struct ConcurrentImmixSTWGCWorkContext( + std::marker::PhantomData, +); +impl crate::scheduler::GCWorkContext + for ConcurrentImmixSTWGCWorkContext +{ + type VM = VM; + type PlanType = ConcurrentImmix; + type DefaultProcessEdges = PlanProcessEdges, KIND>; + type PinningProcessEdges = PlanProcessEdges, TRACE_KIND_TRANSITIVE_PIN>; +} +pub(super) struct ConcurrentImmixGCWorkContext(std::marker::PhantomData); + +impl crate::scheduler::GCWorkContext for ConcurrentImmixGCWorkContext { + type VM = E::VM; + type PlanType = ConcurrentImmix; + type DefaultProcessEdges = E; + type PinningProcessEdges = UnsupportedProcessEdges; +} diff --git a/src/plan/concurrent/immix/global.rs b/src/plan/concurrent/immix/global.rs new file mode 100644 index 0000000000..118132f46d --- /dev/null +++ b/src/plan/concurrent/immix/global.rs @@ -0,0 +1,442 @@ +use crate::plan::concurrent::concurrent_marking_work::ProcessRootSlots; +use crate::plan::concurrent::global::ConcurrentPlan; +use crate::plan::concurrent::immix::gc_work::ConcurrentImmixGCWorkContext; +use crate::plan::concurrent::immix::gc_work::ConcurrentImmixSTWGCWorkContext; +use crate::plan::concurrent::Pause; +use crate::plan::global::BasePlan; +use crate::plan::global::CommonPlan; +use crate::plan::global::CreateGeneralPlanArgs; +use crate::plan::global::CreateSpecificPlanArgs; +use crate::plan::immix::mutator::ALLOCATOR_MAPPING; +use crate::plan::AllocationSemantics; +use crate::plan::Plan; +use crate::plan::PlanConstraints; +use crate::policy::immix::defrag::StatsForDefrag; +use crate::policy::immix::ImmixSpaceArgs; +use crate::policy::immix::TRACE_KIND_DEFRAG; +use crate::policy::immix::TRACE_KIND_FAST; +use crate::policy::space::Space; +use crate::scheduler::gc_work::Release; +use crate::scheduler::gc_work::StopMutators; +use crate::scheduler::gc_work::UnsupportedProcessEdges; +use crate::scheduler::*; +use crate::util::alloc::allocators::AllocatorSelector; +use crate::util::copy::*; +use crate::util::heap::gc_trigger::SpaceStats; +use crate::util::heap::VMRequest; +use crate::util::metadata::log_bit::UnlogBitsOperation; +use crate::util::metadata::side_metadata::SideMetadataContext; +use crate::vm::ObjectModel; +use crate::vm::VMBinding; +use crate::{policy::immix::ImmixSpace, util::opaque_pointer::VMWorkerThread}; +use std::sync::atomic::AtomicBool; + +use atomic::Atomic; +use atomic::Ordering; +use enum_map::EnumMap; + +use mmtk_macros::{HasSpaces, PlanTraceObject}; + +/// A concurrent Immix plan. The plan supports concurrent collection (strictly non-moving) and STW full heap collection (which may do defrag). +/// The concurrent GC consists of two STW pauses (initial mark and final mark) with concurrent marking in between. 
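Together with the `Plan::concurrent()` accessor added later in this diff, the `ConcurrentPlan` trait lets crate-internal code query concurrent state without naming a concrete plan type. A hedged sketch (the helper name is made up; only `Plan::concurrent()` and `ConcurrentPlan::concurrent_work_in_progress()` are assumed from this change):

```rust
use crate::plan::Plan;
use crate::vm::VMBinding;

/// Returns true if the current plan is concurrent and still has concurrent work
/// (e.g. concurrent marking between InitialMark and FinalMark) in flight.
fn concurrent_work_in_flight<VM: VMBinding>(plan: &dyn Plan<VM = VM>) -> bool {
    plan.concurrent()
        .map(|c| c.concurrent_work_in_progress())
        .unwrap_or(false)
}
```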
+#[derive(HasSpaces, PlanTraceObject)] +pub struct ConcurrentImmix { + #[post_scan] + #[space] + #[copy_semantics(CopySemantics::DefaultCopy)] + pub immix_space: ImmixSpace, + #[parent] + pub common: CommonPlan, + last_gc_was_defrag: AtomicBool, + current_pause: Atomic>, + previous_pause: Atomic>, + should_do_full_gc: AtomicBool, + concurrent_marking_active: AtomicBool, +} + +/// The plan constraints for the concurrent immix plan. +pub const CONCURRENT_IMMIX_CONSTRAINTS: PlanConstraints = PlanConstraints { + // If we disable moving in Immix, this is a non-moving plan. + moves_objects: !cfg!(feature = "immix_non_moving"), + // Max immix object size is half of a block. + max_non_los_default_alloc_bytes: crate::policy::immix::MAX_IMMIX_OBJECT_SIZE, + needs_prepare_mutator: true, + barrier: crate::BarrierSelector::SATBBarrier, + needs_log_bit: true, + ..PlanConstraints::default() +}; + +impl Plan for ConcurrentImmix { + fn collection_required(&self, space_full: bool, _space: Option>) -> bool { + if self.base().collection_required(self, space_full) { + self.should_do_full_gc.store(true, Ordering::Release); + info!("Triggering full GC"); + return true; + } + + let concurrent_marking_in_progress = self.concurrent_marking_in_progress(); + + if concurrent_marking_in_progress + && self.common.base.scheduler.work_buckets[WorkBucketStage::Concurrent].is_drained() + { + // After the Concurrent bucket is drained during concurrent marking, + // we trigger the FinalMark pause at the next poll() site (here). + // FIXME: Immediately trigger FinalMark when the Concurrent bucket is drained. + return true; + } + + let threshold = self.get_total_pages() >> 1; + let used_pages_after_last_gc = self.common.base.global_state.get_used_pages_after_last_gc(); + let used_pages_now = self.get_used_pages(); + let allocated = used_pages_now.saturating_sub(used_pages_after_last_gc); + if !concurrent_marking_in_progress && allocated > threshold { + info!("Allocated {allocated} pages since last GC ({used_pages_now} - {used_pages_after_last_gc} > {threshold}): Do concurrent marking"); + debug_assert!( + self.common.base.scheduler.work_buckets[WorkBucketStage::Concurrent].is_empty() + ); + debug_assert!(!self.concurrent_marking_in_progress()); + debug_assert_ne!(self.previous_pause(), Some(Pause::InitialMark)); + return true; + } + false + } + + fn last_collection_was_exhaustive(&self) -> bool { + self.immix_space + .is_last_gc_exhaustive(self.last_gc_was_defrag.load(Ordering::Relaxed)) + } + + fn constraints(&self) -> &'static PlanConstraints { + &CONCURRENT_IMMIX_CONSTRAINTS + } + + fn create_copy_config(&'static self) -> CopyConfig { + use enum_map::enum_map; + CopyConfig { + copy_mapping: enum_map! { + CopySemantics::DefaultCopy => CopySelector::Immix(0), + _ => CopySelector::Unused, + }, + space_mapping: vec![(CopySelector::Immix(0), &self.immix_space)], + constraints: &CONCURRENT_IMMIX_CONSTRAINTS, + } + } + + fn schedule_collection(&'static self, scheduler: &GCWorkScheduler) { + let pause = if self.concurrent_marking_in_progress() { + // FIXME: Currently it is unsafe to bypass `FinalMark` and go directly from `InitialMark` to `Full`. + // It is related to defragmentation. See https://github.com/mmtk/mmtk-core/issues/1357 for more details. + // We currently force `FinalMark` to happen if the last pause is `InitialMark`. 
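The concurrent-marking trigger in `collection_required` above boils down to a page-count heuristic; restated in isolation for clarity (illustrative):

```rust
/// Start concurrent marking once the pages allocated since the end of the last GC
/// exceed half of the total heap pages.
fn should_start_concurrent_marking(
    total_pages: usize,
    used_pages_now: usize,
    used_pages_after_last_gc: usize,
) -> bool {
    let threshold = total_pages >> 1;
    used_pages_now.saturating_sub(used_pages_after_last_gc) > threshold
}
```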
+ Pause::FinalMark + } else if self.should_do_full_gc.load(Ordering::SeqCst) { + Pause::Full + } else { + Pause::InitialMark + }; + + self.current_pause.store(Some(pause), Ordering::SeqCst); + + probe!(mmtk, concurrent_pause_determined, pause as usize); + + match pause { + Pause::Full => { + // Ref closure buckets is disabled by initial mark, and needs to be re-enabled for full GC before + // we reuse the normal Immix scheduling. + self.set_ref_closure_buckets_enabled(true); + crate::plan::immix::global::Immix::schedule_immix_full_heap_collection::< + ConcurrentImmix, + ConcurrentImmixSTWGCWorkContext, + ConcurrentImmixSTWGCWorkContext, + >(self, &self.immix_space, scheduler); + } + Pause::InitialMark => self.schedule_concurrent_marking_initial_pause(scheduler), + Pause::FinalMark => self.schedule_concurrent_marking_final_pause(scheduler), + } + } + + fn get_allocator_mapping(&self) -> &'static EnumMap { + &ALLOCATOR_MAPPING + } + + fn prepare(&mut self, tls: VMWorkerThread) { + let pause = self.current_pause().unwrap(); + match pause { + Pause::Full => { + self.common.prepare(tls, true); + self.immix_space.prepare( + true, + Some(StatsForDefrag::new(self)), + // Ignore unlog bits in full GCs because unlog bits should be all 0. + UnlogBitsOperation::NoOp, + ); + } + Pause::InitialMark => { + self.immix_space.prepare( + true, + Some(StatsForDefrag::new(self)), + // Bulk set log bits so SATB barrier will be triggered on the existing objects. + UnlogBitsOperation::BulkSet, + ); + + self.common.prepare(tls, true); + // Bulk set log bits so SATB barrier will be triggered on the existing objects. + self.common + .schedule_unlog_bits_op(UnlogBitsOperation::BulkSet); + } + Pause::FinalMark => (), + } + } + + fn release(&mut self, tls: VMWorkerThread) { + let pause = self.current_pause().unwrap(); + match pause { + Pause::InitialMark => (), + Pause::Full | Pause::FinalMark => { + self.immix_space.release( + true, + // Bulk clear log bits so SATB barrier will not be triggered. + UnlogBitsOperation::BulkClear, + ); + + self.common.release(tls, true); + + if pause == Pause::FinalMark { + // Bulk clear log bits so SATB barrier will not be triggered. + self.common + .schedule_unlog_bits_op(UnlogBitsOperation::BulkClear); + } else { + // Full pauses didn't set unlog bits in the first place, + // so there is no need to clear them. + // TODO: Currently InitialMark must be followed by a FinalMark. + // If we allow upgrading a concurrent GC to a full STW GC, + // we will need to clear the unlog bits at an appropriate place. + } + } + } + } + + fn end_of_gc(&mut self, _tls: VMWorkerThread) { + self.last_gc_was_defrag + .store(self.immix_space.end_of_gc(), Ordering::Relaxed); + + let pause = self.current_pause().unwrap(); + if pause == Pause::InitialMark { + self.set_concurrent_marking_state(true); + } + self.previous_pause.store(Some(pause), Ordering::SeqCst); + self.current_pause.store(None, Ordering::SeqCst); + if pause != Pause::FinalMark { + self.should_do_full_gc.store(false, Ordering::SeqCst); + } else { + // FIXME: Currently it is unsafe to trigger full GC during concurrent marking. + // See `Self::schedule_collection`. + // We keep the value of `self.should_do_full_gc` so that if full GC is triggered, + // the next GC will be full GC. 
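Taken together, `schedule_collection` and `end_of_gc` above implement a small pause state machine. The selection logic, restated (illustrative only):

```rust
use crate::plan::concurrent::Pause;

/// Which pause the next GC performs: concurrent marking in progress forces a
/// FinalMark, otherwise a pending full-GC request yields Full, otherwise the GC
/// starts a new concurrent cycle with InitialMark.
fn select_pause(concurrent_marking_in_progress: bool, full_gc_requested: bool) -> Pause {
    if concurrent_marking_in_progress {
        Pause::FinalMark
    } else if full_gc_requested {
        Pause::Full
    } else {
        Pause::InitialMark
    }
}
```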
+ } + info!("{:?} end", pause); + } + + fn current_gc_may_move_object(&self) -> bool { + self.immix_space.in_defrag() + } + + fn get_collection_reserved_pages(&self) -> usize { + self.immix_space.defrag_headroom_pages() + } + + fn get_used_pages(&self) -> usize { + self.immix_space.reserved_pages() + self.common.get_used_pages() + } + + fn base(&self) -> &BasePlan { + &self.common.base + } + + fn base_mut(&mut self) -> &mut BasePlan { + &mut self.common.base + } + + fn common(&self) -> &CommonPlan { + &self.common + } + + fn notify_mutators_paused(&self, _scheduler: &GCWorkScheduler) { + use crate::vm::ActivePlan; + let pause = self.current_pause().unwrap(); + match pause { + Pause::Full => { + self.set_concurrent_marking_state(false); + } + Pause::InitialMark => { + debug_assert!( + !self.concurrent_marking_in_progress(), + "prev pause: {:?}", + self.previous_pause().unwrap() + ); + } + Pause::FinalMark => { + debug_assert!(self.concurrent_marking_in_progress()); + // Flush barrier buffers + for mutator in ::VMActivePlan::mutators() { + mutator.barrier.flush(); + } + self.set_concurrent_marking_state(false); + } + } + info!("{:?} start", pause); + } + + fn concurrent(&self) -> Option<&dyn ConcurrentPlan> { + Some(self) + } +} + +impl ConcurrentImmix { + pub fn new(args: CreateGeneralPlanArgs) -> Self { + let spec = crate::util::metadata::extract_side_metadata(&[ + *VM::VMObjectModel::GLOBAL_LOG_BIT_SPEC, + ]); + + let mut plan_args = CreateSpecificPlanArgs { + global_args: args, + constraints: &CONCURRENT_IMMIX_CONSTRAINTS, + global_side_metadata_specs: SideMetadataContext::new_global_specs(&spec), + }; + + let immix_args = ImmixSpaceArgs { + mixed_age: false, + never_move_objects: false, + }; + + // These buckets are not used in an Immix plan. We can simply disable them. + // TODO: We should be more systmatic on this, and disable unnecessary buckets for other plans as well. 
+ let scheduler = &plan_args.global_args.scheduler; + scheduler.work_buckets[WorkBucketStage::VMRefForwarding].set_enabled(false); + scheduler.work_buckets[WorkBucketStage::CalculateForwarding].set_enabled(false); + scheduler.work_buckets[WorkBucketStage::SecondRoots].set_enabled(false); + scheduler.work_buckets[WorkBucketStage::RefForwarding].set_enabled(false); + scheduler.work_buckets[WorkBucketStage::FinalizableForwarding].set_enabled(false); + scheduler.work_buckets[WorkBucketStage::Compact].set_enabled(false); + + let immix = ConcurrentImmix { + immix_space: ImmixSpace::new( + plan_args.get_normal_space_args("immix", true, false, VMRequest::discontiguous()), + immix_args, + ), + common: CommonPlan::new(plan_args), + last_gc_was_defrag: AtomicBool::new(false), + current_pause: Atomic::new(None), + previous_pause: Atomic::new(None), + should_do_full_gc: AtomicBool::new(false), + concurrent_marking_active: AtomicBool::new(false), + }; + + immix.verify_side_metadata_sanity(); + + immix + } + + fn set_ref_closure_buckets_enabled(&self, do_closure: bool) { + let scheduler = &self.common.base.scheduler; + scheduler.work_buckets[WorkBucketStage::VMRefClosure].set_enabled(do_closure); + scheduler.work_buckets[WorkBucketStage::WeakRefClosure].set_enabled(do_closure); + scheduler.work_buckets[WorkBucketStage::FinalRefClosure].set_enabled(do_closure); + scheduler.work_buckets[WorkBucketStage::SoftRefClosure].set_enabled(do_closure); + scheduler.work_buckets[WorkBucketStage::PhantomRefClosure].set_enabled(do_closure); + } + + pub(crate) fn schedule_concurrent_marking_initial_pause( + &'static self, + scheduler: &GCWorkScheduler, + ) { + use crate::scheduler::gc_work::Prepare; + + self.set_ref_closure_buckets_enabled(false); + + scheduler.work_buckets[WorkBucketStage::Unconstrained].add(StopMutators::< + ConcurrentImmixGCWorkContext>, + >::new()); + scheduler.work_buckets[WorkBucketStage::Prepare].add(Prepare::< + ConcurrentImmixGCWorkContext>, + >::new(self)); + } + + fn schedule_concurrent_marking_final_pause(&'static self, scheduler: &GCWorkScheduler) { + self.set_ref_closure_buckets_enabled(true); + + // Skip root scanning in the final mark + scheduler.work_buckets[WorkBucketStage::Unconstrained].add(StopMutators::< + ConcurrentImmixGCWorkContext>, + >::new_no_scan_roots()); + + scheduler.work_buckets[WorkBucketStage::Release].add(Release::< + ConcurrentImmixGCWorkContext>, + >::new(self)); + + // Deal with weak ref and finalizers + // TODO: Check against schedule_common_work and see if we are still missing any work packet + type RefProcessingEdges = + crate::scheduler::gc_work::PlanProcessEdges, TRACE_KIND_FAST>; + // Reference processing + if !*self.base().options.no_reference_types { + use crate::util::reference_processor::{ + PhantomRefProcessing, SoftRefProcessing, WeakRefProcessing, + }; + scheduler.work_buckets[WorkBucketStage::SoftRefClosure] + .add(SoftRefProcessing::>::new()); + scheduler.work_buckets[WorkBucketStage::WeakRefClosure] + .add(WeakRefProcessing::::new()); + scheduler.work_buckets[WorkBucketStage::PhantomRefClosure] + .add(PhantomRefProcessing::::new()); + + use crate::util::reference_processor::RefEnqueue; + scheduler.work_buckets[WorkBucketStage::Release].add(RefEnqueue::::new()); + } + + // Finalization + if !*self.base().options.no_finalizer { + use crate::util::finalizable_processor::Finalization; + // finalization + scheduler.work_buckets[WorkBucketStage::FinalRefClosure] + .add(Finalization::>::new()); + } + } + + pub fn concurrent_marking_in_progress(&self) -> 
bool { + self.concurrent_marking_active.load(Ordering::Acquire) + } + + fn set_concurrent_marking_state(&self, active: bool) { + use crate::plan::global::HasSpaces; + + // Tell the spaces to allocate new objects as live + let allocate_object_as_live = active; + self.for_each_space(&mut |space: &dyn Space| { + space.set_allocate_as_live(allocate_object_as_live); + }); + + // Store the state. + self.concurrent_marking_active + .store(active, Ordering::SeqCst); + + // We also set SATB barrier as active -- this is done in Mutator prepare/release. + } + + pub(super) fn is_concurrent_marking_active(&self) -> bool { + self.concurrent_marking_active.load(Ordering::SeqCst) + } + + fn previous_pause(&self) -> Option { + self.previous_pause.load(Ordering::SeqCst) + } +} + +impl ConcurrentPlan for ConcurrentImmix { + fn current_pause(&self) -> Option { + self.current_pause.load(Ordering::SeqCst) + } + + fn concurrent_work_in_progress(&self) -> bool { + self.concurrent_marking_in_progress() + } +} diff --git a/src/plan/concurrent/immix/mod.rs b/src/plan/concurrent/immix/mod.rs new file mode 100644 index 0000000000..0f55961897 --- /dev/null +++ b/src/plan/concurrent/immix/mod.rs @@ -0,0 +1,7 @@ +//! Plan: concurrent immix + +pub(in crate::plan) mod gc_work; +pub(in crate::plan) mod global; +pub(in crate::plan) mod mutator; + +pub use global::ConcurrentImmix; diff --git a/src/plan/concurrent/immix/mutator.rs b/src/plan/concurrent/immix/mutator.rs new file mode 100644 index 0000000000..291c63e72f --- /dev/null +++ b/src/plan/concurrent/immix/mutator.rs @@ -0,0 +1,130 @@ +use crate::plan::barriers::SATBBarrier; +use crate::plan::concurrent::barrier::SATBBarrierSemantics; +use crate::plan::concurrent::immix::ConcurrentImmix; +use crate::plan::concurrent::Pause; +use crate::plan::mutator_context::create_allocator_mapping; +use crate::plan::mutator_context::create_space_mapping; + +use crate::plan::mutator_context::Mutator; +use crate::plan::mutator_context::MutatorBuilder; +use crate::plan::mutator_context::MutatorConfig; +use crate::plan::mutator_context::ReservedAllocators; +use crate::plan::AllocationSemantics; +use crate::util::alloc::allocators::AllocatorSelector; +use crate::util::alloc::ImmixAllocator; +use crate::util::opaque_pointer::{VMMutatorThread, VMWorkerThread}; +use crate::vm::VMBinding; +use crate::MMTK; +use enum_map::EnumMap; + +type BarrierSemanticsType = + SATBBarrierSemantics, { crate::policy::immix::TRACE_KIND_FAST }>; + +type BarrierType = SATBBarrier>; + +pub fn concurrent_immix_mutator_release( + mutator: &mut Mutator, + _tls: VMWorkerThread, +) { + // Release is not scheduled for initial mark pause + let current_pause = mutator.plan.concurrent().unwrap().current_pause().unwrap(); + debug_assert_ne!(current_pause, Pause::InitialMark); + + let immix_allocator = unsafe { + mutator + .allocators + .get_allocator_mut(mutator.config.allocator_mapping[AllocationSemantics::Default]) + } + .downcast_mut::>() + .unwrap(); + immix_allocator.reset(); + + // Deactivate SATB + if current_pause == Pause::Full || current_pause == Pause::FinalMark { + debug!("Deactivate SATB barrier active for {:?}", mutator as *mut _); + mutator + .barrier + .downcast_mut::>() + .unwrap() + .set_weak_ref_barrier_enabled(false); + } +} + +pub fn concurent_immix_mutator_prepare( + mutator: &mut Mutator, + _tls: VMWorkerThread, +) { + // Prepare is not scheduled for final mark pause + let current_pause = mutator.plan.concurrent().unwrap().current_pause().unwrap(); + debug_assert_ne!(current_pause, Pause::FinalMark); 
+ + let immix_allocator = unsafe { + mutator + .allocators + .get_allocator_mut(mutator.config.allocator_mapping[AllocationSemantics::Default]) + } + .downcast_mut::>() + .unwrap(); + immix_allocator.reset(); + + // Activate SATB + if current_pause == Pause::InitialMark { + debug!("Activate SATB barrier active for {:?}", mutator as *mut _); + mutator + .barrier + .downcast_mut::>() + .unwrap() + .set_weak_ref_barrier_enabled(true); + } +} + +pub(in crate::plan) const RESERVED_ALLOCATORS: ReservedAllocators = ReservedAllocators { + n_immix: 1, + ..ReservedAllocators::DEFAULT +}; + +lazy_static! { + pub static ref ALLOCATOR_MAPPING: EnumMap = { + let mut map = create_allocator_mapping(RESERVED_ALLOCATORS, true); + map[AllocationSemantics::Default] = AllocatorSelector::Immix(0); + map + }; +} + +pub fn create_concurrent_immix_mutator( + mutator_tls: VMMutatorThread, + mmtk: &'static MMTK, +) -> Mutator { + let immix = mmtk + .get_plan() + .downcast_ref::>() + .unwrap(); + let config = MutatorConfig { + allocator_mapping: &ALLOCATOR_MAPPING, + space_mapping: Box::new({ + let mut vec = create_space_mapping(RESERVED_ALLOCATORS, true, immix); + vec.push((AllocatorSelector::Immix(0), &immix.immix_space)); + vec + }), + + prepare_func: &concurent_immix_mutator_prepare, + release_func: &concurrent_immix_mutator_release, + }; + + let builder = MutatorBuilder::new(mutator_tls, mmtk, config); + let mut mutator = builder + .barrier(Box::new(SATBBarrier::new(BarrierSemanticsType::::new( + mmtk, + mutator_tls, + )))) + .build(); + + // Set barrier active, based on whether concurrent marking is in progress + mutator + .barrier + .downcast_mut::>() + .unwrap() + .set_weak_ref_barrier_enabled(immix.is_concurrent_marking_active()); + + mutator +} diff --git a/src/plan/concurrent/mod.rs b/src/plan/concurrent/mod.rs new file mode 100644 index 0000000000..1c0a5b6a09 --- /dev/null +++ b/src/plan/concurrent/mod.rs @@ -0,0 +1,34 @@ +pub mod barrier; +pub(super) mod concurrent_marking_work; +pub(super) mod global; + +pub mod immix; + +use bytemuck::NoUninit; + +/// The pause type for a concurrent GC phase. +// TODO: This is probably not be general enough for all the concurrent plans. +// TODO: We could consider moving this to specific plans later. +#[repr(u8)] +#[derive(Debug, PartialEq, Eq, Copy, Clone, NoUninit)] +pub enum Pause { + /// A whole GC (including root scanning, closure, releasing, etc.) happening in a single pause. + /// + /// Don't be confused with "full-heap" GC in generational collectors. `Pause::Full` can also + /// refer to a nursery GC that happens in a single pause. + Full = 1, + /// The initial pause before concurrent marking. + InitialMark, + /// The pause after concurrent marking. + FinalMark, +} + +unsafe impl bytemuck::ZeroableInOption for Pause {} + +unsafe impl bytemuck::PodInOption for Pause {} + +impl Default for Pause { + fn default() -> Self { + Self::Full + } +} diff --git a/src/plan/gc_requester.rs b/src/plan/gc_requester.rs index e3a8462f96..944558184d 100644 --- a/src/plan/gc_requester.rs +++ b/src/plan/gc_requester.rs @@ -30,6 +30,7 @@ impl GCRequester { // `GCWorkScheduler::request_schedule_collection` needs to hold a mutex to communicate // with GC workers, which is expensive for functions like `poll`. We use the atomic // flag `request_flag` to elide the need to acquire the mutex in subsequent calls. 
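The `Pause` enum above starts at 1 and opts into bytemuck's `ZeroableInOption`/`PodInOption` so that `Option<Pause>` has the all-zero bit pattern as its `None` representation; that is what allows `ConcurrentImmix` to keep `current_pause`/`previous_pause` in `atomic::Atomic` cells. A small usage sketch (crate-internal, illustrative):

```rust
use atomic::{Atomic, Ordering};

use crate::plan::concurrent::Pause;

fn demo(current_pause: &Atomic<Option<Pause>>) {
    current_pause.store(Some(Pause::InitialMark), Ordering::SeqCst);
    assert_eq!(current_pause.load(Ordering::SeqCst), Some(Pause::InitialMark));
    // `None` round-trips as the zero bit pattern.
    current_pause.store(None, Ordering::SeqCst);
}
```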
+ probe!(mmtk, gcrequester_request); self.scheduler.request_schedule_collection(); } } diff --git a/src/plan/gc_work.rs b/src/plan/gc_work.rs new file mode 100644 index 0000000000..4998d1101c --- /dev/null +++ b/src/plan/gc_work.rs @@ -0,0 +1,32 @@ +//! This module holds work packets for `CommonPlan` and `BasePlan`, or other work packets not +//! directly related to scheduling. + +use crate::{plan::global::CommonPlan, scheduler::GCWork, vm::VMBinding}; + +pub(super) struct SetCommonPlanUnlogBits { + pub common_plan: &'static CommonPlan, +} + +impl GCWork for SetCommonPlanUnlogBits { + fn do_work( + &mut self, + _worker: &mut crate::scheduler::GCWorker, + _mmtk: &'static crate::MMTK, + ) { + self.common_plan.set_side_log_bits(); + } +} + +pub(super) struct ClearCommonPlanUnlogBits { + pub common_plan: &'static CommonPlan, +} + +impl GCWork for ClearCommonPlanUnlogBits { + fn do_work( + &mut self, + _worker: &mut crate::scheduler::GCWorker, + _mmtk: &'static crate::MMTK, + ) { + self.common_plan.clear_side_log_bits(); + } +} diff --git a/src/plan/generational/immix/global.rs b/src/plan/generational/immix/global.rs index 41e7a70768..02e9df1b9f 100644 --- a/src/plan/generational/immix/global.rs +++ b/src/plan/generational/immix/global.rs @@ -10,6 +10,7 @@ use crate::plan::AllocationSemantics; use crate::plan::Plan; use crate::plan::PlanConstraints; use crate::policy::gc_work::TraceKind; +use crate::policy::immix::defrag::StatsForDefrag; use crate::policy::immix::ImmixSpace; use crate::policy::immix::ImmixSpaceArgs; use crate::policy::immix::{TRACE_KIND_DEFRAG, TRACE_KIND_FAST}; @@ -20,6 +21,7 @@ use crate::util::alloc::allocators::AllocatorSelector; use crate::util::copy::*; use crate::util::heap::gc_trigger::SpaceStats; use crate::util::heap::VMRequest; +use crate::util::metadata::log_bit::UnlogBitsOperation; use crate::util::Address; use crate::util::ObjectReference; use crate::util::VMWorkerThread; @@ -129,13 +131,15 @@ impl Plan for GenImmix { let full_heap = !self.gen.is_current_gc_nursery(); self.gen.prepare(tls); if full_heap { - if VM::VMObjectModel::GLOBAL_LOG_BIT_SPEC.is_on_side() { - self.immix_space.clear_side_log_bits(); - } self.immix_space.prepare( full_heap, - Some(crate::policy::immix::defrag::StatsForDefrag::new(self)), + Some(StatsForDefrag::new(self)), + // Bulk clear unlog bits so that we will reconstruct them. + UnlogBitsOperation::BulkClear, ); + } else { + // We don't do anything special to unlog bits during nursery GC + // because ProcessModBuf will set the unlog bits back. } } @@ -143,8 +147,16 @@ impl Plan for GenImmix { let full_heap = !self.gen.is_current_gc_nursery(); self.gen.release(tls); if full_heap { - self.immix_space.release(full_heap); + self.immix_space.release( + full_heap, + // We reconstructred unlog bits during tracing. Keep them. + UnlogBitsOperation::NoOp, + ); + } else { + // We don't do anything special to unlog bits during nursery GC + // because ProcessModBuf has set the unlog bits back. 
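`UnlogBitsOperation` itself is not shown in this diff (it lives under `util::metadata::log_bit`). From the call sites here it is presumably a small enum along the following lines; this is a hypothetical reconstruction for the reader's convenience, not the actual definition:

```rust
/// Hypothetical reconstruction of the operation threaded through space prepare/release.
pub enum UnlogBitsOperation {
    /// Leave the unlog bits untouched.
    NoOp,
    /// Bulk-set unlog bits for a region, so the write barrier fires for existing objects.
    BulkSet,
    /// Bulk-clear unlog bits for a region, so the write barrier stops firing.
    BulkClear,
}
```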
} + self.last_gc_was_full_heap .store(full_heap, Ordering::Relaxed); } diff --git a/src/plan/global.rs b/src/plan/global.rs index f0736c4df5..5cb6ce9bb9 100644 --- a/src/plan/global.rs +++ b/src/plan/global.rs @@ -3,6 +3,7 @@ use super::PlanConstraints; use crate::global_state::GlobalState; use crate::mmtk::MMTK; +use crate::plan::gc_work::{ClearCommonPlanUnlogBits, SetCommonPlanUnlogBits}; use crate::plan::tracing::ObjectQueue; use crate::plan::Mutator; use crate::policy::immortalspace::ImmortalSpace; @@ -19,6 +20,7 @@ use crate::util::heap::layout::Mmapper; use crate::util::heap::layout::VMMap; use crate::util::heap::HeapMeta; use crate::util::heap::VMRequest; +use crate::util::metadata::log_bit::UnlogBitsOperation; use crate::util::metadata::side_metadata::SideMetadataSanity; use crate::util::metadata::side_metadata::SideMetadataSpec; use crate::util::options::Options; @@ -58,6 +60,9 @@ pub fn create_mutator( PlanSelector::StickyImmix => { crate::plan::sticky::immix::mutator::create_stickyimmix_mutator(tls, mmtk) } + PlanSelector::ConcurrentImmix => { + crate::plan::concurrent::immix::mutator::create_concurrent_immix_mutator(tls, mmtk) + } PlanSelector::Compressor => { crate::plan::compressor::mutator::create_compressor_mutator(tls, mmtk) } @@ -94,6 +99,10 @@ pub fn create_plan( PlanSelector::StickyImmix => { Box::new(crate::plan::sticky::immix::StickyImmix::new(args)) as Box> } + PlanSelector::ConcurrentImmix => { + Box::new(crate::plan::concurrent::immix::ConcurrentImmix::new(args)) + as Box> + } PlanSelector::Compressor => { Box::new(crate::plan::compressor::Compressor::new(args)) as Box> } @@ -179,6 +188,14 @@ pub trait Plan: 'static + HasSpaces + Sync + Downcast { None } + /// Return a reference to `ConcurrentPlan` to allow + /// access methods specific to concurrent plans if the plan is a concurrent plan. + fn concurrent( + &self, + ) -> Option<&dyn crate::plan::concurrent::global::ConcurrentPlan> { + None + } + /// Get the current run time options. fn options(&self) -> &Options { &self.base().options @@ -188,6 +205,9 @@ pub trait Plan: 'static + HasSpaces + Sync + Downcast { /// This defines what space this plan will allocate objects into for different semantics. fn get_allocator_mapping(&self) -> &'static EnumMap; + /// Called when all mutators are paused. This is called before prepare. + fn notify_mutators_paused(&self, _scheduler: &GCWorkScheduler) {} + /// Prepare the plan before a GC. This is invoked in an initial step in the GC. /// This is invoked once per GC by one worker thread. `tls` is the worker thread that executes this method. fn prepare(&mut self, tls: VMWorkerThread); @@ -203,6 +223,7 @@ pub trait Plan: 'static + HasSpaces + Sync + Downcast { /// Inform the plan about the end of a GC. It is guaranteed that there is no further work for this GC. /// This is invoked once per GC by one worker thread. `tls` is the worker thread that executes this method. + // TODO: This is actually called at the end of a pause/STW, rather than the end of a GC. It should be renamed. fn end_of_gc(&mut self, _tls: VMWorkerThread); /// Notify the plan that an emergency collection will happen. The plan should try to free as much memory as possible. @@ -734,6 +755,25 @@ impl CommonPlan { self.base.release(tls, full_heap) } + pub(crate) fn schedule_unlog_bits_op(&mut self, unlog_bits_op: UnlogBitsOperation) { + if VM::VMObjectModel::GLOBAL_LOG_BIT_SPEC.is_on_side() { + // # Safety: CommonPlan reference is always valid within this collection cycle. 
+ let common_plan = unsafe { &*(self as *const CommonPlan) }; + + match unlog_bits_op { + UnlogBitsOperation::NoOp => {} + UnlogBitsOperation::BulkSet => { + self.base.scheduler.work_buckets[WorkBucketStage::Prepare] + .add(SetCommonPlanUnlogBits { common_plan }); + } + UnlogBitsOperation::BulkClear => { + self.base.scheduler.work_buckets[WorkBucketStage::Release] + .add(ClearCommonPlanUnlogBits { common_plan }); + } + } + } + } + pub fn clear_side_log_bits(&self) { self.immortal.clear_side_log_bits(); self.los.clear_side_log_bits(); @@ -788,7 +828,7 @@ impl CommonPlan { } else if #[cfg(feature = "marksweep_as_nonmoving")] { self.nonmoving.prepare(_full_heap); } else { - self.nonmoving.prepare(_full_heap, None); + self.nonmoving.prepare(_full_heap, None, UnlogBitsOperation::NoOp); } } } @@ -800,7 +840,7 @@ impl CommonPlan { } else if #[cfg(feature = "marksweep_as_nonmoving")] { self.nonmoving.prepare(_full_heap); } else { - self.nonmoving.release(_full_heap); + self.nonmoving.release(_full_heap, UnlogBitsOperation::NoOp); } } } diff --git a/src/plan/immix/global.rs b/src/plan/immix/global.rs index 136db58c43..f64d463ab4 100644 --- a/src/plan/immix/global.rs +++ b/src/plan/immix/global.rs @@ -15,6 +15,7 @@ use crate::util::alloc::allocators::AllocatorSelector; use crate::util::copy::*; use crate::util::heap::gc_trigger::SpaceStats; use crate::util::heap::VMRequest; +use crate::util::metadata::log_bit::UnlogBitsOperation; use crate::util::metadata::side_metadata::SideMetadataContext; use crate::vm::VMBinding; use crate::{policy::immix::ImmixSpace, util::opaque_pointer::VMWorkerThread}; @@ -84,17 +85,11 @@ impl Plan for Immix { } fn prepare(&mut self, tls: VMWorkerThread) { - self.common.prepare(tls, true); - self.immix_space.prepare( - true, - Some(crate::policy::immix::defrag::StatsForDefrag::new(self)), - ); + self.prepare_inner(tls, UnlogBitsOperation::NoOp) } fn release(&mut self, tls: VMWorkerThread) { - self.common.release(tls, true); - // release the collected region - self.immix_space.release(true); + self.release_inner(tls, UnlogBitsOperation::NoOp); } fn end_of_gc(&mut self, tls: VMWorkerThread) { @@ -208,4 +203,31 @@ impl Immix { pub(in crate::plan) fn set_last_gc_was_defrag(&self, defrag: bool, order: Ordering) { self.last_gc_was_defrag.store(defrag, order) } + + /// Prepare with unlog-bit operation. + /// Some Immix-derived plans may need to set/clear unlog bits when preparing. + pub(in crate::plan) fn prepare_inner( + &mut self, + tls: VMWorkerThread, + unlog_bits_op: UnlogBitsOperation, + ) { + self.common.prepare(tls, true); + self.immix_space.prepare( + true, + Some(crate::policy::immix::defrag::StatsForDefrag::new(self)), + unlog_bits_op, + ); + } + + /// Release with unlog-bit operation. + /// Some Immix-derived plans may need to set/clear unlog bits when releasing. 
+ pub(in crate::plan) fn release_inner( + &mut self, + tls: VMWorkerThread, + unlog_bits_op: UnlogBitsOperation, + ) { + self.common.release(tls, true); + // release the collected region + self.immix_space.release(true, unlog_bits_op); + } } diff --git a/src/plan/mod.rs b/src/plan/mod.rs index e86a7fe590..ea7455e9eb 100644 --- a/src/plan/mod.rs +++ b/src/plan/mod.rs @@ -18,6 +18,8 @@ pub use barriers::BarrierSelector; pub(crate) mod gc_requester; +mod gc_work; + mod global; pub(crate) use global::create_gc_worker_context; pub(crate) use global::create_mutator; @@ -45,6 +47,7 @@ mod generational; mod sticky; mod compressor; +mod concurrent; mod immix; mod markcompact; mod marksweep; diff --git a/src/plan/sticky/immix/global.rs b/src/plan/sticky/immix/global.rs index 7dcd83ad3b..a3cac41c61 100644 --- a/src/plan/sticky/immix/global.rs +++ b/src/plan/sticky/immix/global.rs @@ -6,6 +6,7 @@ use crate::plan::immix; use crate::plan::PlanConstraints; use crate::policy::gc_work::TraceKind; use crate::policy::gc_work::TRACE_KIND_TRANSITIVE_PIN; +use crate::policy::immix::defrag::StatsForDefrag; use crate::policy::immix::ImmixSpace; use crate::policy::immix::TRACE_KIND_FAST; use crate::policy::sft::SFT; @@ -14,6 +15,7 @@ use crate::util::copy::CopyConfig; use crate::util::copy::CopySelector; use crate::util::copy::CopySemantics; use crate::util::heap::gc_trigger::SpaceStats; +use crate::util::metadata::log_bit::UnlogBitsOperation; use crate::util::metadata::side_metadata::SideMetadataContext; use crate::util::statistics::counter::EventCounter; use crate::vm::ObjectModel; @@ -118,24 +120,37 @@ impl Plan for StickyImmix { // Prepare both large object space and immix space self.immix.immix_space.prepare( false, - Some(crate::policy::immix::defrag::StatsForDefrag::new(self)), + Some(StatsForDefrag::new(self)), + // We don't do anything special to unlog bits during nursery GC + // because ProcessModBuf will set the unlog bits back. + UnlogBitsOperation::NoOp, ); self.immix.common.los.prepare(false); } else { self.full_heap_gc_count.lock().unwrap().inc(); - if VM::VMObjectModel::GLOBAL_LOG_BIT_SPEC.is_on_side() { - self.immix.immix_space.clear_side_log_bits(); - } - self.immix.prepare(tls); + self.immix.prepare_inner( + tls, + // We will reconstruct unlog bits during tracing. + UnlogBitsOperation::BulkClear, + ); } } fn release(&mut self, tls: crate::util::VMWorkerThread) { if self.is_current_gc_nursery() { - self.immix.immix_space.release(false); + self.immix.immix_space.release( + false, + // We don't do anything special to unlog bits during nursery GC + // because ProcessModBuf has set the unlog bits back. + UnlogBitsOperation::NoOp, + ); self.immix.common.los.release(false); } else { - self.immix.release(tls); + self.immix.release_inner( + tls, + // We reconstructred unlog bits during tracing. Keep them. + UnlogBitsOperation::NoOp, + ); } } diff --git a/src/plan/tracing.rs b/src/plan/tracing.rs index eecd40cbaf..792e142c76 100644 --- a/src/plan/tracing.rs +++ b/src/plan/tracing.rs @@ -1,10 +1,12 @@ //! This module contains code useful for tracing, //! i.e. visiting the reachable objects by traversing all or part of an object graph. 
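The SATB barrier semantics and `ConcurrentTraceObjects` above both rely on the flush-on-full discipline of `VectorQueue`, which is defined in this module. The pattern, in isolation (illustrative, crate-internal):

```rust
/// Push a value and hand the whole buffer to `flush` once the queue reports full,
/// as the SATB barrier does with its `satb` and `refs` queues.
fn push_or_flush<T>(
    queue: &mut crate::plan::VectorQueue<T>,
    value: T,
    mut flush: impl FnMut(Vec<T>),
) {
    queue.push(value);
    if queue.is_full() {
        flush(queue.take());
    }
}
```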
+use std::marker::PhantomData; + use crate::scheduler::gc_work::{ProcessEdgesWork, SlotOf}; use crate::scheduler::{GCWorker, WorkBucketStage, EDGES_WORK_BUFFER_SIZE}; -use crate::util::ObjectReference; -use crate::vm::SlotVisitor; +use crate::util::{ObjectReference, VMThread, VMWorkerThread}; +use crate::vm::{Scanning, SlotVisitor, VMBinding}; /// This trait represents an object queue to enqueue objects during tracing. pub trait ObjectQueue { @@ -63,6 +65,16 @@ impl VectorQueue { } self.buffer.push(v); } + + /// Return the len of the queue + pub fn len(&self) -> usize { + self.buffer.len() + } + + /// Empty the queue + pub fn clear(&mut self) { + self.buffer.clear() + } } impl Default for VectorQueue { @@ -134,3 +146,24 @@ impl Drop for ObjectsClosure<'_, E> { self.flush(); } } + +/// For iterating over the slots of an object. +// FIXME: This type iterates slots, but all of its current use cases only care about the values in the slots. +// And it currently only works if the object supports slot enqueuing (i.e. `Scanning::scan_object` is implemented). +// We may refactor the interface according to +pub(crate) struct SlotIterator { + _p: PhantomData, +} + +impl SlotIterator { + /// Iterate over the slots of an object by applying a function to each slot. + pub fn iterate_fields(object: ObjectReference, _tls: VMThread, mut f: F) { + // FIXME: We should use tls from the arguments. + // See https://github.com/mmtk/mmtk-core/issues/1375 + let fake_tls = VMWorkerThread(VMThread::UNINITIALIZED); + if !>::support_slot_enqueuing(fake_tls, object) { + panic!("SlotIterator::iterate_fields cannot be used on objects that don't support slot-enqueuing"); + } + >::scan_object(fake_tls, object, &mut f); + } +} diff --git a/src/policy/immix/immixspace.rs b/src/policy/immix/immixspace.rs index bc3a585eff..18c4045352 100644 --- a/src/policy/immix/immixspace.rs +++ b/src/policy/immix/immixspace.rs @@ -14,6 +14,7 @@ use crate::util::heap::chunk_map::*; use crate::util::heap::BlockPageResource; use crate::util::heap::PageResource; use crate::util::linear_scan::{Region, RegionIterator}; +use crate::util::metadata::log_bit::UnlogBitsOperation; use crate::util::metadata::side_metadata::SideMetadataSpec; #[cfg(feature = "vo_bit")] use crate::util::metadata::vo_bit; @@ -197,6 +198,9 @@ impl Space for ImmixSpace { } fn clear_side_log_bits(&self) { + // Remove the following warning if we have a legitimate use case. + warn!("ImmixSpace::clear_side_log_bits is single-treaded. Consider clearing side metadata in per-chunk work packets."); + let log_bit = VM::VMObjectModel::GLOBAL_LOG_BIT_SPEC.extract_side_spec(); for chunk in self.chunk_map.all_chunks() { log_bit.bzero_metadata(chunk.start(), Chunk::BYTES); @@ -204,6 +208,9 @@ impl Space for ImmixSpace { } fn set_side_log_bits(&self) { + // Remove the following warning if we have a legitimate use case. + warn!("ImmixSpace::set_side_log_bits is single-treaded. 
Consider setting side metadata in per-chunk work packets."); + let log_bit = VM::VMObjectModel::GLOBAL_LOG_BIT_SPEC.extract_side_spec(); for chunk in self.chunk_map.all_chunks() { log_bit.bset_metadata(chunk.start(), Chunk::BYTES); @@ -417,7 +424,12 @@ impl ImmixSpace { &self.scheduler } - pub fn prepare(&mut self, major_gc: bool, plan_stats: Option) { + pub(crate) fn prepare( + &mut self, + major_gc: bool, + plan_stats: Option, + unlog_bits_op: UnlogBitsOperation, + ) { if major_gc { // Update mark_state if VM::VMObjectModel::LOCAL_MARK_BIT_SPEC.is_on_side() { @@ -445,6 +457,7 @@ impl ImmixSpace { } else { None }, + unlog_bits_op, }) }); self.scheduler().work_buckets[WorkBucketStage::Prepare].bulk_add(work_packets); @@ -496,7 +509,7 @@ impl ImmixSpace { } /// Release for the immix space. - pub fn release(&mut self, major_gc: bool) { + pub(crate) fn release(&mut self, major_gc: bool, unlog_bits_op: UnlogBitsOperation) { if major_gc { // Update line_unavail_state for hole searching after this GC. if !super::BLOCK_ONLY { @@ -511,7 +524,7 @@ impl ImmixSpace { self.reusable_blocks.reset(); } // Sweep chunks and blocks - let work_packets = self.generate_sweep_tasks(); + let work_packets = self.generate_sweep_tasks(unlog_bits_op); self.scheduler().work_buckets[WorkBucketStage::Release].bulk_add(work_packets); self.lines_consumed.store(0, Ordering::Relaxed); @@ -528,7 +541,7 @@ impl ImmixSpace { } /// Generate chunk sweep tasks - fn generate_sweep_tasks(&self) -> Vec>> { + fn generate_sweep_tasks(&self, unlog_bits_op: UnlogBitsOperation) -> Vec>> { self.defrag.mark_histograms.lock().clear(); // # Safety: ImmixSpace reference is always valid within this collection cycle. let space = unsafe { &*(self as *const Self) }; @@ -540,6 +553,7 @@ impl ImmixSpace { Box::new(SweepChunk { space, chunk, + unlog_bits_op, epilogue: epilogue.clone(), }) }); @@ -909,6 +923,7 @@ pub struct PrepareBlockState { pub space: &'static ImmixSpace, pub chunk: Chunk, pub defrag_threshold: Option, + pub unlog_bits_op: UnlogBitsOperation, } impl PrepareBlockState { @@ -953,6 +968,9 @@ impl GCWork for PrepareBlockState { debug_assert!(!block.get_state().is_reusable()); debug_assert_ne!(block.get_state(), BlockState::Marked); } + + self.unlog_bits_op + .execute::(self.chunk.start(), Chunk::BYTES); } } @@ -960,6 +978,7 @@ impl GCWork for PrepareBlockState { struct SweepChunk { space: &'static ImmixSpace, chunk: Chunk, + unlog_bits_op: UnlogBitsOperation, /// A destructor invoked when all `SweepChunk` packets are finished. epilogue: Arc>, } @@ -1020,6 +1039,10 @@ impl GCWork for SweepChunk { self.space.chunk_map.set_allocated(self.chunk, false) } self.space.defrag.add_completed_mark_histogram(histogram); + + self.unlog_bits_op + .execute::(self.chunk.start(), Chunk::BYTES); + self.epilogue.finish_one_work_packet(); } } diff --git a/src/policy/immix/line.rs b/src/policy/immix/line.rs index 94036ecc65..a8d2e9686e 100644 --- a/src/policy/immix/line.rs +++ b/src/policy/immix/line.rs @@ -1,3 +1,5 @@ +use std::ops::Range; + use super::block::Block; use crate::util::linear_scan::{Region, RegionIterator}; use crate::util::metadata::side_metadata::SideMetadataSpec; @@ -81,4 +83,32 @@ impl Line { } marked_lines } + + /// Bulk set the local mark bits of a line range. + /// + /// This is useful during concurrent marking. By doing this, concurrent marking will + /// conservatively consider all objects allocated in the line range as live, and the mutator + /// doesn't need to explicitly mark bump-allocated objects in the fast path. 
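The intended caller of these line helpers is not part of this hunk; presumably an Immix allocator invokes them when it acquires a fresh region while concurrent marking is active, roughly as sketched below (hypothetical caller; only `Line::eager_mark_lines` is assumed from this change):

```rust
use std::ops::Range;

use crate::policy::immix::line::Line;
use crate::vm::VMBinding;

/// Hypothetical allocator-side hook: when lines are acquired for bump allocation
/// during concurrent marking, eagerly mark both the line mark states and the mark
/// bits so objects allocated there are treated as live without per-object work.
fn on_lines_acquired<VM: VMBinding>(line_mark_state: u8, lines: Range<Line>, marking: bool) {
    if marking {
        Line::eager_mark_lines::<VM>(line_mark_state, lines);
    }
}
```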
+ pub fn initialize_mark_table_as_marked(lines: Range) { + let meta = VM::VMObjectModel::LOCAL_MARK_BIT_SPEC.extract_side_spec(); + let start = lines.start.start(); + let limit = lines.end.start(); + let size = limit - start; + meta.bset_metadata(start, size); + } + + /// Bulk set line mark states. + pub fn bulk_set_line_mark_states(line_mark_state: u8, lines: Range) { + for line in RegionIterator::::new(lines.start, lines.end) { + line.mark(line_mark_state); + } + } + + /// Eagerly mark all line mark states and all side mark bits in the gap. + /// + /// Useful during concurrent marking. + pub fn eager_mark_lines(line_mark_state: u8, lines: Range) { + Self::bulk_set_line_mark_states(line_mark_state, lines.clone()); + Self::initialize_mark_table_as_marked::(lines); + } } diff --git a/src/policy/largeobjectspace.rs b/src/policy/largeobjectspace.rs index 610083eac0..f62202eccf 100644 --- a/src/policy/largeobjectspace.rs +++ b/src/policy/largeobjectspace.rs @@ -61,6 +61,24 @@ impl SFT for LargeObjectSpace { true } fn initialize_object_metadata(&self, object: ObjectReference, alloc: bool) { + if self.should_allocate_as_live() { + VM::VMObjectModel::LOCAL_LOS_MARK_NURSERY_SPEC.store_atomic::( + object, + self.mark_state, + None, + Ordering::SeqCst, + ); + debug_assert!( + VM::VMObjectModel::GLOBAL_LOG_BIT_SPEC.load_atomic::( + object, + None, + Ordering::Acquire + ) == 0 + ); + + self.treadmill.add_to_treadmill(object, false); + return; + } let old_value = VM::VMObjectModel::LOCAL_LOS_MARK_NURSERY_SPEC.load_atomic::( object, None, @@ -205,8 +223,6 @@ impl Space for LargeObjectSpace { } fn set_side_log_bits(&self) { - debug_assert!(self.treadmill.is_from_space_empty()); - debug_assert!(self.treadmill.is_nursery_empty()); let mut enumator = ClosureObjectEnumerator::<_, VM>::new(|object| { VM::VMObjectModel::GLOBAL_LOG_BIT_SPEC.mark_as_unlogged::(object, Ordering::SeqCst); }); @@ -267,7 +283,6 @@ impl LargeObjectSpace { pub fn prepare(&mut self, full_heap: bool) { if full_heap { - debug_assert!(self.treadmill.is_from_space_empty()); self.mark_state = MARK_BIT - self.mark_state; } self.treadmill.flip(full_heap); @@ -281,6 +296,7 @@ impl LargeObjectSpace { self.sweep_large_pages(false); } } + // Allow nested-if for this function to make it clear that test_and_mark() is only executed // for the outer condition is met. #[allow(clippy::collapsible_if)] @@ -413,6 +429,10 @@ impl LargeObjectSpace { ) & NURSERY_BIT == NURSERY_BIT } + + pub fn is_marked(&self, object: ObjectReference) -> bool { + self.test_mark_bit(object, self.mark_state) + } } fn get_super_page(cell: Address) -> Address { diff --git a/src/policy/space.rs b/src/policy/space.rs index fcfe9b4088..a80dc3f973 100644 --- a/src/policy/space.rs +++ b/src/policy/space.rs @@ -34,6 +34,7 @@ use crate::util::memory::{self, HugePageSupport, MmapProtection, MmapStrategy}; use crate::vm::VMBinding; use std::marker::PhantomData; +use std::sync::atomic::AtomicBool; use std::sync::Arc; use std::sync::Mutex; @@ -418,6 +419,18 @@ pub trait Space: 'static + SFT + Sync + Downcast { /// scanning VO bits because it is sparse. fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator); + fn set_allocate_as_live(&self, live: bool) { + self.common() + .allocate_as_live + .store(live, std::sync::atomic::Ordering::SeqCst); + } + + fn should_allocate_as_live(&self) -> bool { + self.common() + .allocate_as_live + .load(std::sync::atomic::Ordering::Acquire) + } + /// Clear the side log bits for allocated regions in this space. 
/// This method is only called if the plan knows the log bits are side metadata. fn clear_side_log_bits(&self); @@ -526,6 +539,8 @@ pub struct CommonSpace { pub global_state: Arc, pub options: Arc, + pub allocate_as_live: AtomicBool, + p: PhantomData, } @@ -600,6 +615,7 @@ impl CommonSpace { acquire_lock: Mutex::new(()), global_state: args.plan_args.global_state, options: args.plan_args.options.clone(), + allocate_as_live: AtomicBool::new(false), p: PhantomData, }; diff --git a/src/scheduler/gc_work.rs b/src/scheduler/gc_work.rs index 7e50c86aa3..bfc14cb7ec 100644 --- a/src/scheduler/gc_work.rs +++ b/src/scheduler/gc_work.rs @@ -191,11 +191,31 @@ impl GCWork for ReleaseCollector { /// /// TODO: Smaller work granularity #[derive(Default)] -pub struct StopMutators(PhantomData); +pub struct StopMutators { + /// If this is true, we skip creating [`ScanMutatorRoots`] work packets for mutators. + /// By default, this is false. + skip_mutator_roots: bool, + /// Flush mutators once they are stopped. By default this is false. [`ScanMutatorRoots`] will flush mutators. + flush_mutator: bool, + phantom: PhantomData, +} impl StopMutators { pub fn new() -> Self { - Self(PhantomData) + Self { + skip_mutator_roots: false, + flush_mutator: false, + phantom: PhantomData, + } + } + + /// Create a `StopMutators` work packet that does not create `ScanMutatorRoots` work packets for mutators, and will simply flush mutators. + pub fn new_no_scan_roots() -> Self { + Self { + skip_mutator_roots: true, + flush_mutator: true, + phantom: PhantomData, + } } } @@ -206,10 +226,17 @@ impl GCWork for StopMutators { ::VMCollection::stop_all_mutators(worker.tls, |mutator| { // TODO: The stack scanning work won't start immediately, as the `Prepare` bucket is not opened yet (the bucket is opened in notify_mutators_paused). // Should we push to Unconstrained instead? - mmtk.scheduler.work_buckets[WorkBucketStage::Prepare] - .add(ScanMutatorRoots::(mutator)); + + if self.flush_mutator { + mutator.flush(); + } + if !self.skip_mutator_roots { + mmtk.scheduler.work_buckets[WorkBucketStage::Prepare] + .add(ScanMutatorRoots::(mutator)); + } }); trace!("stop_all_mutators end"); + mmtk.get_plan().notify_mutators_paused(&mmtk.scheduler); mmtk.scheduler.notify_mutators_paused(mmtk); mmtk.scheduler.work_buckets[WorkBucketStage::Prepare].add(ScanVMSpecificRoots::::new()); } diff --git a/src/scheduler/mod.rs b/src/scheduler/mod.rs index 33e89be0fe..5006edbe1d 100644 --- a/src/scheduler/mod.rs +++ b/src/scheduler/mod.rs @@ -17,7 +17,7 @@ pub(crate) use scheduler::GCWorkScheduler; mod stat; mod work_counter; -mod work; +pub(crate) mod work; pub use work::GCWork; pub(crate) use work::GCWorkContext; diff --git a/src/scheduler/scheduler.rs b/src/scheduler/scheduler.rs index d6c4eff74e..38d387b3c4 100644 --- a/src/scheduler/scheduler.rs +++ b/src/scheduler/scheduler.rs @@ -131,6 +131,7 @@ impl GCWorkScheduler { /// Add the `ScheduleCollection` packet. Called by the last parked worker. fn add_schedule_collection_packet(&self) { // We are still holding the mutex `WorkerMonitor::sync`. Do not notify now. + probe!(mmtk, add_schedule_collection_packet); self.work_buckets[WorkBucketStage::Unconstrained].add_no_notify(ScheduleCollection); } @@ -272,6 +273,7 @@ impl GCWorkScheduler { /// /// Return true if there're any non-empty buckets updated. 
pub(crate) fn update_buckets(&self) -> bool { + debug!("update_buckets"); let mut buckets_updated = false; let mut new_packets = false; for i in 0..WorkBucketStage::LENGTH { @@ -457,11 +459,20 @@ impl GCWorkScheduler { LastParkedResult::WakeAll } else { // GC finished. - self.on_gc_finished(worker); + let concurrent_work_scheduled = self.on_gc_finished(worker); // Clear the current goal goals.on_current_goal_completed(); - self.respond_to_requests(worker, goals) + + if concurrent_work_scheduled { + // It was the initial mark pause and scheduled concurrent work. + // Wake up all GC workers to do concurrent work. + LastParkedResult::WakeAll + } else { + // It was an STW GC or the final mark pause of a concurrent GC. + // Respond to another goal. + self.respond_to_requests(worker, goals) + } } } WorkerGoal::StopForFork => { @@ -534,7 +545,9 @@ impl GCWorkScheduler { } /// Called when GC has finished, i.e. when all work packets have been executed. - fn on_gc_finished(&self, worker: &GCWorker) { + /// + /// Return `true` if any concurrent work packets have been scheduled. + fn on_gc_finished(&self, worker: &GCWorker) -> bool { // All GC workers must have parked by now. debug_assert!(!self.worker_group.has_designated_work()); self.debug_assert_all_stw_buckets_empty(); @@ -590,6 +603,9 @@ impl GCWorkScheduler { } } + mmtk.state + .set_used_pages_after_last_gc(mmtk.get_plan().get_used_pages()); + #[cfg(feature = "extreme_assertions")] if crate::util::slot_logger::should_check_duplicate_slots(mmtk.get_plan()) { // reset the logging info at the end of each GC @@ -599,9 +615,14 @@ impl GCWorkScheduler { // Reset the triggering information. mmtk.state.reset_collection_trigger(); + let concurrent_work_scheduled = self.schedule_concurrent_packets(); + self.debug_assert_all_stw_buckets_closed(); + // Set to NotInGC after everything, and right before resuming mutators. mmtk.set_gc_status(GcStatus::NotInGC); ::VMCollection::resume_mutators(worker.tls); + + concurrent_work_scheduled } pub fn enable_stat(&self) { @@ -634,4 +655,17 @@ impl GCWorkScheduler { first_stw_bucket.open(); self.worker_monitor.notify_work_available(true); } + + pub(super) fn schedule_concurrent_packets(&self) -> bool { + let concurrent_bucket = &self.work_buckets[WorkBucketStage::Concurrent]; + if !concurrent_bucket.is_empty() { + concurrent_bucket.set_enabled(true); + concurrent_bucket.open(); + true + } else { + concurrent_bucket.set_enabled(false); + concurrent_bucket.close(); + false + } + } } diff --git a/src/util/alloc/immix_allocator.rs b/src/util/alloc/immix_allocator.rs index 807ddded90..eb2e5235fa 100644 --- a/src/util/alloc/immix_allocator.rs +++ b/src/util/alloc/immix_allocator.rs @@ -1,3 +1,4 @@ +use std::sync::atomic::Ordering; use std::sync::Arc; use super::allocator::{align_allocation_no_fill, fill_alignment_gap, AllocatorContext}; @@ -265,6 +266,11 @@ impl ImmixAllocator { // Update the hole-searching cursor to None. Some(end_line) }; + // mark objects if concurrent marking is active + if self.immix_space().should_allocate_as_live() { + let state = self.space.line_mark_state.load(Ordering::Acquire); + Line::eager_mark_lines::(state, start_line..end_line); + } return true; } else { // No more recyclable lines. Set the hole-searching cursor to None. 
@@ -305,6 +311,11 @@ impl ImmixAllocator { // Bulk clear stale line mark state Line::MARK_TABLE .bzero_metadata(block.start(), crate::policy::immix::block::Block::BYTES); + // mark objects if concurrent marking is active + if self.immix_space().should_allocate_as_live() { + let state = self.space.line_mark_state.load(Ordering::Acquire); + Line::eager_mark_lines::(state, block.start_line()..block.end_line()); + } if self.request_for_large { self.large_bump_pointer.cursor = block.start(); self.large_bump_pointer.limit = block.end(); diff --git a/src/util/metadata/log_bit.rs b/src/util/metadata/log_bit.rs index a5a9b8644f..6ea012acbd 100644 --- a/src/util/metadata/log_bit.rs +++ b/src/util/metadata/log_bit.rs @@ -1,4 +1,6 @@ +use crate::util::Address; use crate::util::ObjectReference; +use crate::vm::ObjectModel; use crate::vm::VMBinding; use crate::vm::VMGlobalLogBitSpec; use std::sync::atomic::Ordering; @@ -38,3 +40,31 @@ impl VMGlobalLogBitSpec { self.load_atomic::(object, None, order) == 1 } } + +/// This specifies what to do to the global side unlog bits in various functions or work packets. +#[derive(Clone, Copy, PartialEq, Eq)] +pub(crate) enum UnlogBitsOperation { + /// Do nothing. + NoOp, + /// Bulk set unlog bits to all 1s. + BulkSet, + /// Bulk clear unlog bits to all 0s. + BulkClear, +} + +impl UnlogBitsOperation { + /// Run the specified operation on the address range from `start` to `start + size`. + pub(crate) fn execute(&self, start: Address, size: usize) { + if let MetadataSpec::OnSide(ref unlog_bits) = *VM::VMObjectModel::GLOBAL_LOG_BIT_SPEC { + match self { + UnlogBitsOperation::NoOp => {} + UnlogBitsOperation::BulkSet => { + unlog_bits.bset_metadata(start, size); + } + UnlogBitsOperation::BulkClear => { + unlog_bits.bzero_metadata(start, size); + } + } + } + } +} diff --git a/src/util/options.rs b/src/util/options.rs index fab347c153..fd8952d1da 100644 --- a/src/util/options.rs +++ b/src/util/options.rs @@ -50,6 +50,8 @@ pub enum PlanSelector { Compressor, /// An Immix collector that uses a sticky mark bit to allow generational behaviors without a copying nursery. StickyImmix, + /// Concurrent non-moving immix using SATB + ConcurrentImmix, } /// MMTk option for perf events diff --git a/src/util/reference_processor.rs b/src/util/reference_processor.rs index 3ca0486707..a5be7d3d4d 100644 --- a/src/util/reference_processor.rs +++ b/src/util/reference_processor.rs @@ -252,6 +252,11 @@ impl ReferenceProcessor { /// Inform the binding to enqueue the weak references whose referents were cleared in this GC. pub fn enqueue(&self, tls: VMWorkerThread) { + // We will acquire a lock below. If anyone tries to insert new weak refs which will acquire the same lock, a deadlock will occur. + // This does happen for OpenJDK with ConcurrentImmix where a write barrier is triggered during the enqueueing of weak references, + // and the write barrier scans the objects and attempts to add new weak references. + // Disallow new candidates to prevent the deadlock. + self.disallow_new_candidate(); let mut sync = self.sync.lock().unwrap(); // This is the end of a GC. We do some assertions here to make sure our reference tables are correct. 
diff --git a/src/vm/tests/mock_tests/mock_test_allocator_info.rs b/src/vm/tests/mock_tests/mock_test_allocator_info.rs index fc288e8041..a856278e94 100644 --- a/src/vm/tests/mock_tests/mock_test_allocator_info.rs +++ b/src/vm/tests/mock_tests/mock_test_allocator_info.rs @@ -29,6 +29,7 @@ pub fn test_allocator_info() { | PlanSelector::GenImmix | PlanSelector::MarkCompact | PlanSelector::Compressor + | PlanSelector::ConcurrentImmix | PlanSelector::StickyImmix => { // These plans all use bump pointer allocator. let AllocatorInfo::BumpPointer { diff --git a/tools/tracing/timeline/capture.bt b/tools/tracing/timeline/capture.bt index 31547c6e88..953ce72d2e 100644 --- a/tools/tracing/timeline/capture.bt +++ b/tools/tracing/timeline/capture.bt @@ -2,6 +2,7 @@ BEGIN { @harness = $HARNESS; @gc_count = 0; + @is_initial_mark = 0; if (!@harness) { //always collect data @@ -30,7 +31,6 @@ usdt:$MMTK:mmtk:harness_end { usdt:$MMTK:mmtk:gc_start { printf("GC,B,%d,%lu\n", tid, nsecs); - @gc_count += 1; // bpftrace warns that signed `%` operator may have undefined behavior. if ((uint64)@gc_count % $EVERY == 0 && @stats_enabled) { @enable_print = 1; @@ -41,6 +41,14 @@ usdt:$MMTK:mmtk:gc_start { usdt:$MMTK:mmtk:gc_end { printf("GC,E,%d,%lu\n", tid, nsecs); + + // We don't increment the GC count for an InitialMark pause, so that we always visualize both the InitialMark and the FinalMark of a concurrent GC, or neither. + // FIXME: mmtk-core should emit distinct events for GC end and pause end. + if (!@is_initial_mark) { + @gc_count += 1; + } + + @is_initial_mark = 0; } usdt:$MMTK:mmtk:bucket_opened { @@ -124,6 +132,27 @@ usdt:$MMTK:mmtk:sweep_chunk { } } +usdt:$MMTK:mmtk:concurrent_trace_objects { + if (@enable_print) { + printf("concurrent_trace_objects,meta,%d,%lu,%lu,%lu,%lu\n", tid, nsecs, arg0, arg1, arg2); + } +} + +usdt:$MMTK:mmtk:gcrequester_request { + printf("gcrequester_request,i,%d,%lu\n", tid, nsecs); +} + +usdt:$MMTK:mmtk:add_schedule_collection_packet { + printf("add_schedule_collection_packet,i,%d,%lu\n", tid, nsecs); +} + +usdt:$MMTK:mmtk:concurrent_pause_determined { + printf("concurrent_pause_determined,meta,%d,%lu,%lu\n", tid, nsecs, arg0); + if (arg0 == 2) { // InitialMark + @is_initial_mark = 1; + } +} + usdt:$MMTK:mmtk:finalization { if (@enable_print) { printf("finalization,meta,%d,%lu,%lu,%lu,%lu,%lu\n", tid, nsecs, arg0, arg1, arg2, arg3); diff --git a/tools/tracing/timeline/visualize.py b/tools/tracing/timeline/visualize.py index 70063861c9..24c7bd9338 100755 --- a/tools/tracing/timeline/visualize.py +++ b/tools/tracing/timeline/visualize.py @@ -22,6 +22,11 @@ class Semantics(Enum): WEAK = 1 PHANTOM = 2 +class Pause(Enum): + FULL = 1 + INITIAL_MARK = 2 + FINAL_MARK = 3 + def get_args(): parser = argparse.ArgumentParser( description=""" @@ -156,6 +161,9 @@ def enrich_event(self, name, ph, tid, ts, result, args): "stage": int(args[0]), } + case "gcrequester_request": + result["tid"] = 1 + case _: if self.enrich_event_extra is not None: # Call ``enrich_event_extra`` in the extension script if defined. @@ -251,6 +259,32 @@ def enrich_meta(self, name, tid, ts, gc, wp, args): } } + case "concurrent_trace_objects": + objects = int(args[0]) + next_objects = int(args[1]) + iterations = int(args[2]) + total_objects = objects + next_objects + wp["args"] |= { + # Put args in a group. See comments in "process_slots". 
+ "scan_objects": { + "objects": objects, + "next_objects": next_objects, + "total_objects": total_objects, + "iterations": iterations, + } + } + + case "concurrent_pause_determined": + pause_int = int(args[0]) + if pause_int in Pause: + pause = Pause(pause_int).name + else: + pause = f"(Unknown:{pause_int})" + + gc["args"] |= { + "pause": pause, + } + case "sweep_chunk": wp["args"] |= { "allocated_blocks": int(args[0]),