diff --git a/src/global_state.rs b/src/global_state.rs index b5a78d9bbe..b270d94924 100644 --- a/src/global_state.rs +++ b/src/global_state.rs @@ -49,6 +49,8 @@ pub struct GlobalState { pub(crate) malloc_bytes: AtomicUsize, /// This stores the live bytes and the used bytes (by pages) for each space in last GC. This counter is only updated in the GC release phase. pub(crate) live_bytes_in_last_gc: AtomicRefCell>, + /// The number of used pages at the end of the last GC. This can be used to estimate how many pages we have allocated since last GC. + pub(crate) used_pages_after_last_gc: AtomicUsize, } impl GlobalState { @@ -184,6 +186,15 @@ impl GlobalState { pub(crate) fn decrease_malloc_bytes_by(&self, size: usize) { self.malloc_bytes.fetch_sub(size, Ordering::SeqCst); } + + pub(crate) fn set_used_pages_after_last_gc(&self, pages: usize) { + self.used_pages_after_last_gc + .store(pages, Ordering::Relaxed); + } + + pub(crate) fn get_used_pages_after_last_gc(&self) -> usize { + self.used_pages_after_last_gc.load(Ordering::Relaxed) + } } impl Default for GlobalState { @@ -206,6 +217,7 @@ impl Default for GlobalState { #[cfg(feature = "malloc_counted_size")] malloc_bytes: AtomicUsize::new(0), live_bytes_in_last_gc: AtomicRefCell::new(HashMap::new()), + used_pages_after_last_gc: AtomicUsize::new(0), } } } diff --git a/src/plan/barriers.rs b/src/plan/barriers.rs index 56c069c982..5a0bf59493 100644 --- a/src/plan/barriers.rs +++ b/src/plan/barriers.rs @@ -19,8 +19,11 @@ use downcast_rs::Downcast; pub enum BarrierSelector { /// No barrier is used. NoBarrier, - /// Object remembering barrier is used. + /// Object remembering post-write barrier is used. ObjectBarrier, + /// Object remembering pre-write barrier with weak reference loading barrier. + // TODO: We might be able to generalize this to object remembering pre-write barrier. + SATBBarrier, } impl BarrierSelector { @@ -43,8 +46,22 @@ impl BarrierSelector { /// As a performance optimization, the binding may also choose to port the fast-path to the VM side, /// and call the slow-path (`object_reference_write_slow`) only if necessary. pub trait Barrier: 'static + Send + Downcast { + /// Flush thread-local states like buffers or remembered sets. fn flush(&mut self) {} + /// Weak reference loading barrier. A mutator should call this when loading from a weak + /// reference field, for example, when executing `java.lang.ref.Reference.get()` in JVM, or + /// loading from a global weak table in CRuby. + /// + /// Note: Merely loading from a field holding weak reference into a local variable will create a + /// strong reference from the stack to the referent, changing its reachablilty from weakly + /// reachable to strongly reachable. Concurrent garbage collectors may need to handle such + /// events specially. See [SATBBarrier::load_weak_reference] for a concrete example. + /// + /// Arguments: + /// * `referent`: The referent object which the weak reference is pointing to. + fn load_weak_reference(&mut self, _referent: ObjectReference) {} + /// Subsuming barrier for object reference write fn object_reference_write( &mut self, @@ -159,6 +176,9 @@ pub trait BarrierSemantics: 'static + Send { /// Object will probably be modified fn object_probable_write_slow(&mut self, _obj: ObjectReference) {} + + /// Loading from a weak reference field + fn load_weak_reference(&mut self, _o: ObjectReference) {} } /// Generic object barrier with a type argument defining it's slow-path behaviour. 
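A binding adopts the new hook by calling it on its weak-reference load path. The sketch below is illustrative only: `on_weak_reference_load` is a hypothetical binding-side helper, and the only API it assumes from this change is `Barrier::load_weak_reference`.

```rust
use mmtk::util::ObjectReference;
use mmtk::vm::VMBinding;
use mmtk::Mutator;

/// Hypothetical binding-side helper, invoked wherever the VM loads from a weak
/// reference field (e.g. when implementing `java.lang.ref.Reference.get()`).
fn on_weak_reference_load<VM: VMBinding>(
    mutator: &mut Mutator<VM>,
    referent: Option<ObjectReference>,
) {
    if let Some(obj) = referent {
        // The referent is about to become strongly reachable from a mutator stack;
        // tell the barrier so a concurrent collector can keep it (and its children) alive.
        mutator.barrier.load_weak_reference(obj);
    }
}
```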
@@ -167,6 +187,7 @@ pub struct ObjectBarrier { } impl ObjectBarrier { + /// Create a new ObjectBarrier with the given semantics. pub fn new(semantics: S) -> Self { Self { semantics } } @@ -250,3 +271,91 @@ impl Barrier for ObjectBarrier { } } } + +/// A SATB (Snapshot-At-The-Beginning) barrier implementation. +/// This barrier is basically a pre-write object barrier with a weak reference loading barrier. +pub struct SATBBarrier { + weak_ref_barrier_enabled: bool, + semantics: S, +} + +impl SATBBarrier { + /// Create a new SATBBarrier with the given semantics. + pub fn new(semantics: S) -> Self { + Self { + weak_ref_barrier_enabled: false, + semantics, + } + } + + pub(crate) fn set_weak_ref_barrier_enabled(&mut self, value: bool) { + self.weak_ref_barrier_enabled = value; + } + + fn object_is_unlogged(&self, object: ObjectReference) -> bool { + S::UNLOG_BIT_SPEC.load_atomic::(object, None, Ordering::SeqCst) != 0 + } +} + +impl Barrier for SATBBarrier { + fn flush(&mut self) { + self.semantics.flush(); + } + + fn load_weak_reference(&mut self, o: ObjectReference) { + if self.weak_ref_barrier_enabled { + self.semantics.load_weak_reference(o) + } + } + + fn object_probable_write(&mut self, obj: ObjectReference) { + self.semantics.object_probable_write_slow(obj); + } + + fn object_reference_write_pre( + &mut self, + src: ObjectReference, + slot: ::VMSlot, + target: Option, + ) { + if self.object_is_unlogged(src) { + self.semantics + .object_reference_write_slow(src, slot, target); + } + } + + fn object_reference_write_post( + &mut self, + _src: ObjectReference, + _slot: ::VMSlot, + _target: Option, + ) { + unimplemented!() + } + + fn object_reference_write_slow( + &mut self, + src: ObjectReference, + slot: ::VMSlot, + target: Option, + ) { + self.semantics + .object_reference_write_slow(src, slot, target); + } + + fn memory_region_copy_pre( + &mut self, + src: ::VMMemorySlice, + dst: ::VMMemorySlice, + ) { + self.semantics.memory_region_copy_slow(src, dst); + } + + fn memory_region_copy_post( + &mut self, + _src: ::VMMemorySlice, + _dst: ::VMMemorySlice, + ) { + unimplemented!() + } +} diff --git a/src/plan/concurrent/barrier.rs b/src/plan/concurrent/barrier.rs new file mode 100644 index 0000000000..0bd8995564 --- /dev/null +++ b/src/plan/concurrent/barrier.rs @@ -0,0 +1,163 @@ +use std::sync::atomic::Ordering; + +use super::{concurrent_marking_work::ProcessModBufSATB, Pause}; +use crate::plan::global::PlanTraceObject; +use crate::policy::gc_work::TraceKind; +use crate::util::VMMutatorThread; +use crate::{ + plan::{barriers::BarrierSemantics, concurrent::global::ConcurrentPlan, VectorQueue}, + scheduler::WorkBucketStage, + util::ObjectReference, + vm::{ + slot::{MemorySlice, Slot}, + VMBinding, + }, + MMTK, +}; + +pub struct SATBBarrierSemantics< + VM: VMBinding, + P: ConcurrentPlan + PlanTraceObject, + const KIND: TraceKind, +> { + mmtk: &'static MMTK, + tls: VMMutatorThread, + satb: VectorQueue, + refs: VectorQueue, + plan: &'static P, +} + +impl + PlanTraceObject, const KIND: TraceKind> + SATBBarrierSemantics +{ + pub fn new(mmtk: &'static MMTK, tls: VMMutatorThread) -> Self { + Self { + mmtk, + tls, + satb: VectorQueue::default(), + refs: VectorQueue::default(), + plan: mmtk.get_plan().downcast_ref::
<P>
().unwrap(), + } + } + + fn slow(&mut self, _src: Option, _slot: VM::VMSlot, old: ObjectReference) { + self.satb.push(old); + if self.satb.is_full() { + self.flush_satb(); + } + } + + fn enqueue_node( + &mut self, + src: Option, + slot: VM::VMSlot, + _new: Option, + ) -> bool { + if let Some(old) = slot.load() { + self.slow(src, slot, old); + } + true + } + + /// Attempt to atomically log an object. + /// Returns true if the object is not logged previously. + fn log_object(&self, object: ObjectReference) -> bool { + Self::UNLOG_BIT_SPEC.store_atomic::(object, 0, None, Ordering::SeqCst); + true + } + + fn flush_satb(&mut self) { + if !self.satb.is_empty() { + if self.should_create_satb_packets() { + let satb = self.satb.take(); + let bucket = if self.plan.concurrent_work_in_progress() { + WorkBucketStage::Concurrent + } else { + debug_assert_ne!(self.plan.current_pause(), Some(Pause::InitialMark)); + WorkBucketStage::Closure + }; + self.mmtk.scheduler.work_buckets[bucket] + .add(ProcessModBufSATB::::new(satb)); + } else { + let _ = self.satb.take(); + }; + } + } + + #[cold] + fn flush_weak_refs(&mut self) { + if !self.refs.is_empty() { + let nodes = self.refs.take(); + let bucket = if self.plan.concurrent_work_in_progress() { + WorkBucketStage::Concurrent + } else { + debug_assert_ne!(self.plan.current_pause(), Some(Pause::InitialMark)); + WorkBucketStage::Closure + }; + self.mmtk.scheduler.work_buckets[bucket] + .add(ProcessModBufSATB::::new(nodes)); + } + } + + fn should_create_satb_packets(&self) -> bool { + self.plan.concurrent_work_in_progress() + || self.plan.current_pause() == Some(Pause::FinalMark) + } +} + +impl + PlanTraceObject, const KIND: TraceKind> + BarrierSemantics for SATBBarrierSemantics +{ + type VM = VM; + + #[cold] + fn flush(&mut self) { + self.flush_satb(); + self.flush_weak_refs(); + } + + fn object_reference_write_slow( + &mut self, + src: ObjectReference, + _slot: ::VMSlot, + _target: Option, + ) { + self.object_probable_write_slow(src); + self.log_object(src); + } + + fn memory_region_copy_slow( + &mut self, + _src: ::VMMemorySlice, + dst: ::VMMemorySlice, + ) { + for s in dst.iter_slots() { + self.enqueue_node(None, s, None); + } + } + + /// Enqueue the referent during concurrent marking. + /// + /// Note: During concurrent marking, a collector based on snapshot-at-the-beginning (SATB) will + /// not reach objects that were weakly reachable at the time of `InitialMark`. But if a mutator + /// loads from a weak reference field during concurrent marking, it will make the referent + /// strongly reachable, yet the referent is still not part of the SATB. We must conservatively + /// enqueue the referent even though its reachability has not yet been established, otherwise it + /// (and its children) may be treated as garbage if it happened to be weakly reachable at the + /// time of `InitialMark`. 
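Stripped of MMTk types, the invariant maintained by `slow`/`enqueue_node` above is the classic SATB deletion barrier: while marking is active, record the old value of a slot before it is overwritten. A minimal, self-contained sketch (not MMTk API):

```rust
/// Minimal SATB deletion-barrier sketch: `slot` stands in for an object field and
/// `satb_buffer` for the per-mutator buffer that is later flushed to the tracer.
fn write_with_satb_barrier<T: Copy>(
    marking_active: bool,
    satb_buffer: &mut Vec<T>,
    slot: &mut Option<T>,
    new_value: Option<T>,
) {
    if marking_active {
        if let Some(old) = *slot {
            // Keep the edge that existed at the snapshot alive, even though the
            // mutator is about to delete it.
            satb_buffer.push(old);
        }
    }
    *slot = new_value;
}
```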
+ fn load_weak_reference(&mut self, o: ObjectReference) { + if !self.plan.concurrent_work_in_progress() { + return; + } + self.refs.push(o); + if self.refs.is_full() { + self.flush_weak_refs(); + } + } + + fn object_probable_write_slow(&mut self, obj: ObjectReference) { + crate::plan::tracing::SlotIterator::::iterate_fields(obj, self.tls.0, |s| { + self.enqueue_node(Some(obj), s, None); + }); + } +} diff --git a/src/plan/concurrent/concurrent_marking_work.rs b/src/plan/concurrent/concurrent_marking_work.rs new file mode 100644 index 0000000000..fca994a7bc --- /dev/null +++ b/src/plan/concurrent/concurrent_marking_work.rs @@ -0,0 +1,298 @@ +use crate::plan::concurrent::global::ConcurrentPlan; +use crate::plan::concurrent::Pause; +use crate::plan::PlanTraceObject; +use crate::plan::VectorQueue; +use crate::policy::gc_work::TraceKind; +use crate::scheduler::gc_work::{ScanObjects, SlotOf}; +use crate::util::ObjectReference; +use crate::vm::slot::Slot; +use crate::{ + plan::ObjectQueue, + scheduler::{gc_work::ProcessEdgesBase, GCWork, GCWorker, ProcessEdgesWork, WorkBucketStage}, + vm::*, + MMTK, +}; +use std::ops::{Deref, DerefMut}; + +pub struct ConcurrentTraceObjects< + VM: VMBinding, + P: ConcurrentPlan + PlanTraceObject, + const KIND: TraceKind, +> { + plan: &'static P, + // objects to mark and scan + objects: Option>, + // recursively generated objects + next_objects: VectorQueue, + worker: *mut GCWorker, +} + +impl + PlanTraceObject, const KIND: TraceKind> + ConcurrentTraceObjects +{ + const SATB_BUFFER_SIZE: usize = 8192; + + pub fn new(objects: Vec, mmtk: &'static MMTK) -> Self { + let plan = mmtk.get_plan().downcast_ref::
<P>
().unwrap(); + + Self { + plan, + objects: Some(objects), + next_objects: VectorQueue::default(), + worker: std::ptr::null_mut(), + } + } + + pub fn worker(&self) -> &'static mut GCWorker { + debug_assert_ne!(self.worker, std::ptr::null_mut()); + unsafe { &mut *self.worker } + } + + #[cold] + fn flush(&mut self) { + if !self.next_objects.is_empty() { + let objects = self.next_objects.take(); + let worker = self.worker(); + let w = Self::new(objects, worker.mmtk); + worker.add_work(WorkBucketStage::Concurrent, w); + } + } + + fn trace_object(&mut self, object: ObjectReference) -> ObjectReference { + let new_object = self + .plan + .trace_object::(self, object, self.worker()); + // No copying should happen. + debug_assert_eq!(object, new_object); + object + } + + fn trace_objects(&mut self, objects: &[ObjectReference]) { + for o in objects.iter() { + self.trace_object(*o); + } + } + + fn scan_and_enqueue(&mut self, object: ObjectReference) { + crate::plan::tracing::SlotIterator::::iterate_fields( + object, + self.worker().tls.0, + |s| { + let Some(t) = s.load() else { + return; + }; + + self.next_objects.push(t); + if self.next_objects.len() > Self::SATB_BUFFER_SIZE { + self.flush(); + } + }, + ); + self.plan.post_scan_object(object); + } +} + +impl + PlanTraceObject, const KIND: TraceKind> + ObjectQueue for ConcurrentTraceObjects +{ + fn enqueue(&mut self, object: ObjectReference) { + debug_assert!( + object.to_raw_address().is_mapped(), + "Invalid obj {:?}: address is not mapped", + object + ); + self.scan_and_enqueue(object); + } +} + +unsafe impl + PlanTraceObject, const KIND: TraceKind> + Send for ConcurrentTraceObjects +{ +} + +impl + PlanTraceObject, const KIND: TraceKind> + GCWork for ConcurrentTraceObjects +{ + fn do_work(&mut self, worker: &mut GCWorker, _mmtk: &'static MMTK) { + self.worker = worker; + let mut num_objects = 0; + let mut num_next_objects = 0; + let mut iterations = 0; + // mark objects + if let Some(objects) = self.objects.take() { + self.trace_objects(&objects); + num_objects = objects.len(); + } + let pause_opt = self.plan.current_pause(); + if pause_opt == Some(Pause::FinalMark) || pause_opt.is_none() { + while !self.next_objects.is_empty() { + let pause_opt = self.plan.current_pause(); + if !(pause_opt == Some(Pause::FinalMark) || pause_opt.is_none()) { + break; + } + let next_objects = self.next_objects.take(); + self.trace_objects(&next_objects); + num_next_objects += next_objects.len(); + iterations += 1; + } + } + probe!( + mmtk, + concurrent_trace_objects, + num_objects, + num_next_objects, + iterations + ); + self.flush(); + } +} + +pub struct ProcessModBufSATB< + VM: VMBinding, + P: ConcurrentPlan + PlanTraceObject, + const KIND: TraceKind, +> { + nodes: Option>, + _p: std::marker::PhantomData<(VM, P)>, +} + +unsafe impl + PlanTraceObject, const KIND: TraceKind> + Send for ProcessModBufSATB +{ +} + +impl + PlanTraceObject, const KIND: TraceKind> + ProcessModBufSATB +{ + pub fn new(nodes: Vec) -> Self { + Self { + nodes: Some(nodes), + _p: std::marker::PhantomData, + } + } +} + +impl + PlanTraceObject, const KIND: TraceKind> + GCWork for ProcessModBufSATB +{ + fn do_work(&mut self, worker: &mut GCWorker, mmtk: &'static MMTK) { + let mut w = if let Some(nodes) = self.nodes.take() { + if nodes.is_empty() { + return; + } + + ConcurrentTraceObjects::::new(nodes, mmtk) + } else { + return; + }; + GCWork::do_work(&mut w, worker, mmtk); + } +} + +pub struct ProcessRootSlots< + VM: VMBinding, + P: ConcurrentPlan + PlanTraceObject, + const KIND: TraceKind, +> { + 
base: ProcessEdgesBase<VM>, _p: std::marker::PhantomData<P>
, +} + +unsafe impl + PlanTraceObject, const KIND: TraceKind> + Send for ProcessRootSlots +{ +} + +impl + PlanTraceObject, const KIND: TraceKind> + ProcessRootSlots +{ + fn create_and_schedule_concurrent_trace_objects_work(&self, objects: Vec) { + let worker = self.worker(); + let mmtk = self.mmtk(); + let w = ConcurrentTraceObjects::::new(objects.clone(), mmtk); + + worker.scheduler().work_buckets[WorkBucketStage::Concurrent].add_no_notify(w); + } +} + +impl + PlanTraceObject, const KIND: TraceKind> + ProcessEdgesWork for ProcessRootSlots +{ + type VM = VM; + type ScanObjectsWorkType = ScanObjects; + const OVERWRITE_REFERENCE: bool = false; + const SCAN_OBJECTS_IMMEDIATELY: bool = true; + + fn new( + slots: Vec>, + roots: bool, + mmtk: &'static MMTK, + bucket: WorkBucketStage, + ) -> Self { + debug_assert!(roots); + let base = ProcessEdgesBase::new(slots, roots, mmtk, bucket); + Self { + base, + _p: std::marker::PhantomData, + } + } + + fn flush(&mut self) {} + + fn trace_object(&mut self, _object: ObjectReference) -> ObjectReference { + unreachable!() + } + + fn process_slots(&mut self) { + let pause = self + .base + .plan() + .concurrent() + .unwrap() + .current_pause() + .unwrap(); + // No need to scan roots in the final mark + if pause == Pause::FinalMark { + return; + } + debug_assert_eq!(pause, Pause::InitialMark); + let mut root_objects = Vec::with_capacity(Self::CAPACITY); + if !self.slots.is_empty() { + let slots = std::mem::take(&mut self.slots); + for slot in slots { + if let Some(object) = slot.load() { + root_objects.push(object); + if root_objects.len() == Self::CAPACITY { + let mut buffer = Vec::with_capacity(Self::CAPACITY); + std::mem::swap(&mut buffer, &mut root_objects); + self.create_and_schedule_concurrent_trace_objects_work(buffer); + } + } + } + if !root_objects.is_empty() { + self.create_and_schedule_concurrent_trace_objects_work(root_objects); + } + } + } + + fn create_scan_work(&self, _nodes: Vec) -> Self::ScanObjectsWorkType { + unimplemented!() + } +} + +impl + PlanTraceObject, const KIND: TraceKind> Deref + for ProcessRootSlots +{ + type Target = ProcessEdgesBase; + fn deref(&self) -> &Self::Target { + &self.base + } +} + +impl + PlanTraceObject, const KIND: TraceKind> + DerefMut for ProcessRootSlots +{ + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.base + } +} diff --git a/src/plan/concurrent/global.rs b/src/plan/concurrent/global.rs new file mode 100644 index 0000000000..bcfbb40d2d --- /dev/null +++ b/src/plan/concurrent/global.rs @@ -0,0 +1,10 @@ +use crate::plan::concurrent::Pause; +use crate::plan::Plan; + +/// Trait for a concurrent plan. +pub trait ConcurrentPlan: Plan { + /// Return `true`` if concurrent work (such as concurrent marking) is in progress. + fn concurrent_work_in_progress(&self) -> bool; + /// Return the current pause kind. `None` if not in a pause. 
+ fn current_pause(&self) -> Option; +} diff --git a/src/plan/concurrent/immix/gc_work.rs b/src/plan/concurrent/immix/gc_work.rs new file mode 100644 index 0000000000..6372105313 --- /dev/null +++ b/src/plan/concurrent/immix/gc_work.rs @@ -0,0 +1,25 @@ +use crate::plan::concurrent::immix::global::ConcurrentImmix; +use crate::policy::gc_work::{TraceKind, TRACE_KIND_TRANSITIVE_PIN}; +use crate::scheduler::gc_work::{PlanProcessEdges, UnsupportedProcessEdges}; +use crate::scheduler::ProcessEdgesWork; +use crate::vm::VMBinding; + +pub(super) struct ConcurrentImmixSTWGCWorkContext( + std::marker::PhantomData, +); +impl crate::scheduler::GCWorkContext + for ConcurrentImmixSTWGCWorkContext +{ + type VM = VM; + type PlanType = ConcurrentImmix; + type DefaultProcessEdges = PlanProcessEdges, KIND>; + type PinningProcessEdges = PlanProcessEdges, TRACE_KIND_TRANSITIVE_PIN>; +} +pub(super) struct ConcurrentImmixGCWorkContext(std::marker::PhantomData); + +impl crate::scheduler::GCWorkContext for ConcurrentImmixGCWorkContext { + type VM = E::VM; + type PlanType = ConcurrentImmix; + type DefaultProcessEdges = E; + type PinningProcessEdges = UnsupportedProcessEdges; +} diff --git a/src/plan/concurrent/immix/global.rs b/src/plan/concurrent/immix/global.rs new file mode 100644 index 0000000000..118132f46d --- /dev/null +++ b/src/plan/concurrent/immix/global.rs @@ -0,0 +1,442 @@ +use crate::plan::concurrent::concurrent_marking_work::ProcessRootSlots; +use crate::plan::concurrent::global::ConcurrentPlan; +use crate::plan::concurrent::immix::gc_work::ConcurrentImmixGCWorkContext; +use crate::plan::concurrent::immix::gc_work::ConcurrentImmixSTWGCWorkContext; +use crate::plan::concurrent::Pause; +use crate::plan::global::BasePlan; +use crate::plan::global::CommonPlan; +use crate::plan::global::CreateGeneralPlanArgs; +use crate::plan::global::CreateSpecificPlanArgs; +use crate::plan::immix::mutator::ALLOCATOR_MAPPING; +use crate::plan::AllocationSemantics; +use crate::plan::Plan; +use crate::plan::PlanConstraints; +use crate::policy::immix::defrag::StatsForDefrag; +use crate::policy::immix::ImmixSpaceArgs; +use crate::policy::immix::TRACE_KIND_DEFRAG; +use crate::policy::immix::TRACE_KIND_FAST; +use crate::policy::space::Space; +use crate::scheduler::gc_work::Release; +use crate::scheduler::gc_work::StopMutators; +use crate::scheduler::gc_work::UnsupportedProcessEdges; +use crate::scheduler::*; +use crate::util::alloc::allocators::AllocatorSelector; +use crate::util::copy::*; +use crate::util::heap::gc_trigger::SpaceStats; +use crate::util::heap::VMRequest; +use crate::util::metadata::log_bit::UnlogBitsOperation; +use crate::util::metadata::side_metadata::SideMetadataContext; +use crate::vm::ObjectModel; +use crate::vm::VMBinding; +use crate::{policy::immix::ImmixSpace, util::opaque_pointer::VMWorkerThread}; +use std::sync::atomic::AtomicBool; + +use atomic::Atomic; +use atomic::Ordering; +use enum_map::EnumMap; + +use mmtk_macros::{HasSpaces, PlanTraceObject}; + +/// A concurrent Immix plan. The plan supports concurrent collection (strictly non-moving) and STW full heap collection (which may do defrag). +/// The concurrent GC consists of two STW pauses (initial mark and final mark) with concurrent marking in between. 
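Together with the `Plan::concurrent()` accessor added later in this diff, the `ConcurrentPlan` trait lets crate-internal code query concurrent state without naming a concrete plan type. A hedged sketch (the helper name is made up; only `Plan::concurrent()` and `ConcurrentPlan::concurrent_work_in_progress()` are assumed from this change):

```rust
use crate::plan::Plan;
use crate::vm::VMBinding;

/// Returns true if the current plan is concurrent and still has concurrent work
/// (e.g. concurrent marking between InitialMark and FinalMark) in flight.
fn concurrent_work_in_flight<VM: VMBinding>(plan: &dyn Plan<VM = VM>) -> bool {
    plan.concurrent()
        .map(|c| c.concurrent_work_in_progress())
        .unwrap_or(false)
}
```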
+#[derive(HasSpaces, PlanTraceObject)] +pub struct ConcurrentImmix { + #[post_scan] + #[space] + #[copy_semantics(CopySemantics::DefaultCopy)] + pub immix_space: ImmixSpace, + #[parent] + pub common: CommonPlan, + last_gc_was_defrag: AtomicBool, + current_pause: Atomic>, + previous_pause: Atomic>, + should_do_full_gc: AtomicBool, + concurrent_marking_active: AtomicBool, +} + +/// The plan constraints for the concurrent immix plan. +pub const CONCURRENT_IMMIX_CONSTRAINTS: PlanConstraints = PlanConstraints { + // If we disable moving in Immix, this is a non-moving plan. + moves_objects: !cfg!(feature = "immix_non_moving"), + // Max immix object size is half of a block. + max_non_los_default_alloc_bytes: crate::policy::immix::MAX_IMMIX_OBJECT_SIZE, + needs_prepare_mutator: true, + barrier: crate::BarrierSelector::SATBBarrier, + needs_log_bit: true, + ..PlanConstraints::default() +}; + +impl Plan for ConcurrentImmix { + fn collection_required(&self, space_full: bool, _space: Option>) -> bool { + if self.base().collection_required(self, space_full) { + self.should_do_full_gc.store(true, Ordering::Release); + info!("Triggering full GC"); + return true; + } + + let concurrent_marking_in_progress = self.concurrent_marking_in_progress(); + + if concurrent_marking_in_progress + && self.common.base.scheduler.work_buckets[WorkBucketStage::Concurrent].is_drained() + { + // After the Concurrent bucket is drained during concurrent marking, + // we trigger the FinalMark pause at the next poll() site (here). + // FIXME: Immediately trigger FinalMark when the Concurrent bucket is drained. + return true; + } + + let threshold = self.get_total_pages() >> 1; + let used_pages_after_last_gc = self.common.base.global_state.get_used_pages_after_last_gc(); + let used_pages_now = self.get_used_pages(); + let allocated = used_pages_now.saturating_sub(used_pages_after_last_gc); + if !concurrent_marking_in_progress && allocated > threshold { + info!("Allocated {allocated} pages since last GC ({used_pages_now} - {used_pages_after_last_gc} > {threshold}): Do concurrent marking"); + debug_assert!( + self.common.base.scheduler.work_buckets[WorkBucketStage::Concurrent].is_empty() + ); + debug_assert!(!self.concurrent_marking_in_progress()); + debug_assert_ne!(self.previous_pause(), Some(Pause::InitialMark)); + return true; + } + false + } + + fn last_collection_was_exhaustive(&self) -> bool { + self.immix_space + .is_last_gc_exhaustive(self.last_gc_was_defrag.load(Ordering::Relaxed)) + } + + fn constraints(&self) -> &'static PlanConstraints { + &CONCURRENT_IMMIX_CONSTRAINTS + } + + fn create_copy_config(&'static self) -> CopyConfig { + use enum_map::enum_map; + CopyConfig { + copy_mapping: enum_map! { + CopySemantics::DefaultCopy => CopySelector::Immix(0), + _ => CopySelector::Unused, + }, + space_mapping: vec![(CopySelector::Immix(0), &self.immix_space)], + constraints: &CONCURRENT_IMMIX_CONSTRAINTS, + } + } + + fn schedule_collection(&'static self, scheduler: &GCWorkScheduler) { + let pause = if self.concurrent_marking_in_progress() { + // FIXME: Currently it is unsafe to bypass `FinalMark` and go directly from `InitialMark` to `Full`. + // It is related to defragmentation. See https://github.com/mmtk/mmtk-core/issues/1357 for more details. + // We currently force `FinalMark` to happen if the last pause is `InitialMark`. 
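The concurrent-marking trigger in `collection_required` above boils down to a page-count heuristic; restated in isolation for clarity (illustrative):

```rust
/// Start concurrent marking once the pages allocated since the end of the last GC
/// exceed half of the total heap pages.
fn should_start_concurrent_marking(
    total_pages: usize,
    used_pages_now: usize,
    used_pages_after_last_gc: usize,
) -> bool {
    let threshold = total_pages >> 1;
    used_pages_now.saturating_sub(used_pages_after_last_gc) > threshold
}
```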
+ Pause::FinalMark + } else if self.should_do_full_gc.load(Ordering::SeqCst) { + Pause::Full + } else { + Pause::InitialMark + }; + + self.current_pause.store(Some(pause), Ordering::SeqCst); + + probe!(mmtk, concurrent_pause_determined, pause as usize); + + match pause { + Pause::Full => { + // Ref closure buckets is disabled by initial mark, and needs to be re-enabled for full GC before + // we reuse the normal Immix scheduling. + self.set_ref_closure_buckets_enabled(true); + crate::plan::immix::global::Immix::schedule_immix_full_heap_collection::< + ConcurrentImmix, + ConcurrentImmixSTWGCWorkContext, + ConcurrentImmixSTWGCWorkContext, + >(self, &self.immix_space, scheduler); + } + Pause::InitialMark => self.schedule_concurrent_marking_initial_pause(scheduler), + Pause::FinalMark => self.schedule_concurrent_marking_final_pause(scheduler), + } + } + + fn get_allocator_mapping(&self) -> &'static EnumMap { + &ALLOCATOR_MAPPING + } + + fn prepare(&mut self, tls: VMWorkerThread) { + let pause = self.current_pause().unwrap(); + match pause { + Pause::Full => { + self.common.prepare(tls, true); + self.immix_space.prepare( + true, + Some(StatsForDefrag::new(self)), + // Ignore unlog bits in full GCs because unlog bits should be all 0. + UnlogBitsOperation::NoOp, + ); + } + Pause::InitialMark => { + self.immix_space.prepare( + true, + Some(StatsForDefrag::new(self)), + // Bulk set log bits so SATB barrier will be triggered on the existing objects. + UnlogBitsOperation::BulkSet, + ); + + self.common.prepare(tls, true); + // Bulk set log bits so SATB barrier will be triggered on the existing objects. + self.common + .schedule_unlog_bits_op(UnlogBitsOperation::BulkSet); + } + Pause::FinalMark => (), + } + } + + fn release(&mut self, tls: VMWorkerThread) { + let pause = self.current_pause().unwrap(); + match pause { + Pause::InitialMark => (), + Pause::Full | Pause::FinalMark => { + self.immix_space.release( + true, + // Bulk clear log bits so SATB barrier will not be triggered. + UnlogBitsOperation::BulkClear, + ); + + self.common.release(tls, true); + + if pause == Pause::FinalMark { + // Bulk clear log bits so SATB barrier will not be triggered. + self.common + .schedule_unlog_bits_op(UnlogBitsOperation::BulkClear); + } else { + // Full pauses didn't set unlog bits in the first place, + // so there is no need to clear them. + // TODO: Currently InitialMark must be followed by a FinalMark. + // If we allow upgrading a concurrent GC to a full STW GC, + // we will need to clear the unlog bits at an appropriate place. + } + } + } + } + + fn end_of_gc(&mut self, _tls: VMWorkerThread) { + self.last_gc_was_defrag + .store(self.immix_space.end_of_gc(), Ordering::Relaxed); + + let pause = self.current_pause().unwrap(); + if pause == Pause::InitialMark { + self.set_concurrent_marking_state(true); + } + self.previous_pause.store(Some(pause), Ordering::SeqCst); + self.current_pause.store(None, Ordering::SeqCst); + if pause != Pause::FinalMark { + self.should_do_full_gc.store(false, Ordering::SeqCst); + } else { + // FIXME: Currently it is unsafe to trigger full GC during concurrent marking. + // See `Self::schedule_collection`. + // We keep the value of `self.should_do_full_gc` so that if full GC is triggered, + // the next GC will be full GC. 
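Taken together, `schedule_collection` and `end_of_gc` above implement a small pause state machine. The selection logic, restated (illustrative only):

```rust
use crate::plan::concurrent::Pause;

/// Which pause the next GC performs: concurrent marking in progress forces a
/// FinalMark, otherwise a pending full-GC request yields Full, otherwise the GC
/// starts a new concurrent cycle with InitialMark.
fn select_pause(concurrent_marking_in_progress: bool, full_gc_requested: bool) -> Pause {
    if concurrent_marking_in_progress {
        Pause::FinalMark
    } else if full_gc_requested {
        Pause::Full
    } else {
        Pause::InitialMark
    }
}
```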
+ } + info!("{:?} end", pause); + } + + fn current_gc_may_move_object(&self) -> bool { + self.immix_space.in_defrag() + } + + fn get_collection_reserved_pages(&self) -> usize { + self.immix_space.defrag_headroom_pages() + } + + fn get_used_pages(&self) -> usize { + self.immix_space.reserved_pages() + self.common.get_used_pages() + } + + fn base(&self) -> &BasePlan { + &self.common.base + } + + fn base_mut(&mut self) -> &mut BasePlan { + &mut self.common.base + } + + fn common(&self) -> &CommonPlan { + &self.common + } + + fn notify_mutators_paused(&self, _scheduler: &GCWorkScheduler) { + use crate::vm::ActivePlan; + let pause = self.current_pause().unwrap(); + match pause { + Pause::Full => { + self.set_concurrent_marking_state(false); + } + Pause::InitialMark => { + debug_assert!( + !self.concurrent_marking_in_progress(), + "prev pause: {:?}", + self.previous_pause().unwrap() + ); + } + Pause::FinalMark => { + debug_assert!(self.concurrent_marking_in_progress()); + // Flush barrier buffers + for mutator in ::VMActivePlan::mutators() { + mutator.barrier.flush(); + } + self.set_concurrent_marking_state(false); + } + } + info!("{:?} start", pause); + } + + fn concurrent(&self) -> Option<&dyn ConcurrentPlan> { + Some(self) + } +} + +impl ConcurrentImmix { + pub fn new(args: CreateGeneralPlanArgs) -> Self { + let spec = crate::util::metadata::extract_side_metadata(&[ + *VM::VMObjectModel::GLOBAL_LOG_BIT_SPEC, + ]); + + let mut plan_args = CreateSpecificPlanArgs { + global_args: args, + constraints: &CONCURRENT_IMMIX_CONSTRAINTS, + global_side_metadata_specs: SideMetadataContext::new_global_specs(&spec), + }; + + let immix_args = ImmixSpaceArgs { + mixed_age: false, + never_move_objects: false, + }; + + // These buckets are not used in an Immix plan. We can simply disable them. + // TODO: We should be more systmatic on this, and disable unnecessary buckets for other plans as well. 
+ let scheduler = &plan_args.global_args.scheduler; + scheduler.work_buckets[WorkBucketStage::VMRefForwarding].set_enabled(false); + scheduler.work_buckets[WorkBucketStage::CalculateForwarding].set_enabled(false); + scheduler.work_buckets[WorkBucketStage::SecondRoots].set_enabled(false); + scheduler.work_buckets[WorkBucketStage::RefForwarding].set_enabled(false); + scheduler.work_buckets[WorkBucketStage::FinalizableForwarding].set_enabled(false); + scheduler.work_buckets[WorkBucketStage::Compact].set_enabled(false); + + let immix = ConcurrentImmix { + immix_space: ImmixSpace::new( + plan_args.get_normal_space_args("immix", true, false, VMRequest::discontiguous()), + immix_args, + ), + common: CommonPlan::new(plan_args), + last_gc_was_defrag: AtomicBool::new(false), + current_pause: Atomic::new(None), + previous_pause: Atomic::new(None), + should_do_full_gc: AtomicBool::new(false), + concurrent_marking_active: AtomicBool::new(false), + }; + + immix.verify_side_metadata_sanity(); + + immix + } + + fn set_ref_closure_buckets_enabled(&self, do_closure: bool) { + let scheduler = &self.common.base.scheduler; + scheduler.work_buckets[WorkBucketStage::VMRefClosure].set_enabled(do_closure); + scheduler.work_buckets[WorkBucketStage::WeakRefClosure].set_enabled(do_closure); + scheduler.work_buckets[WorkBucketStage::FinalRefClosure].set_enabled(do_closure); + scheduler.work_buckets[WorkBucketStage::SoftRefClosure].set_enabled(do_closure); + scheduler.work_buckets[WorkBucketStage::PhantomRefClosure].set_enabled(do_closure); + } + + pub(crate) fn schedule_concurrent_marking_initial_pause( + &'static self, + scheduler: &GCWorkScheduler, + ) { + use crate::scheduler::gc_work::Prepare; + + self.set_ref_closure_buckets_enabled(false); + + scheduler.work_buckets[WorkBucketStage::Unconstrained].add(StopMutators::< + ConcurrentImmixGCWorkContext>, + >::new()); + scheduler.work_buckets[WorkBucketStage::Prepare].add(Prepare::< + ConcurrentImmixGCWorkContext>, + >::new(self)); + } + + fn schedule_concurrent_marking_final_pause(&'static self, scheduler: &GCWorkScheduler) { + self.set_ref_closure_buckets_enabled(true); + + // Skip root scanning in the final mark + scheduler.work_buckets[WorkBucketStage::Unconstrained].add(StopMutators::< + ConcurrentImmixGCWorkContext>, + >::new_no_scan_roots()); + + scheduler.work_buckets[WorkBucketStage::Release].add(Release::< + ConcurrentImmixGCWorkContext>, + >::new(self)); + + // Deal with weak ref and finalizers + // TODO: Check against schedule_common_work and see if we are still missing any work packet + type RefProcessingEdges = + crate::scheduler::gc_work::PlanProcessEdges, TRACE_KIND_FAST>; + // Reference processing + if !*self.base().options.no_reference_types { + use crate::util::reference_processor::{ + PhantomRefProcessing, SoftRefProcessing, WeakRefProcessing, + }; + scheduler.work_buckets[WorkBucketStage::SoftRefClosure] + .add(SoftRefProcessing::>::new()); + scheduler.work_buckets[WorkBucketStage::WeakRefClosure] + .add(WeakRefProcessing::::new()); + scheduler.work_buckets[WorkBucketStage::PhantomRefClosure] + .add(PhantomRefProcessing::::new()); + + use crate::util::reference_processor::RefEnqueue; + scheduler.work_buckets[WorkBucketStage::Release].add(RefEnqueue::::new()); + } + + // Finalization + if !*self.base().options.no_finalizer { + use crate::util::finalizable_processor::Finalization; + // finalization + scheduler.work_buckets[WorkBucketStage::FinalRefClosure] + .add(Finalization::>::new()); + } + } + + pub fn concurrent_marking_in_progress(&self) -> 
bool { + self.concurrent_marking_active.load(Ordering::Acquire) + } + + fn set_concurrent_marking_state(&self, active: bool) { + use crate::plan::global::HasSpaces; + + // Tell the spaces to allocate new objects as live + let allocate_object_as_live = active; + self.for_each_space(&mut |space: &dyn Space| { + space.set_allocate_as_live(allocate_object_as_live); + }); + + // Store the state. + self.concurrent_marking_active + .store(active, Ordering::SeqCst); + + // We also set SATB barrier as active -- this is done in Mutator prepare/release. + } + + pub(super) fn is_concurrent_marking_active(&self) -> bool { + self.concurrent_marking_active.load(Ordering::SeqCst) + } + + fn previous_pause(&self) -> Option { + self.previous_pause.load(Ordering::SeqCst) + } +} + +impl ConcurrentPlan for ConcurrentImmix { + fn current_pause(&self) -> Option { + self.current_pause.load(Ordering::SeqCst) + } + + fn concurrent_work_in_progress(&self) -> bool { + self.concurrent_marking_in_progress() + } +} diff --git a/src/plan/concurrent/immix/mod.rs b/src/plan/concurrent/immix/mod.rs new file mode 100644 index 0000000000..0f55961897 --- /dev/null +++ b/src/plan/concurrent/immix/mod.rs @@ -0,0 +1,7 @@ +//! Plan: concurrent immix + +pub(in crate::plan) mod gc_work; +pub(in crate::plan) mod global; +pub(in crate::plan) mod mutator; + +pub use global::ConcurrentImmix; diff --git a/src/plan/concurrent/immix/mutator.rs b/src/plan/concurrent/immix/mutator.rs new file mode 100644 index 0000000000..291c63e72f --- /dev/null +++ b/src/plan/concurrent/immix/mutator.rs @@ -0,0 +1,130 @@ +use crate::plan::barriers::SATBBarrier; +use crate::plan::concurrent::barrier::SATBBarrierSemantics; +use crate::plan::concurrent::immix::ConcurrentImmix; +use crate::plan::concurrent::Pause; +use crate::plan::mutator_context::create_allocator_mapping; +use crate::plan::mutator_context::create_space_mapping; + +use crate::plan::mutator_context::Mutator; +use crate::plan::mutator_context::MutatorBuilder; +use crate::plan::mutator_context::MutatorConfig; +use crate::plan::mutator_context::ReservedAllocators; +use crate::plan::AllocationSemantics; +use crate::util::alloc::allocators::AllocatorSelector; +use crate::util::alloc::ImmixAllocator; +use crate::util::opaque_pointer::{VMMutatorThread, VMWorkerThread}; +use crate::vm::VMBinding; +use crate::MMTK; +use enum_map::EnumMap; + +type BarrierSemanticsType = + SATBBarrierSemantics, { crate::policy::immix::TRACE_KIND_FAST }>; + +type BarrierType = SATBBarrier>; + +pub fn concurrent_immix_mutator_release( + mutator: &mut Mutator, + _tls: VMWorkerThread, +) { + // Release is not scheduled for initial mark pause + let current_pause = mutator.plan.concurrent().unwrap().current_pause().unwrap(); + debug_assert_ne!(current_pause, Pause::InitialMark); + + let immix_allocator = unsafe { + mutator + .allocators + .get_allocator_mut(mutator.config.allocator_mapping[AllocationSemantics::Default]) + } + .downcast_mut::>() + .unwrap(); + immix_allocator.reset(); + + // Deactivate SATB + if current_pause == Pause::Full || current_pause == Pause::FinalMark { + debug!("Deactivate SATB barrier active for {:?}", mutator as *mut _); + mutator + .barrier + .downcast_mut::>() + .unwrap() + .set_weak_ref_barrier_enabled(false); + } +} + +pub fn concurent_immix_mutator_prepare( + mutator: &mut Mutator, + _tls: VMWorkerThread, +) { + // Prepare is not scheduled for final mark pause + let current_pause = mutator.plan.concurrent().unwrap().current_pause().unwrap(); + debug_assert_ne!(current_pause, Pause::FinalMark); 
+ + let immix_allocator = unsafe { + mutator + .allocators + .get_allocator_mut(mutator.config.allocator_mapping[AllocationSemantics::Default]) + } + .downcast_mut::>() + .unwrap(); + immix_allocator.reset(); + + // Activate SATB + if current_pause == Pause::InitialMark { + debug!("Activate SATB barrier active for {:?}", mutator as *mut _); + mutator + .barrier + .downcast_mut::>() + .unwrap() + .set_weak_ref_barrier_enabled(true); + } +} + +pub(in crate::plan) const RESERVED_ALLOCATORS: ReservedAllocators = ReservedAllocators { + n_immix: 1, + ..ReservedAllocators::DEFAULT +}; + +lazy_static! { + pub static ref ALLOCATOR_MAPPING: EnumMap = { + let mut map = create_allocator_mapping(RESERVED_ALLOCATORS, true); + map[AllocationSemantics::Default] = AllocatorSelector::Immix(0); + map + }; +} + +pub fn create_concurrent_immix_mutator( + mutator_tls: VMMutatorThread, + mmtk: &'static MMTK, +) -> Mutator { + let immix = mmtk + .get_plan() + .downcast_ref::>() + .unwrap(); + let config = MutatorConfig { + allocator_mapping: &ALLOCATOR_MAPPING, + space_mapping: Box::new({ + let mut vec = create_space_mapping(RESERVED_ALLOCATORS, true, immix); + vec.push((AllocatorSelector::Immix(0), &immix.immix_space)); + vec + }), + + prepare_func: &concurent_immix_mutator_prepare, + release_func: &concurrent_immix_mutator_release, + }; + + let builder = MutatorBuilder::new(mutator_tls, mmtk, config); + let mut mutator = builder + .barrier(Box::new(SATBBarrier::new(BarrierSemanticsType::::new( + mmtk, + mutator_tls, + )))) + .build(); + + // Set barrier active, based on whether concurrent marking is in progress + mutator + .barrier + .downcast_mut::>() + .unwrap() + .set_weak_ref_barrier_enabled(immix.is_concurrent_marking_active()); + + mutator +} diff --git a/src/plan/concurrent/mod.rs b/src/plan/concurrent/mod.rs new file mode 100644 index 0000000000..1c0a5b6a09 --- /dev/null +++ b/src/plan/concurrent/mod.rs @@ -0,0 +1,34 @@ +pub mod barrier; +pub(super) mod concurrent_marking_work; +pub(super) mod global; + +pub mod immix; + +use bytemuck::NoUninit; + +/// The pause type for a concurrent GC phase. +// TODO: This is probably not be general enough for all the concurrent plans. +// TODO: We could consider moving this to specific plans later. +#[repr(u8)] +#[derive(Debug, PartialEq, Eq, Copy, Clone, NoUninit)] +pub enum Pause { + /// A whole GC (including root scanning, closure, releasing, etc.) happening in a single pause. + /// + /// Don't be confused with "full-heap" GC in generational collectors. `Pause::Full` can also + /// refer to a nursery GC that happens in a single pause. + Full = 1, + /// The initial pause before concurrent marking. + InitialMark, + /// The pause after concurrent marking. + FinalMark, +} + +unsafe impl bytemuck::ZeroableInOption for Pause {} + +unsafe impl bytemuck::PodInOption for Pause {} + +impl Default for Pause { + fn default() -> Self { + Self::Full + } +} diff --git a/src/plan/gc_requester.rs b/src/plan/gc_requester.rs index e3a8462f96..944558184d 100644 --- a/src/plan/gc_requester.rs +++ b/src/plan/gc_requester.rs @@ -30,6 +30,7 @@ impl GCRequester { // `GCWorkScheduler::request_schedule_collection` needs to hold a mutex to communicate // with GC workers, which is expensive for functions like `poll`. We use the atomic // flag `request_flag` to elide the need to acquire the mutex in subsequent calls. 
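The `Pause` enum above starts at 1 and opts into bytemuck's `ZeroableInOption`/`PodInOption` so that `Option<Pause>` has the all-zero bit pattern as its `None` representation; that is what allows `ConcurrentImmix` to keep `current_pause`/`previous_pause` in `atomic::Atomic` cells. A small usage sketch (crate-internal, illustrative):

```rust
use atomic::{Atomic, Ordering};

use crate::plan::concurrent::Pause;

fn demo(current_pause: &Atomic<Option<Pause>>) {
    current_pause.store(Some(Pause::InitialMark), Ordering::SeqCst);
    assert_eq!(current_pause.load(Ordering::SeqCst), Some(Pause::InitialMark));
    // `None` round-trips as the zero bit pattern.
    current_pause.store(None, Ordering::SeqCst);
}
```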
+ probe!(mmtk, gcrequester_request); self.scheduler.request_schedule_collection(); } } diff --git a/src/plan/gc_work.rs b/src/plan/gc_work.rs new file mode 100644 index 0000000000..4998d1101c --- /dev/null +++ b/src/plan/gc_work.rs @@ -0,0 +1,32 @@ +//! This module holds work packets for `CommonPlan` and `BasePlan`, or other work packets not +//! directly related to scheduling. + +use crate::{plan::global::CommonPlan, scheduler::GCWork, vm::VMBinding}; + +pub(super) struct SetCommonPlanUnlogBits { + pub common_plan: &'static CommonPlan, +} + +impl GCWork for SetCommonPlanUnlogBits { + fn do_work( + &mut self, + _worker: &mut crate::scheduler::GCWorker, + _mmtk: &'static crate::MMTK, + ) { + self.common_plan.set_side_log_bits(); + } +} + +pub(super) struct ClearCommonPlanUnlogBits { + pub common_plan: &'static CommonPlan, +} + +impl GCWork for ClearCommonPlanUnlogBits { + fn do_work( + &mut self, + _worker: &mut crate::scheduler::GCWorker, + _mmtk: &'static crate::MMTK, + ) { + self.common_plan.clear_side_log_bits(); + } +} diff --git a/src/plan/generational/immix/global.rs b/src/plan/generational/immix/global.rs index 41e7a70768..02e9df1b9f 100644 --- a/src/plan/generational/immix/global.rs +++ b/src/plan/generational/immix/global.rs @@ -10,6 +10,7 @@ use crate::plan::AllocationSemantics; use crate::plan::Plan; use crate::plan::PlanConstraints; use crate::policy::gc_work::TraceKind; +use crate::policy::immix::defrag::StatsForDefrag; use crate::policy::immix::ImmixSpace; use crate::policy::immix::ImmixSpaceArgs; use crate::policy::immix::{TRACE_KIND_DEFRAG, TRACE_KIND_FAST}; @@ -20,6 +21,7 @@ use crate::util::alloc::allocators::AllocatorSelector; use crate::util::copy::*; use crate::util::heap::gc_trigger::SpaceStats; use crate::util::heap::VMRequest; +use crate::util::metadata::log_bit::UnlogBitsOperation; use crate::util::Address; use crate::util::ObjectReference; use crate::util::VMWorkerThread; @@ -129,13 +131,15 @@ impl Plan for GenImmix { let full_heap = !self.gen.is_current_gc_nursery(); self.gen.prepare(tls); if full_heap { - if VM::VMObjectModel::GLOBAL_LOG_BIT_SPEC.is_on_side() { - self.immix_space.clear_side_log_bits(); - } self.immix_space.prepare( full_heap, - Some(crate::policy::immix::defrag::StatsForDefrag::new(self)), + Some(StatsForDefrag::new(self)), + // Bulk clear unlog bits so that we will reconstruct them. + UnlogBitsOperation::BulkClear, ); + } else { + // We don't do anything special to unlog bits during nursery GC + // because ProcessModBuf will set the unlog bits back. } } @@ -143,8 +147,16 @@ impl Plan for GenImmix { let full_heap = !self.gen.is_current_gc_nursery(); self.gen.release(tls); if full_heap { - self.immix_space.release(full_heap); + self.immix_space.release( + full_heap, + // We reconstructred unlog bits during tracing. Keep them. + UnlogBitsOperation::NoOp, + ); + } else { + // We don't do anything special to unlog bits during nursery GC + // because ProcessModBuf has set the unlog bits back. 
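`UnlogBitsOperation` itself is not shown in this diff (it lives under `util::metadata::log_bit`). From the call sites here it is presumably a small enum along the following lines; this is a hypothetical reconstruction for the reader's convenience, not the actual definition:

```rust
/// Hypothetical reconstruction of the operation threaded through space prepare/release.
pub enum UnlogBitsOperation {
    /// Leave the unlog bits untouched.
    NoOp,
    /// Bulk-set unlog bits for a region, so the write barrier fires for existing objects.
    BulkSet,
    /// Bulk-clear unlog bits for a region, so the write barrier stops firing.
    BulkClear,
}
```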
} + self.last_gc_was_full_heap .store(full_heap, Ordering::Relaxed); } diff --git a/src/plan/global.rs b/src/plan/global.rs index f0736c4df5..5cb6ce9bb9 100644 --- a/src/plan/global.rs +++ b/src/plan/global.rs @@ -3,6 +3,7 @@ use super::PlanConstraints; use crate::global_state::GlobalState; use crate::mmtk::MMTK; +use crate::plan::gc_work::{ClearCommonPlanUnlogBits, SetCommonPlanUnlogBits}; use crate::plan::tracing::ObjectQueue; use crate::plan::Mutator; use crate::policy::immortalspace::ImmortalSpace; @@ -19,6 +20,7 @@ use crate::util::heap::layout::Mmapper; use crate::util::heap::layout::VMMap; use crate::util::heap::HeapMeta; use crate::util::heap::VMRequest; +use crate::util::metadata::log_bit::UnlogBitsOperation; use crate::util::metadata::side_metadata::SideMetadataSanity; use crate::util::metadata::side_metadata::SideMetadataSpec; use crate::util::options::Options; @@ -58,6 +60,9 @@ pub fn create_mutator( PlanSelector::StickyImmix => { crate::plan::sticky::immix::mutator::create_stickyimmix_mutator(tls, mmtk) } + PlanSelector::ConcurrentImmix => { + crate::plan::concurrent::immix::mutator::create_concurrent_immix_mutator(tls, mmtk) + } PlanSelector::Compressor => { crate::plan::compressor::mutator::create_compressor_mutator(tls, mmtk) } @@ -94,6 +99,10 @@ pub fn create_plan( PlanSelector::StickyImmix => { Box::new(crate::plan::sticky::immix::StickyImmix::new(args)) as Box> } + PlanSelector::ConcurrentImmix => { + Box::new(crate::plan::concurrent::immix::ConcurrentImmix::new(args)) + as Box> + } PlanSelector::Compressor => { Box::new(crate::plan::compressor::Compressor::new(args)) as Box> } @@ -179,6 +188,14 @@ pub trait Plan: 'static + HasSpaces + Sync + Downcast { None } + /// Return a reference to `ConcurrentPlan` to allow + /// access methods specific to concurrent plans if the plan is a concurrent plan. + fn concurrent( + &self, + ) -> Option<&dyn crate::plan::concurrent::global::ConcurrentPlan> { + None + } + /// Get the current run time options. fn options(&self) -> &Options { &self.base().options @@ -188,6 +205,9 @@ pub trait Plan: 'static + HasSpaces + Sync + Downcast { /// This defines what space this plan will allocate objects into for different semantics. fn get_allocator_mapping(&self) -> &'static EnumMap; + /// Called when all mutators are paused. This is called before prepare. + fn notify_mutators_paused(&self, _scheduler: &GCWorkScheduler) {} + /// Prepare the plan before a GC. This is invoked in an initial step in the GC. /// This is invoked once per GC by one worker thread. `tls` is the worker thread that executes this method. fn prepare(&mut self, tls: VMWorkerThread); @@ -203,6 +223,7 @@ pub trait Plan: 'static + HasSpaces + Sync + Downcast { /// Inform the plan about the end of a GC. It is guaranteed that there is no further work for this GC. /// This is invoked once per GC by one worker thread. `tls` is the worker thread that executes this method. + // TODO: This is actually called at the end of a pause/STW, rather than the end of a GC. It should be renamed. fn end_of_gc(&mut self, _tls: VMWorkerThread); /// Notify the plan that an emergency collection will happen. The plan should try to free as much memory as possible. @@ -734,6 +755,25 @@ impl CommonPlan { self.base.release(tls, full_heap) } + pub(crate) fn schedule_unlog_bits_op(&mut self, unlog_bits_op: UnlogBitsOperation) { + if VM::VMObjectModel::GLOBAL_LOG_BIT_SPEC.is_on_side() { + // # Safety: CommonPlan reference is always valid within this collection cycle. 
+ let common_plan = unsafe { &*(self as *const CommonPlan) }; + + match unlog_bits_op { + UnlogBitsOperation::NoOp => {} + UnlogBitsOperation::BulkSet => { + self.base.scheduler.work_buckets[WorkBucketStage::Prepare] + .add(SetCommonPlanUnlogBits { common_plan }); + } + UnlogBitsOperation::BulkClear => { + self.base.scheduler.work_buckets[WorkBucketStage::Release] + .add(ClearCommonPlanUnlogBits { common_plan }); + } + } + } + } + pub fn clear_side_log_bits(&self) { self.immortal.clear_side_log_bits(); self.los.clear_side_log_bits(); @@ -788,7 +828,7 @@ impl CommonPlan { } else if #[cfg(feature = "marksweep_as_nonmoving")] { self.nonmoving.prepare(_full_heap); } else { - self.nonmoving.prepare(_full_heap, None); + self.nonmoving.prepare(_full_heap, None, UnlogBitsOperation::NoOp); } } } @@ -800,7 +840,7 @@ impl CommonPlan { } else if #[cfg(feature = "marksweep_as_nonmoving")] { self.nonmoving.prepare(_full_heap); } else { - self.nonmoving.release(_full_heap); + self.nonmoving.release(_full_heap, UnlogBitsOperation::NoOp); } } } diff --git a/src/plan/immix/global.rs b/src/plan/immix/global.rs index 136db58c43..f64d463ab4 100644 --- a/src/plan/immix/global.rs +++ b/src/plan/immix/global.rs @@ -15,6 +15,7 @@ use crate::util::alloc::allocators::AllocatorSelector; use crate::util::copy::*; use crate::util::heap::gc_trigger::SpaceStats; use crate::util::heap::VMRequest; +use crate::util::metadata::log_bit::UnlogBitsOperation; use crate::util::metadata::side_metadata::SideMetadataContext; use crate::vm::VMBinding; use crate::{policy::immix::ImmixSpace, util::opaque_pointer::VMWorkerThread}; @@ -84,17 +85,11 @@ impl Plan for Immix { } fn prepare(&mut self, tls: VMWorkerThread) { - self.common.prepare(tls, true); - self.immix_space.prepare( - true, - Some(crate::policy::immix::defrag::StatsForDefrag::new(self)), - ); + self.prepare_inner(tls, UnlogBitsOperation::NoOp) } fn release(&mut self, tls: VMWorkerThread) { - self.common.release(tls, true); - // release the collected region - self.immix_space.release(true); + self.release_inner(tls, UnlogBitsOperation::NoOp); } fn end_of_gc(&mut self, tls: VMWorkerThread) { @@ -208,4 +203,31 @@ impl Immix { pub(in crate::plan) fn set_last_gc_was_defrag(&self, defrag: bool, order: Ordering) { self.last_gc_was_defrag.store(defrag, order) } + + /// Prepare with unlog-bit operation. + /// Some Immix-derived plans may need to set/clear unlog bits when preparing. + pub(in crate::plan) fn prepare_inner( + &mut self, + tls: VMWorkerThread, + unlog_bits_op: UnlogBitsOperation, + ) { + self.common.prepare(tls, true); + self.immix_space.prepare( + true, + Some(crate::policy::immix::defrag::StatsForDefrag::new(self)), + unlog_bits_op, + ); + } + + /// Release with unlog-bit operation. + /// Some Immix-derived plans may need to set/clear unlog bits when releasing. 
+ pub(in crate::plan) fn release_inner( + &mut self, + tls: VMWorkerThread, + unlog_bits_op: UnlogBitsOperation, + ) { + self.common.release(tls, true); + // release the collected region + self.immix_space.release(true, unlog_bits_op); + } } diff --git a/src/plan/mod.rs b/src/plan/mod.rs index e86a7fe590..ea7455e9eb 100644 --- a/src/plan/mod.rs +++ b/src/plan/mod.rs @@ -18,6 +18,8 @@ pub use barriers::BarrierSelector; pub(crate) mod gc_requester; +mod gc_work; + mod global; pub(crate) use global::create_gc_worker_context; pub(crate) use global::create_mutator; @@ -45,6 +47,7 @@ mod generational; mod sticky; mod compressor; +mod concurrent; mod immix; mod markcompact; mod marksweep; diff --git a/src/plan/sticky/immix/global.rs b/src/plan/sticky/immix/global.rs index 7dcd83ad3b..a3cac41c61 100644 --- a/src/plan/sticky/immix/global.rs +++ b/src/plan/sticky/immix/global.rs @@ -6,6 +6,7 @@ use crate::plan::immix; use crate::plan::PlanConstraints; use crate::policy::gc_work::TraceKind; use crate::policy::gc_work::TRACE_KIND_TRANSITIVE_PIN; +use crate::policy::immix::defrag::StatsForDefrag; use crate::policy::immix::ImmixSpace; use crate::policy::immix::TRACE_KIND_FAST; use crate::policy::sft::SFT; @@ -14,6 +15,7 @@ use crate::util::copy::CopyConfig; use crate::util::copy::CopySelector; use crate::util::copy::CopySemantics; use crate::util::heap::gc_trigger::SpaceStats; +use crate::util::metadata::log_bit::UnlogBitsOperation; use crate::util::metadata::side_metadata::SideMetadataContext; use crate::util::statistics::counter::EventCounter; use crate::vm::ObjectModel; @@ -118,24 +120,37 @@ impl Plan for StickyImmix { // Prepare both large object space and immix space self.immix.immix_space.prepare( false, - Some(crate::policy::immix::defrag::StatsForDefrag::new(self)), + Some(StatsForDefrag::new(self)), + // We don't do anything special to unlog bits during nursery GC + // because ProcessModBuf will set the unlog bits back. + UnlogBitsOperation::NoOp, ); self.immix.common.los.prepare(false); } else { self.full_heap_gc_count.lock().unwrap().inc(); - if VM::VMObjectModel::GLOBAL_LOG_BIT_SPEC.is_on_side() { - self.immix.immix_space.clear_side_log_bits(); - } - self.immix.prepare(tls); + self.immix.prepare_inner( + tls, + // We will reconstruct unlog bits during tracing. + UnlogBitsOperation::BulkClear, + ); } } fn release(&mut self, tls: crate::util::VMWorkerThread) { if self.is_current_gc_nursery() { - self.immix.immix_space.release(false); + self.immix.immix_space.release( + false, + // We don't do anything special to unlog bits during nursery GC + // because ProcessModBuf has set the unlog bits back. + UnlogBitsOperation::NoOp, + ); self.immix.common.los.release(false); } else { - self.immix.release(tls); + self.immix.release_inner( + tls, + // We reconstructred unlog bits during tracing. Keep them. + UnlogBitsOperation::NoOp, + ); } } diff --git a/src/plan/tracing.rs b/src/plan/tracing.rs index eecd40cbaf..792e142c76 100644 --- a/src/plan/tracing.rs +++ b/src/plan/tracing.rs @@ -1,10 +1,12 @@ //! This module contains code useful for tracing, //! i.e. visiting the reachable objects by traversing all or part of an object graph. 
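The SATB barrier semantics and `ConcurrentTraceObjects` above both rely on the flush-on-full discipline of `VectorQueue`, which is defined in this module. The pattern, in isolation (illustrative, crate-internal):

```rust
/// Push a value and hand the whole buffer to `flush` once the queue reports full,
/// as the SATB barrier does with its `satb` and `refs` queues.
fn push_or_flush<T>(
    queue: &mut crate::plan::VectorQueue<T>,
    value: T,
    mut flush: impl FnMut(Vec<T>),
) {
    queue.push(value);
    if queue.is_full() {
        flush(queue.take());
    }
}
```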
+use std::marker::PhantomData; + use crate::scheduler::gc_work::{ProcessEdgesWork, SlotOf}; use crate::scheduler::{GCWorker, WorkBucketStage, EDGES_WORK_BUFFER_SIZE}; -use crate::util::ObjectReference; -use crate::vm::SlotVisitor; +use crate::util::{ObjectReference, VMThread, VMWorkerThread}; +use crate::vm::{Scanning, SlotVisitor, VMBinding}; /// This trait represents an object queue to enqueue objects during tracing. pub trait ObjectQueue { @@ -63,6 +65,16 @@ impl VectorQueue { } self.buffer.push(v); } + + /// Return the len of the queue + pub fn len(&self) -> usize { + self.buffer.len() + } + + /// Empty the queue + pub fn clear(&mut self) { + self.buffer.clear() + } } impl Default for VectorQueue { @@ -134,3 +146,24 @@ impl Drop for ObjectsClosure<'_, E> { self.flush(); } } + +/// For iterating over the slots of an object. +// FIXME: This type iterates slots, but all of its current use cases only care about the values in the slots. +// And it currently only works if the object supports slot enqueuing (i.e. `Scanning::scan_object` is implemented). +// We may refactor the interface according to +pub(crate) struct SlotIterator { + _p: PhantomData, +} + +impl SlotIterator { + /// Iterate over the slots of an object by applying a function to each slot. + pub fn iterate_fields(object: ObjectReference, _tls: VMThread, mut f: F) { + // FIXME: We should use tls from the arguments. + // See https://github.com/mmtk/mmtk-core/issues/1375 + let fake_tls = VMWorkerThread(VMThread::UNINITIALIZED); + if !>::support_slot_enqueuing(fake_tls, object) { + panic!("SlotIterator::iterate_fields cannot be used on objects that don't support slot-enqueuing"); + } + >::scan_object(fake_tls, object, &mut f); + } +} diff --git a/src/policy/immix/immixspace.rs b/src/policy/immix/immixspace.rs index bc3a585eff..18c4045352 100644 --- a/src/policy/immix/immixspace.rs +++ b/src/policy/immix/immixspace.rs @@ -14,6 +14,7 @@ use crate::util::heap::chunk_map::*; use crate::util::heap::BlockPageResource; use crate::util::heap::PageResource; use crate::util::linear_scan::{Region, RegionIterator}; +use crate::util::metadata::log_bit::UnlogBitsOperation; use crate::util::metadata::side_metadata::SideMetadataSpec; #[cfg(feature = "vo_bit")] use crate::util::metadata::vo_bit; @@ -197,6 +198,9 @@ impl Space for ImmixSpace { } fn clear_side_log_bits(&self) { + // Remove the following warning if we have a legitimate use case. + warn!("ImmixSpace::clear_side_log_bits is single-treaded. Consider clearing side metadata in per-chunk work packets."); + let log_bit = VM::VMObjectModel::GLOBAL_LOG_BIT_SPEC.extract_side_spec(); for chunk in self.chunk_map.all_chunks() { log_bit.bzero_metadata(chunk.start(), Chunk::BYTES); @@ -204,6 +208,9 @@ impl Space for ImmixSpace { } fn set_side_log_bits(&self) { + // Remove the following warning if we have a legitimate use case. + warn!("ImmixSpace::set_side_log_bits is single-treaded. 
Consider setting side metadata in per-chunk work packets."); + let log_bit = VM::VMObjectModel::GLOBAL_LOG_BIT_SPEC.extract_side_spec(); for chunk in self.chunk_map.all_chunks() { log_bit.bset_metadata(chunk.start(), Chunk::BYTES); @@ -417,7 +424,12 @@ impl ImmixSpace { &self.scheduler } - pub fn prepare(&mut self, major_gc: bool, plan_stats: Option) { + pub(crate) fn prepare( + &mut self, + major_gc: bool, + plan_stats: Option, + unlog_bits_op: UnlogBitsOperation, + ) { if major_gc { // Update mark_state if VM::VMObjectModel::LOCAL_MARK_BIT_SPEC.is_on_side() { @@ -445,6 +457,7 @@ impl ImmixSpace { } else { None }, + unlog_bits_op, }) }); self.scheduler().work_buckets[WorkBucketStage::Prepare].bulk_add(work_packets); @@ -496,7 +509,7 @@ impl ImmixSpace { } /// Release for the immix space. - pub fn release(&mut self, major_gc: bool) { + pub(crate) fn release(&mut self, major_gc: bool, unlog_bits_op: UnlogBitsOperation) { if major_gc { // Update line_unavail_state for hole searching after this GC. if !super::BLOCK_ONLY { @@ -511,7 +524,7 @@ impl ImmixSpace { self.reusable_blocks.reset(); } // Sweep chunks and blocks - let work_packets = self.generate_sweep_tasks(); + let work_packets = self.generate_sweep_tasks(unlog_bits_op); self.scheduler().work_buckets[WorkBucketStage::Release].bulk_add(work_packets); self.lines_consumed.store(0, Ordering::Relaxed); @@ -528,7 +541,7 @@ impl ImmixSpace { } /// Generate chunk sweep tasks - fn generate_sweep_tasks(&self) -> Vec>> { + fn generate_sweep_tasks(&self, unlog_bits_op: UnlogBitsOperation) -> Vec>> { self.defrag.mark_histograms.lock().clear(); // # Safety: ImmixSpace reference is always valid within this collection cycle. let space = unsafe { &*(self as *const Self) }; @@ -540,6 +553,7 @@ impl ImmixSpace { Box::new(SweepChunk { space, chunk, + unlog_bits_op, epilogue: epilogue.clone(), }) }); @@ -909,6 +923,7 @@ pub struct PrepareBlockState { pub space: &'static ImmixSpace, pub chunk: Chunk, pub defrag_threshold: Option, + pub unlog_bits_op: UnlogBitsOperation, } impl PrepareBlockState { @@ -953,6 +968,9 @@ impl GCWork for PrepareBlockState { debug_assert!(!block.get_state().is_reusable()); debug_assert_ne!(block.get_state(), BlockState::Marked); } + + self.unlog_bits_op + .execute::(self.chunk.start(), Chunk::BYTES); } } @@ -960,6 +978,7 @@ impl GCWork for PrepareBlockState { struct SweepChunk { space: &'static ImmixSpace, chunk: Chunk, + unlog_bits_op: UnlogBitsOperation, /// A destructor invoked when all `SweepChunk` packets are finished. epilogue: Arc>, } @@ -1020,6 +1039,10 @@ impl GCWork for SweepChunk { self.space.chunk_map.set_allocated(self.chunk, false) } self.space.defrag.add_completed_mark_histogram(histogram); + + self.unlog_bits_op + .execute::(self.chunk.start(), Chunk::BYTES); + self.epilogue.finish_one_work_packet(); } } diff --git a/src/policy/immix/line.rs b/src/policy/immix/line.rs index 94036ecc65..a8d2e9686e 100644 --- a/src/policy/immix/line.rs +++ b/src/policy/immix/line.rs @@ -1,3 +1,5 @@ +use std::ops::Range; + use super::block::Block; use crate::util::linear_scan::{Region, RegionIterator}; use crate::util::metadata::side_metadata::SideMetadataSpec; @@ -81,4 +83,32 @@ impl Line { } marked_lines } + + /// Bulk set the local mark bits of a line range. + /// + /// This is useful during concurrent marking. By doing this, concurrent marking will + /// conservatively consider all objects allocated in the line range as live, and the mutator + /// doesn't need to explicitly mark bump-allocated objects in the fast path. 
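The intended caller of these line helpers is not part of this hunk; presumably an Immix allocator invokes them when it acquires a fresh region while concurrent marking is active, roughly as sketched below (hypothetical caller; only `Line::eager_mark_lines` is assumed from this change):

```rust
use std::ops::Range;

use crate::policy::immix::line::Line;
use crate::vm::VMBinding;

/// Hypothetical allocator-side hook: when lines are acquired for bump allocation
/// during concurrent marking, eagerly mark both the line mark states and the mark
/// bits so objects allocated there are treated as live without per-object work.
fn on_lines_acquired<VM: VMBinding>(line_mark_state: u8, lines: Range<Line>, marking: bool) {
    if marking {
        Line::eager_mark_lines::<VM>(line_mark_state, lines);
    }
}
```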
+ pub fn initialize_mark_table_as_marked(lines: Range) { + let meta = VM::VMObjectModel::LOCAL_MARK_BIT_SPEC.extract_side_spec(); + let start = lines.start.start(); + let limit = lines.end.start(); + let size = limit - start; + meta.bset_metadata(start, size); + } + + /// Bulk set line mark states. + pub fn bulk_set_line_mark_states(line_mark_state: u8, lines: Range) { + for line in RegionIterator::::new(lines.start, lines.end) { + line.mark(line_mark_state); + } + } + + /// Eagerly mark all line mark states and all side mark bits in the gap. + /// + /// Useful during concurrent marking. + pub fn eager_mark_lines(line_mark_state: u8, lines: Range) { + Self::bulk_set_line_mark_states(line_mark_state, lines.clone()); + Self::initialize_mark_table_as_marked::(lines); + } } diff --git a/src/policy/largeobjectspace.rs b/src/policy/largeobjectspace.rs index 610083eac0..f62202eccf 100644 --- a/src/policy/largeobjectspace.rs +++ b/src/policy/largeobjectspace.rs @@ -61,6 +61,24 @@ impl SFT for LargeObjectSpace { true } fn initialize_object_metadata(&self, object: ObjectReference, alloc: bool) { + if self.should_allocate_as_live() { + VM::VMObjectModel::LOCAL_LOS_MARK_NURSERY_SPEC.store_atomic::( + object, + self.mark_state, + None, + Ordering::SeqCst, + ); + debug_assert!( + VM::VMObjectModel::GLOBAL_LOG_BIT_SPEC.load_atomic::( + object, + None, + Ordering::Acquire + ) == 0 + ); + + self.treadmill.add_to_treadmill(object, false); + return; + } let old_value = VM::VMObjectModel::LOCAL_LOS_MARK_NURSERY_SPEC.load_atomic::( object, None, @@ -205,8 +223,6 @@ impl Space for LargeObjectSpace { } fn set_side_log_bits(&self) { - debug_assert!(self.treadmill.is_from_space_empty()); - debug_assert!(self.treadmill.is_nursery_empty()); let mut enumator = ClosureObjectEnumerator::<_, VM>::new(|object| { VM::VMObjectModel::GLOBAL_LOG_BIT_SPEC.mark_as_unlogged::(object, Ordering::SeqCst); }); @@ -267,7 +283,6 @@ impl LargeObjectSpace { pub fn prepare(&mut self, full_heap: bool) { if full_heap { - debug_assert!(self.treadmill.is_from_space_empty()); self.mark_state = MARK_BIT - self.mark_state; } self.treadmill.flip(full_heap); @@ -281,6 +296,7 @@ impl LargeObjectSpace { self.sweep_large_pages(false); } } + // Allow nested-if for this function to make it clear that test_and_mark() is only executed // for the outer condition is met. #[allow(clippy::collapsible_if)] @@ -413,6 +429,10 @@ impl LargeObjectSpace { ) & NURSERY_BIT == NURSERY_BIT } + + pub fn is_marked(&self, object: ObjectReference) -> bool { + self.test_mark_bit(object, self.mark_state) + } } fn get_super_page(cell: Address) -> Address { diff --git a/src/policy/space.rs b/src/policy/space.rs index fcfe9b4088..a80dc3f973 100644 --- a/src/policy/space.rs +++ b/src/policy/space.rs @@ -34,6 +34,7 @@ use crate::util::memory::{self, HugePageSupport, MmapProtection, MmapStrategy}; use crate::vm::VMBinding; use std::marker::PhantomData; +use std::sync::atomic::AtomicBool; use std::sync::Arc; use std::sync::Mutex; @@ -418,6 +419,18 @@ pub trait Space: 'static + SFT + Sync + Downcast { /// scanning VO bits because it is sparse. fn enumerate_objects(&self, enumerator: &mut dyn ObjectEnumerator); + fn set_allocate_as_live(&self, live: bool) { + self.common() + .allocate_as_live + .store(live, std::sync::atomic::Ordering::SeqCst); + } + + fn should_allocate_as_live(&self) -> bool { + self.common() + .allocate_as_live + .load(std::sync::atomic::Ordering::Acquire) + } + /// Clear the side log bits for allocated regions in this space. 
/// This method is only called if the plan knows the log bits are side metadata. fn clear_side_log_bits(&self); @@ -526,6 +539,8 @@ pub struct CommonSpace { pub global_state: Arc, pub options: Arc, + pub allocate_as_live: AtomicBool, + p: PhantomData, } @@ -600,6 +615,7 @@ impl CommonSpace { acquire_lock: Mutex::new(()), global_state: args.plan_args.global_state, options: args.plan_args.options.clone(), + allocate_as_live: AtomicBool::new(false), p: PhantomData, }; diff --git a/src/scheduler/gc_work.rs b/src/scheduler/gc_work.rs index 7e50c86aa3..bfc14cb7ec 100644 --- a/src/scheduler/gc_work.rs +++ b/src/scheduler/gc_work.rs @@ -191,11 +191,31 @@ impl GCWork for ReleaseCollector { /// /// TODO: Smaller work granularity #[derive(Default)] -pub struct StopMutators(PhantomData); +pub struct StopMutators { + /// If this is true, we skip creating [`ScanMutatorRoots`] work packets for mutators. + /// By default, this is false. + skip_mutator_roots: bool, + /// Flush mutators once they are stopped. By default this is false. [`ScanMutatorRoots`] will flush mutators. + flush_mutator: bool, + phantom: PhantomData, +} impl StopMutators { pub fn new() -> Self { - Self(PhantomData) + Self { + skip_mutator_roots: false, + flush_mutator: false, + phantom: PhantomData, + } + } + + /// Create a `StopMutators` work packet that does not create `ScanMutatorRoots` work packets for mutators, and will simply flush mutators. + pub fn new_no_scan_roots() -> Self { + Self { + skip_mutator_roots: true, + flush_mutator: true, + phantom: PhantomData, + } } } @@ -206,10 +226,17 @@ impl GCWork for StopMutators { ::VMCollection::stop_all_mutators(worker.tls, |mutator| { // TODO: The stack scanning work won't start immediately, as the `Prepare` bucket is not opened yet (the bucket is opened in notify_mutators_paused). // Should we push to Unconstrained instead? - mmtk.scheduler.work_buckets[WorkBucketStage::Prepare] - .add(ScanMutatorRoots::(mutator)); + + if self.flush_mutator { + mutator.flush(); + } + if !self.skip_mutator_roots { + mmtk.scheduler.work_buckets[WorkBucketStage::Prepare] + .add(ScanMutatorRoots::(mutator)); + } }); trace!("stop_all_mutators end"); + mmtk.get_plan().notify_mutators_paused(&mmtk.scheduler); mmtk.scheduler.notify_mutators_paused(mmtk); mmtk.scheduler.work_buckets[WorkBucketStage::Prepare].add(ScanVMSpecificRoots::::new()); } diff --git a/src/scheduler/mod.rs b/src/scheduler/mod.rs index 33e89be0fe..5006edbe1d 100644 --- a/src/scheduler/mod.rs +++ b/src/scheduler/mod.rs @@ -17,7 +17,7 @@ pub(crate) use scheduler::GCWorkScheduler; mod stat; mod work_counter; -mod work; +pub(crate) mod work; pub use work::GCWork; pub(crate) use work::GCWorkContext; diff --git a/src/scheduler/scheduler.rs b/src/scheduler/scheduler.rs index d6c4eff74e..38d387b3c4 100644 --- a/src/scheduler/scheduler.rs +++ b/src/scheduler/scheduler.rs @@ -131,6 +131,7 @@ impl GCWorkScheduler { /// Add the `ScheduleCollection` packet. Called by the last parked worker. fn add_schedule_collection_packet(&self) { // We are still holding the mutex `WorkerMonitor::sync`. Do not notify now. + probe!(mmtk, add_schedule_collection_packet); self.work_buckets[WorkBucketStage::Unconstrained].add_no_notify(ScheduleCollection); } @@ -272,6 +273,7 @@ impl GCWorkScheduler { /// /// Return true if there're any non-empty buckets updated. 
pub(crate) fn update_buckets(&self) -> bool { + debug!("update_buckets"); let mut buckets_updated = false; let mut new_packets = false; for i in 0..WorkBucketStage::LENGTH { @@ -457,11 +459,20 @@ impl GCWorkScheduler { LastParkedResult::WakeAll } else { // GC finished. - self.on_gc_finished(worker); + let concurrent_work_scheduled = self.on_gc_finished(worker); // Clear the current goal goals.on_current_goal_completed(); - self.respond_to_requests(worker, goals) + + if concurrent_work_scheduled { + // It was the initial mark pause and scheduled concurrent work. + // Wake up all GC workers to do concurrent work. + LastParkedResult::WakeAll + } else { + // It was an STW GC or the final mark pause of a concurrent GC. + // Respond to another goal. + self.respond_to_requests(worker, goals) + } } } WorkerGoal::StopForFork => { @@ -534,7 +545,9 @@ impl GCWorkScheduler { } /// Called when GC has finished, i.e. when all work packets have been executed. - fn on_gc_finished(&self, worker: &GCWorker) { + /// + /// Return `true` if any concurrent work packets have been scheduled. + fn on_gc_finished(&self, worker: &GCWorker) -> bool { // All GC workers must have parked by now. debug_assert!(!self.worker_group.has_designated_work()); self.debug_assert_all_stw_buckets_empty(); @@ -590,6 +603,9 @@ impl GCWorkScheduler { } } + mmtk.state + .set_used_pages_after_last_gc(mmtk.get_plan().get_used_pages()); + #[cfg(feature = "extreme_assertions")] if crate::util::slot_logger::should_check_duplicate_slots(mmtk.get_plan()) { // reset the logging info at the end of each GC @@ -599,9 +615,14 @@ impl GCWorkScheduler { // Reset the triggering information. mmtk.state.reset_collection_trigger(); + let concurrent_work_scheduled = self.schedule_concurrent_packets(); + self.debug_assert_all_stw_buckets_closed(); + // Set to NotInGC after everything, and right before resuming mutators. mmtk.set_gc_status(GcStatus::NotInGC); ::VMCollection::resume_mutators(worker.tls); + + concurrent_work_scheduled } pub fn enable_stat(&self) { @@ -634,4 +655,17 @@ impl GCWorkScheduler { first_stw_bucket.open(); self.worker_monitor.notify_work_available(true); } + + pub(super) fn schedule_concurrent_packets(&self) -> bool { + let concurrent_bucket = &self.work_buckets[WorkBucketStage::Concurrent]; + if !concurrent_bucket.is_empty() { + concurrent_bucket.set_enabled(true); + concurrent_bucket.open(); + true + } else { + concurrent_bucket.set_enabled(false); + concurrent_bucket.close(); + false + } + } } diff --git a/src/util/alloc/immix_allocator.rs b/src/util/alloc/immix_allocator.rs index 807ddded90..eb2e5235fa 100644 --- a/src/util/alloc/immix_allocator.rs +++ b/src/util/alloc/immix_allocator.rs @@ -1,3 +1,4 @@ +use std::sync::atomic::Ordering; use std::sync::Arc; use super::allocator::{align_allocation_no_fill, fill_alignment_gap, AllocatorContext}; @@ -265,6 +266,11 @@ impl ImmixAllocator { // Update the hole-searching cursor to None. Some(end_line) }; + // mark objects if concurrent marking is active + if self.immix_space().should_allocate_as_live() { + let state = self.space.line_mark_state.load(Ordering::Acquire); + Line::eager_mark_lines::(state, start_line..end_line); + } return true; } else { // No more recyclable lines. Set the hole-searching cursor to None. 
@@ -305,6 +311,11 @@ impl ImmixAllocator { // Bulk clear stale line mark state Line::MARK_TABLE .bzero_metadata(block.start(), crate::policy::immix::block::Block::BYTES); + // mark objects if concurrent marking is active + if self.immix_space().should_allocate_as_live() { + let state = self.space.line_mark_state.load(Ordering::Acquire); + Line::eager_mark_lines::(state, block.start_line()..block.end_line()); + } if self.request_for_large { self.large_bump_pointer.cursor = block.start(); self.large_bump_pointer.limit = block.end(); diff --git a/src/util/metadata/log_bit.rs b/src/util/metadata/log_bit.rs index a5a9b8644f..6ea012acbd 100644 --- a/src/util/metadata/log_bit.rs +++ b/src/util/metadata/log_bit.rs @@ -1,4 +1,6 @@ +use crate::util::Address; use crate::util::ObjectReference; +use crate::vm::ObjectModel; use crate::vm::VMBinding; use crate::vm::VMGlobalLogBitSpec; use std::sync::atomic::Ordering; @@ -38,3 +40,31 @@ impl VMGlobalLogBitSpec { self.load_atomic::(object, None, order) == 1 } } + +/// This specifies what to do to the global side unlog bits in various functions or work packets. +#[derive(Clone, Copy, PartialEq, Eq)] +pub(crate) enum UnlogBitsOperation { + /// Do nothing. + NoOp, + /// Bulk set unlog bits to all 1s. + BulkSet, + /// Bulk clear unlog bits to all 0s. + BulkClear, +} + +impl UnlogBitsOperation { + /// Run the specified operation on the address range from `start` to `start + size`. + pub(crate) fn execute(&self, start: Address, size: usize) { + if let MetadataSpec::OnSide(ref unlog_bits) = *VM::VMObjectModel::GLOBAL_LOG_BIT_SPEC { + match self { + UnlogBitsOperation::NoOp => {} + UnlogBitsOperation::BulkSet => { + unlog_bits.bset_metadata(start, size); + } + UnlogBitsOperation::BulkClear => { + unlog_bits.bzero_metadata(start, size); + } + } + } + } +} diff --git a/src/util/options.rs b/src/util/options.rs index fab347c153..fd8952d1da 100644 --- a/src/util/options.rs +++ b/src/util/options.rs @@ -50,6 +50,8 @@ pub enum PlanSelector { Compressor, /// An Immix collector that uses a sticky mark bit to allow generational behaviors without a copying nursery. StickyImmix, + /// Concurrent non-moving immix using SATB + ConcurrentImmix, } /// MMTk option for perf events diff --git a/src/util/reference_processor.rs b/src/util/reference_processor.rs index 3ca0486707..a5be7d3d4d 100644 --- a/src/util/reference_processor.rs +++ b/src/util/reference_processor.rs @@ -252,6 +252,11 @@ impl ReferenceProcessor { /// Inform the binding to enqueue the weak references whose referents were cleared in this GC. pub fn enqueue(&self, tls: VMWorkerThread) { + // We will acquire a lock below. If anyone tries to insert new weak refs which will acquire the same lock, a deadlock will occur. + // This does happen for OpenJDK with ConcurrentImmix where a write barrier is triggered during the enqueueing of weak references, + // and the write barrier scans the objects and attempts to add new weak references. + // Disallow new candidates to prevent the deadlock. + self.disallow_new_candidate(); let mut sync = self.sync.lock().unwrap(); // This is the end of a GC. We do some assertions here to make sure our reference tables are correct. 
diff --git a/src/vm/tests/mock_tests/mock_test_allocator_info.rs b/src/vm/tests/mock_tests/mock_test_allocator_info.rs index fc288e8041..a856278e94 100644 --- a/src/vm/tests/mock_tests/mock_test_allocator_info.rs +++ b/src/vm/tests/mock_tests/mock_test_allocator_info.rs @@ -29,6 +29,7 @@ pub fn test_allocator_info() { | PlanSelector::GenImmix | PlanSelector::MarkCompact | PlanSelector::Compressor + | PlanSelector::ConcurrentImmix | PlanSelector::StickyImmix => { // These plans all use bump pointer allocator. let AllocatorInfo::BumpPointer { diff --git a/tools/tracing/timeline/capture.bt b/tools/tracing/timeline/capture.bt index 31547c6e88..953ce72d2e 100644 --- a/tools/tracing/timeline/capture.bt +++ b/tools/tracing/timeline/capture.bt @@ -2,6 +2,7 @@ BEGIN { @harness = $HARNESS; @gc_count = 0; + @is_initial_mark = 0; if (!@harness) { //always collect data @@ -30,7 +31,6 @@ usdt:$MMTK:mmtk:harness_end { usdt:$MMTK:mmtk:gc_start { printf("GC,B,%d,%lu\n", tid, nsecs); - @gc_count += 1; // bpftrace warns that signed `%` operator may have undefined behavior. if ((uint64)@gc_count % $EVERY == 0 && @stats_enabled) { @enable_print = 1; @@ -41,6 +41,14 @@ usdt:$MMTK:mmtk:gc_start { usdt:$MMTK:mmtk:gc_end { printf("GC,E,%d,%lu\n", tid, nsecs); + + // We don't increment the GC count for an InitialMark pause, so that we always visualize both the InitialMark and the FinalMark of a concurrent GC, or neither. + // FIXME: mmtk-core should emit distinct events for GC end and pause end. + if (!@is_initial_mark) { + @gc_count += 1; + } + + @is_initial_mark = 0; } usdt:$MMTK:mmtk:bucket_opened { @@ -124,6 +132,27 @@ usdt:$MMTK:mmtk:sweep_chunk { } } +usdt:$MMTK:mmtk:concurrent_trace_objects { + if (@enable_print) { + printf("concurrent_trace_objects,meta,%d,%lu,%lu,%lu,%lu\n", tid, nsecs, arg0, arg1, arg2); + } +} + +usdt:$MMTK:mmtk:gcrequester_request { + printf("gcrequester_request,i,%d,%lu\n", tid, nsecs); +} + +usdt:$MMTK:mmtk:add_schedule_collection_packet { + printf("add_schedule_collection_packet,i,%d,%lu\n", tid, nsecs); +} + +usdt:$MMTK:mmtk:concurrent_pause_determined { + printf("concurrent_pause_determined,meta,%d,%lu,%lu\n", tid, nsecs, arg0); + if (arg0 == 2) { // InitialMark + @is_initial_mark = 1; + } +} + usdt:$MMTK:mmtk:finalization { if (@enable_print) { printf("finalization,meta,%d,%lu,%lu,%lu,%lu,%lu\n", tid, nsecs, arg0, arg1, arg2, arg3); diff --git a/tools/tracing/timeline/visualize.py b/tools/tracing/timeline/visualize.py index 70063861c9..24c7bd9338 100755 --- a/tools/tracing/timeline/visualize.py +++ b/tools/tracing/timeline/visualize.py @@ -22,6 +22,11 @@ class Semantics(Enum): WEAK = 1 PHANTOM = 2 +class Pause(Enum): + FULL = 1 + INITIAL_MARK = 2 + FINAL_MARK = 3 + def get_args(): parser = argparse.ArgumentParser( description=""" @@ -156,6 +161,9 @@ def enrich_event(self, name, ph, tid, ts, result, args): "stage": int(args[0]), } + case "gcrequester_request": + result["tid"] = 1 + case _: if self.enrich_event_extra is not None: # Call ``enrich_event_extra`` in the extension script if defined. @@ -251,6 +259,32 @@ def enrich_meta(self, name, tid, ts, gc, wp, args): } } + case "concurrent_trace_objects": + objects = int(args[0]) + next_objects = int(args[1]) + iterations = int(args[2]) + total_objects = objects + next_objects + wp["args"] |= { + # Put args in a group. See comments in "process_slots". 
+ "scan_objects": { + "objects": objects, + "next_objects": next_objects, + "total_objects": total_objects, + "iterations": iterations, + } + } + + case "concurrent_pause_determined": + pause_int = int(args[0]) + if pause_int in Pause: + pause = Pause(pause_int).name + else: + pause = f"(Unknown:{pause_int})" + + gc["args"] |= { + "pause": pause, + } + case "sweep_chunk": wp["args"] |= { "allocated_blocks": int(args[0]),