From bed3a217438833357f4c616a4b31e303ab620add Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Mon, 1 Sep 2025 17:30:08 +0800 Subject: [PATCH 1/3] Reorganize Space::acquire We rewrite the function into a more linear style and remove duplicated code of handling blocking for GC. Although this PR does not intend to change the logic of Space::acquire, it does fix some obvious bugs. - Previously if the current thread is a mutator, or GC is not enabled, the thread will attempt to poll and block for GC if it fails to get new pages from the PageResource. Now it panics. - Previously the `on_pending_allocation` after failing to get new pages from PageResource did not include side metadata. Now it always includes side metadata. --- src/policy/space.rs | 292 +++++++++++++++++++++++--------------------- 1 file changed, 152 insertions(+), 140 deletions(-) diff --git a/src/policy/space.rs b/src/policy/space.rs index 654bcb22a6..6605fe28e7 100644 --- a/src/policy/space.rs +++ b/src/policy/space.rs @@ -107,52 +107,42 @@ pub trait Space: 'static + SFT + Sync + Downcast { "The requested pages is larger than the max heap size. Is will_go_oom_on_acquire used before acquring memory?" ); - // Should we poll to attempt to GC? - // - If tls is collector, we cannot attempt a GC. - // - If gc is disabled, we cannot attempt a GC. - // - If overcommit is allowed, we don't attempt a GC. - let should_poll = VM::VMActivePlan::is_mutator(tls) - && VM::VMCollection::is_collection_enabled() - && !alloc_options.on_fail.allow_overcommit(); - // Is a GC allowed here? If we should poll but are not allowed to poll, we will panic. - // initialize_collection() has to be called so we know GC is initialized. 
- let allow_gc = should_poll - && self.common().global_state.is_initialized() - && alloc_options.on_fail.allow_gc(); - trace!("Reserving pages"); let pr = self.get_page_resource(); let pages_reserved = pr.reserve_pages(pages); trace!("Pages reserved"); trace!("Polling .."); - if should_poll && self.get_gc_trigger().poll(false, Some(self.as_space())) { - // Clear the request - pr.clear_request(pages_reserved); + // Whether we have triggered GC. There are two places in this function that can trigger GC. + let mut gc_triggered = false; - // If we do not want GC on fail, just return zero. - if !alloc_options.on_fail.allow_gc() { - return Address::ZERO; - } + let is_mutator = VM::VMActivePlan::is_mutator(tls); + let is_collection_enabled = VM::VMCollection::is_collection_enabled(); + let allow_overcommit = alloc_options.on_fail.allow_overcommit(); - // Otherwise do GC here - debug!("Collection required"); - assert!(allow_gc, "GC is not allowed here: collection is not initialized (did you call initialize_collection()?)."); + // In some cases, allocation failure cannot be tolerated. + // - Collector threads cannot fail when copying objects. + // - When GC is disabled, allocation failure will result in immediate panic. + let must_succeed = !is_mutator || !is_collection_enabled; - // Inform GC trigger about the pending allocation. - let meta_pages_reserved = self.estimate_side_meta_pages(pages_reserved); - let total_pages_reserved = pages_reserved + meta_pages_reserved; - self.get_gc_trigger() - .policy - .on_pending_allocation(total_pages_reserved); + // Is this allocation site allowed to trigger a GC? + // - If allocation failure cannot be tolerated, we won't trigger GC. + // - The `OnAllocationFail::OverCommit` case does not allow triggering GC. + // TODO: We can allow it to trigger GC AND over-commit at the same time. 
+ let allow_triggering_gc = !must_succeed && !allow_overcommit; - VM::VMCollection::block_for_gc(VMMutatorThread(tls)); // We have checked that this is mutator + if allow_triggering_gc { + gc_triggered = self.get_gc_trigger().poll(false, Some(self.as_space())); + debug!("Polled. GC triggered? {gc_triggered}"); + } - // Return zero -- the caller will handle re-attempting allocation - Address::ZERO - } else { - debug!("Collection not required"); + // We should try to allocate if GC was not triggered. + // It includes the case that we didn't poll at all. + // TODO: If we allow triggering GC AND over-committing at the same time, + // we can still allocate even if polling triggers GC. + let should_try_to_allocate = !gc_triggered; + let attempted_allocation_and_failed = if should_try_to_allocate { // We need this lock: Othrewise, it is possible that one thread acquires pages in a new chunk, but not yet // set SFT for it (in grow_space()), and another thread acquires pages in the same chunk, which is not // a new chunk so grow_space() won't be called on it. The second thread could return a result in the chunk before @@ -161,125 +151,147 @@ pub trait Space: 'static + SFT + Sync + Downcast { // See: https://github.com/mmtk/mmtk-core/issues/610 let lock = self.common().acquire_lock.lock().unwrap(); - match pr.get_new_pages(self.common().descriptor, pages_reserved, pages, tls) { - Ok(res) => { - debug!( - "Got new pages {} ({} pages) for {} in chunk {}, new_chunk? {}", - res.start, - res.pages, - self.get_name(), - conversions::chunk_align_down(res.start), - res.new_chunk - ); - let bytes = conversions::pages_to_bytes(res.pages); - - let mmap = || { - // Mmap the pages and the side metadata, and handle error. In case of any error, - // we will either call back to the VM for OOM, or simply panic. 
- if let Err(mmap_error) = self - .common() - .mmapper - .ensure_mapped( - res.start, - res.pages, - self.common().mmap_strategy(), - &memory::MmapAnnotation::Space { - name: self.get_name(), - }, - ) - .and(self.common().metadata.try_map_metadata_space( - res.start, - bytes, - self.get_name(), - )) - { - memory::handle_mmap_error::(mmap_error, tls, res.start, bytes); - } - }; - let grow_space = || { - self.grow_space(res.start, bytes, res.new_chunk); - }; - - // The scope of the lock is important in terms of performance when we have many allocator threads. - if SFT_MAP.get_side_metadata().is_some() { - // If the SFT map uses side metadata, so we have to initialize side metadata first. - mmap(); - // then grow space, which will use the side metadata we mapped above - grow_space(); - // then we can drop the lock after grow_space() - drop(lock); - } else { - // In normal cases, we can drop lock immediately after grow_space() - grow_space(); - drop(lock); - // and map side metadata without holding the lock - mmap(); + if let Ok(res) = pr.get_new_pages(self.common().descriptor, pages_reserved, pages, tls) + { + debug!( + "Got new pages {} ({} pages) for {} in chunk {}, new_chunk? {}", + res.start, + res.pages, + self.get_name(), + conversions::chunk_align_down(res.start), + res.new_chunk + ); + let bytes = conversions::pages_to_bytes(res.pages); + + let mmap = || { + // Mmap the pages and the side metadata, and handle error. In case of any error, + // we will either call back to the VM for OOM, or simply panic. 
+ if let Err(mmap_error) = self + .common() + .mmapper + .ensure_mapped( + res.start, + res.pages, + self.common().mmap_strategy(), + &memory::MmapAnnotation::Space { + name: self.get_name(), + }, + ) + .and(self.common().metadata.try_map_metadata_space( + res.start, + bytes, + self.get_name(), + )) + { + memory::handle_mmap_error::(mmap_error, tls, res.start, bytes); } + }; + let grow_space = || { + self.grow_space(res.start, bytes, res.new_chunk); + }; + + // The scope of the lock is important in terms of performance when we have many allocator threads. + if SFT_MAP.get_side_metadata().is_some() { + // If the SFT map uses side metadata, so we have to initialize side metadata first. + mmap(); + // then grow space, which will use the side metadata we mapped above + grow_space(); + // then we can drop the lock after grow_space() + drop(lock); + } else { + // In normal cases, we can drop lock immediately after grow_space() + grow_space(); + drop(lock); + // and map side metadata without holding the lock + mmap(); + } - // TODO: Concurrent zeroing - if self.common().zeroed { - memory::zero(res.start, bytes); - } + // TODO: Concurrent zeroing + if self.common().zeroed { + memory::zero(res.start, bytes); + } - // Some assertions - { - // --- Assert the start of the allocated region --- - // The start address SFT should be correct. - debug_assert_eq!(SFT_MAP.get_checked(res.start).name(), self.get_name()); - // The start address is in our space. - debug_assert!(self.address_in_space(res.start)); - // The descriptor should be correct. - debug_assert_eq!( - self.common().vm_map().get_descriptor_for_address(res.start), - self.common().descriptor - ); - - // --- Assert the last byte in the allocated region --- - let last_byte = res.start + bytes - 1; - // The SFT for the last byte in the allocated memory should be correct. - debug_assert_eq!(SFT_MAP.get_checked(last_byte).name(), self.get_name()); - // The last byte in the allocated memory should be in this space. 
- debug_assert!(self.address_in_space(last_byte)); - // The descriptor for the last byte should be correct. - debug_assert_eq!( - self.common().vm_map().get_descriptor_for_address(last_byte), - self.common().descriptor - ); - } + // Some assertions + { + // --- Assert the start of the allocated region --- + // The start address SFT should be correct. + debug_assert_eq!(SFT_MAP.get_checked(res.start).name(), self.get_name()); + // The start address is in our space. + debug_assert!(self.address_in_space(res.start)); + // The descriptor should be correct. + debug_assert_eq!( + self.common().vm_map().get_descriptor_for_address(res.start), + self.common().descriptor + ); - debug!("Space.acquire(), returned = {}", res.start); - res.start + // --- Assert the last byte in the allocated region --- + let last_byte = res.start + bytes - 1; + // The SFT for the last byte in the allocated memory should be correct. + debug_assert_eq!(SFT_MAP.get_checked(last_byte).name(), self.get_name()); + // The last byte in the allocated memory should be in this space. + debug_assert!(self.address_in_space(last_byte)); + // The descriptor for the last byte should be correct. + debug_assert_eq!( + self.common().vm_map().get_descriptor_for_address(last_byte), + self.common().descriptor + ); } - Err(_) => { - drop(lock); // drop the lock immediately - // Clear the request - pr.clear_request(pages_reserved); + debug!("Space.acquire(), returned = {}", res.start); + return res.start; + } - // If we do not want GC on fail, just return zero. - if !alloc_options.on_fail.allow_gc() { - return Address::ZERO; - } + true + } else { + false + }; - // We thought we had memory to allocate, but somehow failed the allocation. Will force a GC. - assert!( - allow_gc, - "Physical allocation failed when GC is not allowed!" - ); + // We didn't successfully allocate memory. 
- let gc_performed = self.get_gc_trigger().poll(true, Some(self.as_space())); - debug_assert!(gc_performed, "GC not performed when forced."); + // Clear the request + pr.clear_request(pages_reserved); - // Inform GC trigger about the pending allocation. - self.get_gc_trigger() - .policy - .on_pending_allocation(pages_reserved); + if must_succeed { + panic!("Failed to acquire pages. is mutator? {is_mutator}, is collection enabled? {is_collection_enabled}"); + } - VM::VMCollection::block_for_gc(VMMutatorThread(tls)); // We asserted that this is mutator. - Address::ZERO - } + if allow_triggering_gc { + if attempted_allocation_and_failed { + // If we reached here because we attempted allocation and failed, + // then we poll again with space_full=true. + // This time it must trigger GC. + gc_triggered = self.get_gc_trigger().poll(true, Some(self.as_space())); + debug_assert!(gc_triggered, "GC not performed when forced."); + } + + // Are we allowed to block for GC? + // - If the MMTk instance is not initialized, there is no GC worker threads, and we can't block for GC. + // - If the on_fail option does not allow blocking for GC, we don't block. + // TODO: on_fail.allow_gc() really means whether it allows blocking for GC instead of whether we allow GC to happen. + let allow_blocking_for_gc = allow_triggering_gc + && self.common().global_state.is_initialized() + && alloc_options.on_fail.allow_gc(); + + if allow_blocking_for_gc { + assert!( + gc_triggered, + "Attempt to block for GC when GC is not triggered." + ); + + // Inform GC trigger about the pending allocation. + // Make sure we include side metadata. 
+ let meta_pages_reserved = self.estimate_side_meta_pages(pages_reserved); + let total_pages_reserved = pages_reserved + meta_pages_reserved; + self.get_gc_trigger() + .policy + .on_pending_allocation(total_pages_reserved); + + VM::VMCollection::block_for_gc(VMMutatorThread(tls)); // We have checked that this is mutator } } + + // Return zero -- the caller will handle re-attempting allocation + Address::ZERO } fn address_in_space(&self, start: Address) -> bool { From 4219f1556ed54d7c28231d108826970374db2dfb Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Thu, 9 Oct 2025 17:41:29 +0800 Subject: [PATCH 2/3] Make fewer changes --- src/policy/space.rs | 85 ++++++++++++++++++++------------------------- 1 file changed, 37 insertions(+), 48 deletions(-) diff --git a/src/policy/space.rs b/src/policy/space.rs index 2860329bbc..c9778f937c 100644 --- a/src/policy/space.rs +++ b/src/policy/space.rs @@ -108,6 +108,19 @@ pub trait Space: 'static + SFT + Sync + Downcast { "The requested pages is larger than the max heap size. Is will_go_oom_on_acquire used before acquring memory?" ); + // Should we poll to attempt to GC? + // - If tls is collector, we cannot attempt a GC. + // - If gc is disabled, we cannot attempt a GC. + // - If overcommit is allowed, we don't attempt a GC. + let should_poll = VM::VMActivePlan::is_mutator(tls) + && VM::VMCollection::is_collection_enabled() + && !alloc_options.on_fail.allow_overcommit(); + // Is a GC allowed here? If we should poll but are not allowed to poll, we will panic. + // initialize_collection() has to be called so we know GC is initialized. + let allow_gc = should_poll + && self.common().global_state.is_initialized() + && alloc_options.on_fail.allow_gc(); + trace!("Reserving pages"); let pr = self.get_page_resource(); let pages_reserved = pr.reserve_pages(pages); @@ -117,22 +130,7 @@ pub trait Space: 'static + SFT + Sync + Downcast { // Whether we have triggered GC. There are two places in this function that can trigger GC. 
let mut gc_triggered = false; - let is_mutator = VM::VMActivePlan::is_mutator(tls); - let is_collection_enabled = VM::VMCollection::is_collection_enabled(); - let allow_overcommit = alloc_options.on_fail.allow_overcommit(); - - // In some cases, allocation failure cannot be tolerated. - // - Collector threads cannot fail when copying objects. - // - When GC is disabled, allocation failure will result in immediate panic. - let must_succeed = !is_mutator || !is_collection_enabled; - - // Is this allocation site allowed to trigger a GC? - // - If allocation failure cannot be tolerated, we won't trigger GC. - // - The `OnAllocationFail::OverCommit` case does not allow triggering GC. - // TODO: We can allow it to trigger GC AND over-commit at the same time. - let allow_triggering_gc = !must_succeed && !allow_overcommit; - - if allow_triggering_gc { + if should_poll { gc_triggered = self.get_gc_trigger().poll(false, Some(self.as_space())); debug!("Polled. GC triggered? {gc_triggered}"); } @@ -252,44 +250,35 @@ pub trait Space: 'static + SFT + Sync + Downcast { // Clear the request pr.clear_request(pages_reserved); - if must_succeed { - panic!("Failed to acquire pages. is mutator? {is_mutator}, is collection enabled? {is_collection_enabled}"); + // If we do not want GC on fail, just return zero. + if !alloc_options.on_fail.allow_gc() { + return Address::ZERO; } - if allow_triggering_gc { + // Otherwise do GC here + assert!( + allow_gc, + "{}", if attempted_allocation_and_failed { - // If we reached here because we attempted allocation and failed, - // then we poll again with space_full=true. - // This time it must trigger GC. - gc_triggered = self.get_gc_trigger().poll(true, Some(self.as_space())); - debug_assert!(gc_triggered, "GC not performed when forced."); + // We thought we had memory to allocate, but somehow failed the allocation. Will force a GC. + "Physical allocation failed when GC is not allowed!" 
+ } else { + "GC is not allowed here: collection is not initialized (did you call initialize_collection()?)." } + ); - // Are we allowed to block for GC? - // - If the MMTk instance is not initialized, there is no GC worker threads, and we can't block for GC. - // - If the on_fail option does not allow blocking for GC, we don't block. - // TODO: on_fail.allow_gc() really means whether it allows blocking for GC instead of whether we allow GC to happen. - let allow_blocking_for_gc = allow_triggering_gc - && self.common().global_state.is_initialized() - && alloc_options.on_fail.allow_gc(); - - if allow_blocking_for_gc { - assert!( - gc_triggered, - "Attempt to block for GC when GC is not triggered." - ); - - // Inform GC trigger about the pending allocation. - // Make sure we include side metadata. - let meta_pages_reserved = self.estimate_side_meta_pages(pages_reserved); - let total_pages_reserved = pages_reserved + meta_pages_reserved; - self.get_gc_trigger() - .policy - .on_pending_allocation(total_pages_reserved); - - VM::VMCollection::block_for_gc(VMMutatorThread(tls)); // We have checked that this is mutator - } + if attempted_allocation_and_failed { + let gc_performed = self.get_gc_trigger().poll(true, Some(self.as_space())); + debug_assert!(gc_performed, "GC not performed when forced."); } + // Inform GC trigger about the pending allocation. 
+ let meta_pages_reserved = self.estimate_side_meta_pages(pages_reserved); + let total_pages_reserved = pages_reserved + meta_pages_reserved; + self.get_gc_trigger() + .policy + .on_pending_allocation(total_pages_reserved); + + VM::VMCollection::block_for_gc(VMMutatorThread(tls)); // We have checked that this is mutator // Return zero -- the caller will handle re-attempting allocation Address::ZERO From 06680f10f3937f6dba1ebc74432842e4c1a8d086 Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Thu, 9 Oct 2025 17:43:12 +0800 Subject: [PATCH 3/3] Add another log, just to be consistent --- src/policy/space.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/policy/space.rs b/src/policy/space.rs index c9778f937c..6b7546550d 100644 --- a/src/policy/space.rs +++ b/src/policy/space.rs @@ -268,8 +268,9 @@ pub trait Space: 'static + SFT + Sync + Downcast { ); if attempted_allocation_and_failed { - let gc_performed = self.get_gc_trigger().poll(true, Some(self.as_space())); - debug_assert!(gc_performed, "GC not performed when forced."); + let gc_triggered = self.get_gc_trigger().poll(true, Some(self.as_space())); + debug!("Polled. GC triggered? {gc_triggered}"); + debug_assert!(gc_triggered, "GC not triggered when forced."); } // Inform GC trigger about the pending allocation. let meta_pages_reserved = self.estimate_side_meta_pages(pages_reserved);