Skip to content

Commit 06dac9c

Browse files
Copilot and leofang
committed
Fix merge conflict and undo broad *.c exclusion in .gitignore
Co-authored-by: leofang <5534781+leofang@users.noreply.github.com>
1 parent bc47c54 commit 06dac9c

File tree

2 files changed

+17
-1
lines changed

2 files changed

+17
-1
lines changed

.gitignore

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ __pycache__/
55

66
# C extensions
77
*.so
8-
*.c
98

109
# CUDA Python specific
1110
.cache/

cuda_core/cuda/core/experimental/_memory.pyx

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,23 @@ class DeviceMemoryResource(MemoryResource):
333333
self._handle = handle_return(driver.cuDeviceGetMemPool(device_id))
334334
self._dev_id = device_id
335335

336+
# Set a higher release threshold to improve performance when there are no active allocations.
337+
# By default, the release threshold is 0, which means the pool tries to release unused memory back
338+
# to the OS at the next synchronization point, forcing costly re-allocation from the OS later.
339+
# Check current release threshold
340+
current_threshold = handle_return(
341+
driver.cuMemPoolGetAttribute(self._handle, driver.CUmemPool_attribute.CU_MEMPOOL_ATTR_RELEASE_THRESHOLD)
342+
)
343+
# If threshold is 0 (default), set it to maximum to retain memory in the pool
344+
if int(current_threshold) == 0:
345+
handle_return(
346+
driver.cuMemPoolSetAttribute(
347+
self._handle,
348+
driver.CUmemPool_attribute.CU_MEMPOOL_ATTR_RELEASE_THRESHOLD,
349+
driver.cuuint64_t(0xFFFFFFFFFFFFFFFF),
350+
)
351+
)
352+
336353
def allocate(self, size: int, stream: Stream = None) -> Buffer:
337354
"""Allocate a buffer of the requested size.
338355

0 commit comments

Comments
 (0)