File tree Expand file tree Collapse file tree 2 files changed +17
-1
lines changed
cuda_core/cuda/core/experimental Expand file tree Collapse file tree 2 files changed +17
-1
lines changed Original file line number Diff line number Diff line change @@ -5,7 +5,6 @@ __pycache__/
55
66# C extensions
77* .so
8- * .c
98
109# CUDA Python specific
1110.cache /
Original file line number Diff line number Diff line change @@ -333,6 +333,23 @@ class DeviceMemoryResource(MemoryResource):
333333 self ._handle = handle_return(driver.cuDeviceGetMemPool(device_id))
334334 self ._dev_id = device_id
335335
336+ # Set a higher release threshold so the pool keeps freed memory cached for reuse.
337+ # By default, the release threshold is 0, which means the pool tries to release unused memory
338+ # back to the OS at the next synchronization point, making later allocations re-reserve it.
339+ # Check current release threshold
340+ current_threshold = handle_return(
341+ driver.cuMemPoolGetAttribute(self ._handle, driver.CUmemPool_attribute.CU_MEMPOOL_ATTR_RELEASE_THRESHOLD)
342+ )
343+ # If threshold is 0 (default), set it to maximum to retain memory in the pool
344+ if int (current_threshold) == 0 :
345+ handle_return(
346+ driver.cuMemPoolSetAttribute(
347+ self ._handle,
348+ driver.CUmemPool_attribute.CU_MEMPOOL_ATTR_RELEASE_THRESHOLD,
349+ driver.cuuint64_t(0xFFFFFFFFFFFFFFFF ),
350+ )
351+ )
352+
336353 def allocate (self , size: int , stream: Stream = None ) -> Buffer:
337354 """Allocate a buffer of the requested size.
338355
You can’t perform that action at this time.
0 commit comments