Skip to content

Commit f3cb5a2

Browse files
authored
Checking for RDMA support before allocating via VMM in test suite (#1179)
* Checking for RDMA support before allocating via VMM * whitespace * Improving the test_memory suite. * improving tests and skip checks
1 parent 2a46c40 commit f3cb5a2

File tree

2 files changed

+47
-15
lines changed

2 files changed

+47
-15
lines changed

cuda_core/cuda/core/experimental/_memory.pyx

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1132,7 +1132,7 @@ class VirtualMemoryResourceOptions:
11321132
location_type: VirtualMemoryLocationTypeT = "device"
11331133
handle_type: VirtualMemoryHandleTypeT = "posix_fd"
11341134
granularity: VirtualMemoryGranularityT = "recommended"
1135-
gpu_direct_rdma: bool = True
1135+
gpu_direct_rdma: bool = False
11361136
addr_hint: Optional[int] = 0
11371137
addr_align: Optional[int] = None
11381138
peers: Iterable[int] = field(default_factory=tuple)
@@ -1211,6 +1211,11 @@ class VirtualMemoryResource(MemoryResource):
12111211
if platform.system() == "Windows":
12121212
raise NotImplementedError("VirtualMemoryResource is not supported on Windows")
12131213

1214+
# Validate RDMA support if requested
1215+
if self.config.gpu_direct_rdma and self.device is not None:
1216+
if not self.device.properties.gpu_direct_rdma_supported:
1217+
raise RuntimeError("GPU Direct RDMA is not supported on this device")
1218+
12141219
@staticmethod
12151220
def _align_up(size: int, gran: int) -> int:
12161221
"""

cuda_core/tests/test_memory.py

Lines changed: 41 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
from cuda.core.experimental._utils.cuda_utils import handle_return
2929
from cuda.core.experimental.utils import StridedMemoryView
3030

31-
from cuda_python_test_helpers import IS_WSL, supports_ipc_mempool
31+
from cuda_python_test_helpers import supports_ipc_mempool
3232

3333
POOL_SIZE = 2097152 # 2MB size
3434

@@ -322,13 +322,13 @@ def test_vmm_allocator_basic_allocation():
322322
This test verifies that VirtualMemoryResource can allocate memory
323323
using CUDA VMM APIs with default configuration.
324324
"""
325-
if platform.system() == "Windows":
326-
pytest.skip("VirtualMemoryResource is not supported on Windows TCC")
327-
if IS_WSL:
328-
pytest.skip("VirtualMemoryResource is not supported on WSL")
329-
330325
device = Device()
331326
device.set_current()
327+
328+
# Skip if virtual memory management is not supported
329+
if not device.properties.virtual_memory_management_supported:
330+
pytest.skip("Virtual memory management is not supported on this device")
331+
332332
options = VirtualMemoryResourceOptions()
333333
# Create VMM allocator with default config
334334
vmm_mr = VirtualMemoryResource(device, config=options)
@@ -361,13 +361,17 @@ def test_vmm_allocator_policy_configuration():
361361
with different allocation policies and that the configuration affects
362362
the allocation behavior.
363363
"""
364-
if platform.system() == "Windows":
365-
pytest.skip("VirtualMemoryResource is not supported on Windows TCC")
366-
if IS_WSL:
367-
pytest.skip("VirtualMemoryResource is not supported on WSL")
368364
device = Device()
369365
device.set_current()
370366

367+
# Skip if virtual memory management is not supported
368+
if not device.properties.virtual_memory_management_supported:
369+
pytest.skip("Virtual memory management is not supported on this device")
370+
371+
# Skip if GPU Direct RDMA is not supported (this test only runs on devices that support it)
372+
if not device.properties.gpu_direct_rdma_supported:
373+
pytest.skip("This test requires a device that doesn't support GPU Direct RDMA")
374+
371375
# Test with custom VMM config
372376
custom_config = VirtualMemoryResourceOptions(
373377
allocation_type="pinned",
@@ -420,13 +424,13 @@ def test_vmm_allocator_grow_allocation():
420424
This test verifies that VirtualMemoryResource can grow existing
421425
allocations while preserving the base pointer when possible.
422426
"""
423-
if platform.system() == "Windows":
424-
pytest.skip("VirtualMemoryResource is not supported on Windows TCC")
425-
if IS_WSL:
426-
pytest.skip("VirtualMemoryResource is not supported on WSL")
427427
device = Device()
428428
device.set_current()
429429

430+
# Skip if virtual memory management is not supported (we need it for VMM)
431+
if not device.properties.virtual_memory_management_supported:
432+
pytest.skip("Virtual memory management is not supported on this device")
433+
430434
options = VirtualMemoryResourceOptions()
431435

432436
vmm_mr = VirtualMemoryResource(device, config=options)
@@ -458,6 +462,29 @@ def test_vmm_allocator_grow_allocation():
458462
grown_buffer.close()
459463

460464

465+
def test_vmm_allocator_rdma_unsupported_exception():
466+
"""Test that VirtualMemoryResource throws an exception when RDMA is requested but device doesn't support it.
467+
468+
This test verifies that the VirtualMemoryResource constructor throws a RuntimeError
469+
when gpu_direct_rdma=True is requested but the device doesn't support GPU Direct RDMA.
470+
"""
471+
device = Device()
472+
device.set_current()
473+
474+
# Skip if virtual memory management is not supported (we need it for VMM)
475+
if not device.properties.virtual_memory_management_supported:
476+
pytest.skip("Virtual memory management is not supported on this device")
477+
478+
# Skip if GPU Direct RDMA is supported (we want to test the unsupported case)
479+
if device.properties.gpu_direct_rdma_supported:
480+
pytest.skip("This test requires a device that doesn't support GPU Direct RDMA")
481+
482+
# Test that requesting RDMA on an unsupported device throws an exception
483+
options = VirtualMemoryResourceOptions(gpu_direct_rdma=True)
484+
with pytest.raises(RuntimeError, match="GPU Direct RDMA is not supported on this device"):
485+
VirtualMemoryResource(device, config=options)
486+
487+
461488
def test_mempool(mempool_device):
462489
device = mempool_device
463490

0 commit comments

Comments
 (0)