From d312f5eeca2b2c9d48fa48a8de2ca3a079651331 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 6 Oct 2025 00:35:19 +0000 Subject: [PATCH 01/22] Initial plan From c255dee3e680e0171526d8d8e609623b3803a8fd Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 6 Oct 2025 00:47:33 +0000 Subject: [PATCH 02/22] Add CUDA backend support with runtime detection Co-authored-by: mawad-amd <112003944+mawad-amd@users.noreply.github.com> --- iris/_hip.py | 170 ++++++++++++++++++++++++++++++++++++++++++++ iris/cuda.py | 164 +++++++++++++++++++++++++++++++++++++++++++ iris/hip.py | 194 ++++++++++++--------------------------------------- iris/iris.py | 4 +- 4 files changed, 380 insertions(+), 152 deletions(-) create mode 100644 iris/_hip.py create mode 100644 iris/cuda.py diff --git a/iris/_hip.py b/iris/_hip.py new file mode 100644 index 00000000..f6f4d8ff --- /dev/null +++ b/iris/_hip.py @@ -0,0 +1,170 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. + +import ctypes +import numpy as np +import sys +import torch +import subprocess +import os + +rt_path = "libamdhip64.so" +hip_runtime = ctypes.cdll.LoadLibrary(rt_path) + + +def hip_try(err): + if err != 0: + hip_runtime.hipGetErrorString.restype = ctypes.c_char_p + error_string = hip_runtime.hipGetErrorString(ctypes.c_int(err)).decode("utf-8") + raise RuntimeError(f"HIP error code {err}: {error_string}") + + +class hipIpcMemHandle_t(ctypes.Structure): + _fields_ = [("reserved", ctypes.c_char * 64)] + + +def get_ipc_handle_size(): + """Return the size of IPC handle in bytes (64 for HIP).""" + return 64 + + +def open_ipc_handle(ipc_handle_data, rank): + ptr = ctypes.c_void_p() + hipIpcMemLazyEnablePeerAccess = ctypes.c_uint(1) + hip_runtime.hipIpcOpenMemHandle.argtypes = [ + ctypes.POINTER(ctypes.c_void_p), + hipIpcMemHandle_t, + ctypes.c_uint, + ] + if isinstance(ipc_handle_data, np.ndarray): + if ipc_handle_data.dtype != np.uint8 or ipc_handle_data.size != 64: + raise ValueError("ipc_handle_data must be a 64-element uint8 numpy array") + ipc_handle_bytes = ipc_handle_data.tobytes() + ipc_handle_data = (ctypes.c_char * 64).from_buffer_copy(ipc_handle_bytes) + else: + raise TypeError("ipc_handle_data must be a numpy.ndarray of dtype uint8 with 64 elements") + + raw_memory = ctypes.create_string_buffer(64) + ctypes.memset(raw_memory, 0x00, 64) + ipc_handle_struct = hipIpcMemHandle_t.from_buffer(raw_memory) + ipc_handle_data_bytes = bytes(ipc_handle_data) + ctypes.memmove(raw_memory, ipc_handle_data_bytes, 64) + + hip_try( + hip_runtime.hipIpcOpenMemHandle( + ctypes.byref(ptr), + ipc_handle_struct, + hipIpcMemLazyEnablePeerAccess, + ) + ) + + return ptr.value + + +def get_ipc_handle(ptr, rank): + ipc_handle = hipIpcMemHandle_t() + hip_try(hip_runtime.hipIpcGetMemHandle(ctypes.byref(ipc_handle), ptr)) + return ipc_handle + + +def count_devices(): + device_count = ctypes.c_int() + hip_try(hip_runtime.hipGetDeviceCount(ctypes.byref(device_count))) + return device_count.value + + +def set_device(gpu_id): + hip_try(hip_runtime.hipSetDevice(gpu_id)) + + +def get_device_id(): + device_id = ctypes.c_int() + hip_try(hip_runtime.hipGetDevice(ctypes.byref(device_id))) + return device_id.value + + +def get_cu_count(device_id=None): + if device_id is None: + device_id = get_device_id() + + hipDeviceAttributeMultiprocessorCount = 63 + cu_count = ctypes.c_int() + + 
hip_try(hip_runtime.hipDeviceGetAttribute(ctypes.byref(cu_count), hipDeviceAttributeMultiprocessorCount, device_id)) + + return cu_count.value + + +def get_rocm_version(): + major, minor = -1, -1 + + # Try hipconfig --path first + try: + result = subprocess.run(["hipconfig", "--path"], capture_output=True, text=True, check=True) + rocm_path = result.stdout.strip() + except (subprocess.CalledProcessError, FileNotFoundError): + # Then look for $ROCM_PATH environment variable + rocm_path = os.environ.get("ROCM_PATH") + if not rocm_path: + # Finally, try default location + rocm_path = "/opt/rocm" + + # Try to read version from .info/version file + try: + version_file_path = os.path.join(rocm_path, ".info", "version") + with open(version_file_path, "r") as version_file: + version = version_file.readline().strip() + major = int(version.split(".")[0]) + minor = int(version.split(".")[1]) + except (FileNotFoundError, IOError, ValueError, IndexError): + # If we can't read the version file, return -1, -1 + pass + + return (major, minor) + + +def get_wall_clock_rate(device_id): + hipDeviceAttributeWallClockRate = 10017 + wall_clock_rate = ctypes.c_int() + status = hip_runtime.hipDeviceGetAttribute( + ctypes.byref(wall_clock_rate), hipDeviceAttributeWallClockRate, device_id + ) + hip_try(status) + return wall_clock_rate.value + + +def get_arch_string(device_id=None): + if device_id is None: + device_id = get_device_id() + arch_full = torch.cuda.get_device_properties(device_id).gcnArchName + arch_name = arch_full.split(":")[0] + return arch_name + + +def get_num_xcc(device_id=None): + if device_id is None: + device_id = get_device_id() + rocm_major, _ = get_rocm_version() + if rocm_major < 7: + return 8 + hipDeviceAttributeNumberOfXccs = 10018 + xcc_count = ctypes.c_int() + hip_try(hip_runtime.hipDeviceGetAttribute(ctypes.byref(xcc_count), hipDeviceAttributeNumberOfXccs, device_id)) + return xcc_count.value + + +def malloc_fine_grained(size): + hipDeviceMallocFinegrained = 0x1 + ptr = ctypes.c_void_p() + hip_try(hip_runtime.hipExtMallocWithFlags(ctypes.byref(ptr), size, hipDeviceMallocFinegrained)) + return ptr + + +def hip_malloc(size): + ptr = ctypes.c_void_p() + hip_try(hip_runtime.hipMalloc(ctypes.byref(ptr), size)) + return ptr + + +def hip_free(ptr): + hip_try(hip_runtime.hipFree(ptr)) diff --git a/iris/cuda.py b/iris/cuda.py new file mode 100644 index 00000000..aa7a04ea --- /dev/null +++ b/iris/cuda.py @@ -0,0 +1,164 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. 
+ +import ctypes +import numpy as np +import sys +import torch +import subprocess +import os + +rt_path = "libcudart.so" +cuda_runtime = ctypes.cdll.LoadLibrary(rt_path) + + +def hip_try(err): + if err != 0: + cuda_runtime.cudaGetErrorString.restype = ctypes.c_char_p + error_string = cuda_runtime.cudaGetErrorString(ctypes.c_int(err)).decode("utf-8") + raise RuntimeError(f"CUDA error code {err}: {error_string}") + + +class hipIpcMemHandle_t(ctypes.Structure): + _fields_ = [("internal", ctypes.c_byte * 128)] + + +def get_ipc_handle_size(): + """Return the size of IPC handle in bytes (128 for CUDA).""" + return 128 + + +def open_ipc_handle(ipc_handle_data, rank): + ptr = ctypes.c_void_p() + cudaIpcMemLazyEnablePeerAccess = ctypes.c_uint(1) + cuda_runtime.cudaIpcOpenMemHandle.argtypes = [ + ctypes.POINTER(ctypes.c_void_p), + hipIpcMemHandle_t, + ctypes.c_uint, + ] + if isinstance(ipc_handle_data, np.ndarray): + if ipc_handle_data.dtype != np.uint8 or ipc_handle_data.size != 128: + raise ValueError("ipc_handle_data must be a 128-element uint8 numpy array") + ipc_handle_bytes = ipc_handle_data.tobytes() + ipc_handle_data = (ctypes.c_char * 128).from_buffer_copy(ipc_handle_bytes) + else: + raise TypeError("ipc_handle_data must be a numpy.ndarray of dtype uint8 with 128 elements") + + raw_memory = ctypes.create_string_buffer(128) + ctypes.memset(raw_memory, 0x00, 128) + ipc_handle_struct = hipIpcMemHandle_t.from_buffer(raw_memory) + ipc_handle_data_bytes = bytes(ipc_handle_data) + ctypes.memmove(raw_memory, ipc_handle_data_bytes, 128) + + hip_try( + cuda_runtime.cudaIpcOpenMemHandle( + ctypes.byref(ptr), + ipc_handle_struct, + cudaIpcMemLazyEnablePeerAccess, + ) + ) + + return ptr.value + + +def get_ipc_handle(ptr, rank): + ipc_handle = hipIpcMemHandle_t() + hip_try(cuda_runtime.cudaIpcGetMemHandle(ctypes.byref(ipc_handle), ptr)) + return ipc_handle + + +def count_devices(): + device_count = ctypes.c_int() + hip_try(cuda_runtime.cudaGetDeviceCount(ctypes.byref(device_count))) + return device_count.value + + +def set_device(gpu_id): + hip_try(cuda_runtime.cudaSetDevice(gpu_id)) + + +def get_device_id(): + device_id = ctypes.c_int() + hip_try(cuda_runtime.cudaGetDevice(ctypes.byref(device_id))) + return device_id.value + + +def get_cu_count(device_id=None): + if device_id is None: + device_id = get_device_id() + + cudaDeviceAttributeMultiprocessorCount = 16 + cu_count = ctypes.c_int() + + hip_try( + cuda_runtime.cudaDeviceGetAttribute(ctypes.byref(cu_count), cudaDeviceAttributeMultiprocessorCount, device_id) + ) + + return cu_count.value + + +def get_rocm_version(): + # Return CUDA version instead + major, minor = -1, -1 + + # Try nvcc --version + try: + result = subprocess.run(["nvcc", "--version"], capture_output=True, text=True, check=True) + # Parse version from output like "release 12.0, V12.0.76" + for line in result.stdout.split("\n"): + if "release" in line.lower(): + version_part = line.split("release")[1].strip().split(",")[0] + parts = version_part.split(".") + if len(parts) >= 2: + major = int(parts[0]) + minor = int(parts[1]) + break + except (subprocess.CalledProcessError, FileNotFoundError, ValueError, IndexError): + # If we can't get CUDA version, try environment variable + cuda_version = os.environ.get("CUDA_VERSION") + if cuda_version: + try: + parts = cuda_version.split(".") + major = int(parts[0]) + minor = int(parts[1]) if len(parts) > 1 else 0 + except (ValueError, IndexError): + pass + + return (major, minor) + + +def get_wall_clock_rate(device_id): + 
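+    # Note: the CUDA runtime appears to have no direct analogue of HIP's
+    # hipDeviceAttributeWallClockRate; cudaDevAttrMemoryClockRate (36, in kHz)
+    # is used here as a stand-in for the wall clock rate.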
cudaDevAttrMemoryClockRate = 36 + wall_clock_rate = ctypes.c_int() + status = cuda_runtime.cudaDeviceGetAttribute(ctypes.byref(wall_clock_rate), cudaDevAttrMemoryClockRate, device_id) + hip_try(status) + return wall_clock_rate.value + + +def get_arch_string(device_id=None): + if device_id is None: + device_id = get_device_id() + # For CUDA, get compute capability + device_props = torch.cuda.get_device_properties(device_id) + major = device_props.major + minor = device_props.minor + return f"sm_{major}{minor}" + + +def get_num_xcc(device_id=None): + # XCC is AMD-specific, return 1 for CUDA + return 1 + + +def malloc_fine_grained(size): + return hip_malloc(size) + + +def hip_malloc(size): + ptr = ctypes.c_void_p() + hip_try(cuda_runtime.cudaMalloc(ctypes.byref(ptr), size)) + return ptr + + +def hip_free(ptr): + hip_try(cuda_runtime.cudaFree(ptr)) diff --git a/iris/hip.py b/iris/hip.py index ba9e2051..bf6d73c9 100644 --- a/iris/hip.py +++ b/iris/hip.py @@ -1,165 +1,57 @@ # SPDX-License-Identifier: MIT # Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. -import ctypes -import numpy as np -import sys -import torch -import subprocess -import os - -rt_path = "libamdhip64.so" -hip_runtime = ctypes.cdll.LoadLibrary(rt_path) - - -def hip_try(err): - if err != 0: - hip_runtime.hipGetErrorString.restype = ctypes.c_char_p - error_string = hip_runtime.hipGetErrorString(ctypes.c_int(err)).decode("utf-8") - raise RuntimeError(f"HIP error code {err}: {error_string}") - - -class hipIpcMemHandle_t(ctypes.Structure): - _fields_ = [("reserved", ctypes.c_char * 64)] - - -def open_ipc_handle(ipc_handle_data, rank): - ptr = ctypes.c_void_p() - hipIpcMemLazyEnablePeerAccess = ctypes.c_uint(1) - hip_runtime.hipIpcOpenMemHandle.argtypes = [ - ctypes.POINTER(ctypes.c_void_p), - hipIpcMemHandle_t, - ctypes.c_uint, - ] - if isinstance(ipc_handle_data, np.ndarray): - if ipc_handle_data.dtype != np.uint8 or ipc_handle_data.size != 64: - raise ValueError("ipc_handle_data must be a 64-element uint8 numpy array") - ipc_handle_bytes = ipc_handle_data.tobytes() - ipc_handle_data = (ctypes.c_char * 64).from_buffer_copy(ipc_handle_bytes) - else: - raise TypeError("ipc_handle_data must be a numpy.ndarray of dtype uint8 with 64 elements") - - raw_memory = ctypes.create_string_buffer(64) - ctypes.memset(raw_memory, 0x00, 64) - ipc_handle_struct = hipIpcMemHandle_t.from_buffer(raw_memory) - ipc_handle_data_bytes = bytes(ipc_handle_data) - ctypes.memmove(raw_memory, ipc_handle_data_bytes, 64) - - hip_try( - hip_runtime.hipIpcOpenMemHandle( - ctypes.byref(ptr), - ipc_handle_struct, - hipIpcMemLazyEnablePeerAccess, - ) - ) - - return ptr.value - - -def get_ipc_handle(ptr, rank): - ipc_handle = hipIpcMemHandle_t() - hip_try(hip_runtime.hipIpcGetMemHandle(ctypes.byref(ipc_handle), ptr)) - return ipc_handle - - -def count_devices(): - device_count = ctypes.c_int() - hip_try(hip_runtime.hipGetDeviceCount(ctypes.byref(device_count))) - return device_count.value - - -def set_device(gpu_id): - hip_try(hip_runtime.hipSetDevice(gpu_id)) - +""" +HIP-compatible API facade for Iris. -def get_device_id(): - device_id = ctypes.c_int() - hip_try(hip_runtime.hipGetDevice(ctypes.byref(device_id))) - return device_id.value +This module provides a HIP-compatible interface that transparently redirects +to either the HIP backend (AMD GPUs) or CUDA backend (NVIDIA GPUs) based on +runtime detection or configuration. +The backend is selected based on: +1. IRIS_BACKEND environment variable (set to 'cuda' or 'hip') +2. 
Auto-detection based on available libraries +""" -def get_cu_count(device_id=None): - if device_id is None: - device_id = get_device_id() - - hipDeviceAttributeMultiprocessorCount = 63 - cu_count = ctypes.c_int() - - hip_try(hip_runtime.hipDeviceGetAttribute(ctypes.byref(cu_count), hipDeviceAttributeMultiprocessorCount, device_id)) - - return cu_count.value - - -def get_rocm_version(): - major, minor = -1, -1 +import os +import sys - # Try hipconfig --path first +# Detect backend +def _detect_backend(): + """Detect which backend to use based on environment and available libraries.""" + backend_env = os.environ.get("IRIS_BACKEND", "").lower() + if backend_env in ("cuda", "nvidia"): + return "cuda" + elif backend_env in ("hip", "amd", "rocm"): + return "hip" + + # Auto-detect by trying to load libraries + import ctypes try: - result = subprocess.run(["hipconfig", "--path"], capture_output=True, text=True, check=True) - rocm_path = result.stdout.strip() - except (subprocess.CalledProcessError, FileNotFoundError): - # Then look for $ROCM_PATH environment variable - rocm_path = os.environ.get("ROCM_PATH") - if not rocm_path: - # Finally, try default location - rocm_path = "/opt/rocm" - - # Try to read version from .info/version file + ctypes.cdll.LoadLibrary("libamdhip64.so") + return "hip" + except (OSError, FileNotFoundError): + pass + try: - version_file_path = os.path.join(rocm_path, ".info", "version") - with open(version_file_path, "r") as version_file: - version = version_file.readline().strip() - major = int(version.split(".")[0]) - minor = int(version.split(".")[1]) - except (FileNotFoundError, IOError, ValueError, IndexError): - # If we can't read the version file, return -1, -1 + ctypes.cdll.LoadLibrary("libcudart.so") + return "cuda" + except (OSError, FileNotFoundError): pass - - return (major, minor) - - -def get_wall_clock_rate(device_id): - hipDeviceAttributeWallClockRate = 10017 - wall_clock_rate = ctypes.c_int() - status = hip_runtime.hipDeviceGetAttribute( - ctypes.byref(wall_clock_rate), hipDeviceAttributeWallClockRate, device_id - ) - hip_try(status) - return wall_clock_rate.value - - -def get_arch_string(device_id=None): - if device_id is None: - device_id = get_device_id() - arch_full = torch.cuda.get_device_properties(device_id).gcnArchName - arch_name = arch_full.split(":")[0] - return arch_name - - -def get_num_xcc(device_id=None): - if device_id is None: - device_id = get_device_id() - rocm_major, _ = get_rocm_version() - if rocm_major < 7: - return 8 - hipDeviceAttributeNumberOfXccs = 10018 - xcc_count = ctypes.c_int() - hip_try(hip_runtime.hipDeviceGetAttribute(ctypes.byref(xcc_count), hipDeviceAttributeNumberOfXccs, device_id)) - return xcc_count.value - - -def malloc_fine_grained(size): - hipDeviceMallocFinegrained = 0x1 - ptr = ctypes.c_void_p() - hip_try(hip_runtime.hipExtMallocWithFlags(ctypes.byref(ptr), size, hipDeviceMallocFinegrained)) - return ptr + + # Default to hip for backward compatibility + return "hip" -def hip_malloc(size): - ptr = ctypes.c_void_p() - hip_try(hip_runtime.hipMalloc(ctypes.byref(ptr), size)) - return ptr +_backend = _detect_backend() +# Import from the appropriate backend module +if _backend == "cuda": + from iris.cuda import * +else: + from iris._hip import * -def hip_free(ptr): - hip_try(hip_runtime.hipFree(ptr)) +# Make backend information available +def get_backend(): + """Get the currently active backend name ('hip' or 'cuda').""" + return _backend diff --git a/iris/iris.py b/iris/iris.py index be91eacb..5a9b5142 100644 --- 
a/iris/iris.py +++ b/iris/iris.py @@ -39,6 +39,7 @@ get_ipc_handle, open_ipc_handle, get_wall_clock_rate, + get_ipc_handle_size, ) import numpy as np import math @@ -89,7 +90,8 @@ def __init__(self, heap_size=1 << 30): heap_bases = np.zeros(num_ranks, dtype=np.uint64) heap_bases[cur_rank] = heap_base - ipc_handles = np.zeros((num_ranks, 64), dtype=np.uint8) + ipc_handle_size = get_ipc_handle_size() + ipc_handles = np.zeros((num_ranks, ipc_handle_size), dtype=np.uint8) ipc_handle = get_ipc_handle(heap_base_ptr, cur_rank) distributed_barrier() From 259afe6ba00220b732e3a621d251f38b3e3769a7 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 6 Oct 2025 00:51:49 +0000 Subject: [PATCH 03/22] Add unit tests and fix linting issues for CUDA backend Co-authored-by: mawad-amd <112003944+mawad-amd@users.noreply.github.com> --- iris/hip.py | 27 ++- tests/unittests/test_backend_detection.py | 233 ++++++++++++++++++++++ 2 files changed, 254 insertions(+), 6 deletions(-) create mode 100644 tests/unittests/test_backend_detection.py diff --git a/iris/hip.py b/iris/hip.py index bf6d73c9..5d820135 100644 --- a/iris/hip.py +++ b/iris/hip.py @@ -15,6 +15,8 @@ import os import sys +import importlib.util + # Detect backend def _detect_backend(): @@ -24,32 +26,45 @@ def _detect_backend(): return "cuda" elif backend_env in ("hip", "amd", "rocm"): return "hip" - + # Auto-detect by trying to load libraries import ctypes + try: ctypes.cdll.LoadLibrary("libamdhip64.so") return "hip" except (OSError, FileNotFoundError): pass - + try: ctypes.cdll.LoadLibrary("libcudart.so") return "cuda" except (OSError, FileNotFoundError): pass - + # Default to hip for backward compatibility return "hip" _backend = _detect_backend() -# Import from the appropriate backend module +# Load the appropriate backend module directly without triggering __init__.py +_module_dir = os.path.dirname(__file__) if _backend == "cuda": - from iris.cuda import * + _module_path = os.path.join(_module_dir, "cuda.py") + _spec = importlib.util.spec_from_file_location("iris._cuda_backend", _module_path) else: - from iris._hip import * + _module_path = os.path.join(_module_dir, "_hip.py") + _spec = importlib.util.spec_from_file_location("iris._hip_backend", _module_path) + +_runtime_module = importlib.util.module_from_spec(_spec) +_spec.loader.exec_module(_runtime_module) + +# Export all public symbols from the backend module +for _name in dir(_runtime_module): + if not _name.startswith("_"): + globals()[_name] = getattr(_runtime_module, _name) + # Make backend information available def get_backend(): diff --git a/tests/unittests/test_backend_detection.py b/tests/unittests/test_backend_detection.py new file mode 100644 index 00000000..ced3e850 --- /dev/null +++ b/tests/unittests/test_backend_detection.py @@ -0,0 +1,233 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. + +""" +Unit tests for backend detection and runtime module selection. + +These tests verify that the backend detection logic works correctly +and that the appropriate backend module is selected based on configuration. 
+""" + +import os +import sys +import pytest +import importlib +import importlib.util + + +def test_backend_detection_default(): + """Test that default backend is HIP when no environment variable is set.""" + # Clear any existing IRIS_BACKEND setting + old_env = os.environ.pop("IRIS_BACKEND", None) + + try: + # Load hip.py directly to test detection logic + spec = importlib.util.spec_from_file_location( + "hip_test", + os.path.join(os.path.dirname(__file__), "../../iris/hip.py") + ) + hip_module = importlib.util.module_from_spec(spec) + + # Execute the module - detection happens before trying to load backend + try: + spec.loader.exec_module(hip_module) + except OSError: + # Expected - GPU library not found, but detection logic runs first + pass + + # Check that backend was set to 'hip' (default) + assert hasattr(hip_module, '_backend') + assert hip_module._backend == 'hip', "Default backend should be 'hip'" + + finally: + # Restore environment + if old_env is not None: + os.environ["IRIS_BACKEND"] = old_env + + +def test_backend_detection_cuda_env(): + """Test that CUDA backend is selected when IRIS_BACKEND=cuda.""" + old_env = os.environ.get("IRIS_BACKEND") + + try: + os.environ["IRIS_BACKEND"] = "cuda" + + # Load hip.py directly + spec = importlib.util.spec_from_file_location( + "hip_test_cuda", + os.path.join(os.path.dirname(__file__), "../../iris/hip.py") + ) + hip_module = importlib.util.module_from_spec(spec) + + try: + spec.loader.exec_module(hip_module) + except OSError: + # Expected - GPU library not found + pass + + # Check that backend was set to 'cuda' + assert hasattr(hip_module, '_backend') + assert hip_module._backend == 'cuda', "Backend should be 'cuda' when IRIS_BACKEND=cuda" + + finally: + # Restore environment + if old_env is not None: + os.environ["IRIS_BACKEND"] = old_env + else: + os.environ.pop("IRIS_BACKEND", None) + + +def test_backend_detection_nvidia_alias(): + """Test that CUDA backend is selected when IRIS_BACKEND=nvidia.""" + old_env = os.environ.get("IRIS_BACKEND") + + try: + os.environ["IRIS_BACKEND"] = "nvidia" + + # Load hip.py directly + spec = importlib.util.spec_from_file_location( + "hip_test_nvidia", + os.path.join(os.path.dirname(__file__), "../../iris/hip.py") + ) + hip_module = importlib.util.module_from_spec(spec) + + try: + spec.loader.exec_module(hip_module) + except OSError: + # Expected - GPU library not found + pass + + # Check that backend was set to 'cuda' + assert hasattr(hip_module, '_backend') + assert hip_module._backend == 'cuda', "Backend should be 'cuda' when IRIS_BACKEND=nvidia" + + finally: + # Restore environment + if old_env is not None: + os.environ["IRIS_BACKEND"] = old_env + else: + os.environ.pop("IRIS_BACKEND", None) + + +def test_backend_detection_hip_env(): + """Test that HIP backend is selected when IRIS_BACKEND=hip.""" + old_env = os.environ.get("IRIS_BACKEND") + + try: + os.environ["IRIS_BACKEND"] = "hip" + + # Load hip.py directly + spec = importlib.util.spec_from_file_location( + "hip_test_hip", + os.path.join(os.path.dirname(__file__), "../../iris/hip.py") + ) + hip_module = importlib.util.module_from_spec(spec) + + try: + spec.loader.exec_module(hip_module) + except OSError: + # Expected - GPU library not found + pass + + # Check that backend was set to 'hip' + assert hasattr(hip_module, '_backend') + assert hip_module._backend == 'hip', "Backend should be 'hip' when IRIS_BACKEND=hip" + + finally: + # Restore environment + if old_env is not None: + os.environ["IRIS_BACKEND"] = old_env + else: + 
os.environ.pop("IRIS_BACKEND", None) + + +def test_backend_detection_amd_alias(): + """Test that HIP backend is selected when IRIS_BACKEND=amd.""" + old_env = os.environ.get("IRIS_BACKEND") + + try: + os.environ["IRIS_BACKEND"] = "amd" + + # Load hip.py directly + spec = importlib.util.spec_from_file_location( + "hip_test_amd", + os.path.join(os.path.dirname(__file__), "../../iris/hip.py") + ) + hip_module = importlib.util.module_from_spec(spec) + + try: + spec.loader.exec_module(hip_module) + except OSError: + # Expected - GPU library not found + pass + + # Check that backend was set to 'hip' + assert hasattr(hip_module, '_backend') + assert hip_module._backend == 'hip', "Backend should be 'hip' when IRIS_BACKEND=amd" + + finally: + # Restore environment + if old_env is not None: + os.environ["IRIS_BACKEND"] = old_env + else: + os.environ.pop("IRIS_BACKEND", None) + + +def test_ipc_handle_size_definition(): + """Test that IPC handle size functions are defined correctly in source files.""" + import re + + # Check _hip.py defines get_ipc_handle_size returning 64 + hip_file = os.path.join(os.path.dirname(__file__), "../../iris/_hip.py") + with open(hip_file, 'r') as f: + hip_content = f.read() + + assert 'def get_ipc_handle_size()' in hip_content, "_hip.py should define get_ipc_handle_size" + assert 'return 64' in hip_content, "_hip.py should return 64 for IPC handle size" + + # Check cuda.py defines get_ipc_handle_size returning 128 + cuda_file = os.path.join(os.path.dirname(__file__), "../../iris/cuda.py") + with open(cuda_file, 'r') as f: + cuda_content = f.read() + + assert 'def get_ipc_handle_size()' in cuda_content, "cuda.py should define get_ipc_handle_size" + assert 'return 128' in cuda_content, "cuda.py should return 128 for IPC handle size" + + +def test_hip_module_structure(): + """Test that hip.py has the expected structure for backend redirection.""" + hip_file = os.path.join(os.path.dirname(__file__), "../../iris/hip.py") + with open(hip_file, 'r') as f: + hip_content = f.read() + + # Check for backend detection function + assert '_detect_backend' in hip_content, "hip.py should have _detect_backend function" + + # Check for get_backend function + assert 'def get_backend():' in hip_content, "hip.py should have get_backend function" + + # Check for environment variable handling + assert 'IRIS_BACKEND' in hip_content, "hip.py should check IRIS_BACKEND environment variable" + + # Check for backend aliases + assert 'cuda' in hip_content and 'nvidia' in hip_content, "hip.py should support cuda/nvidia aliases" + assert 'hip' in hip_content and 'amd' in hip_content, "hip.py should support hip/amd aliases" + + +def test_iris_py_uses_get_ipc_handle_size(): + """Test that iris.py uses get_ipc_handle_size from hip module.""" + iris_file = os.path.join(os.path.dirname(__file__), "../../iris/iris.py") + with open(iris_file, 'r') as f: + iris_content = f.read() + + # Check that get_ipc_handle_size is imported + assert 'get_ipc_handle_size' in iris_content, "iris.py should import get_ipc_handle_size" + + # Check that it's used instead of hardcoded 64 + assert 'ipc_handle_size = get_ipc_handle_size()' in iris_content, "iris.py should call get_ipc_handle_size()" + + +if __name__ == "__main__": + # Run tests + pytest.main([__file__, "-v"]) + From 3f3e78e5c69843fefc21a6909c33e0adc3810c7c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 6 Oct 2025 00:52:45 +0000 Subject: [PATCH 04/22] Update README with CUDA backend documentation 
Co-authored-by: mawad-amd <112003944+mawad-amd@users.noreply.github.com> --- README.md | 50 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 2131d3f1..774d0e42 100644 --- a/README.md +++ b/README.md @@ -103,7 +103,9 @@ if __name__ == "__main__": ### Quick Installation > [!NOTE] -> **Requirements**: Python 3.10+, PyTorch 2.0+ (ROCm version), ROCm 6.3.1+ HIP runtime, and Triton +> **Requirements**: Python 3.10+, PyTorch 2.0+, and Triton +> - For AMD GPUs: ROCm 6.3.1+ HIP runtime and PyTorch ROCm version +> - For NVIDIA GPUs: CUDA 11.0+ runtime and PyTorch CUDA version For a quick installation directly from the repository: @@ -111,6 +113,19 @@ For a quick installation directly from the repository: pip install git+https://github.com/ROCm/iris.git ``` +To use Iris with NVIDIA GPUs, set the backend before importing: + +```bash +export IRIS_BACKEND=cuda +``` + +Or install with the backend specified: + +```shell +# Note: Backend selection is via environment variable, not install-time config +IRIS_BACKEND=cuda pip install git+https://github.com/ROCm/iris.git +``` + ### Docker Compose (Recommended for Development) The recommended way to get started is using Docker Compose, which provides a development environment with the Iris directory mounted inside the container. This allows you to make changes to the code outside the container and see them reflected inside. @@ -139,11 +154,44 @@ Check out our [examples](examples/) directory for ready-to-run scripts and usage Iris currently supports: +### AMD GPUs (HIP Backend) - MI300X, MI350X & MI355X +### NVIDIA GPUs (CUDA Backend) +- All CUDA-capable GPUs with compute capability 7.0+ + > [!NOTE] > Iris may work on other AMD GPUs with ROCm compatibility. +### Backend Selection + +Iris automatically detects the available GPU backend at runtime. You can also explicitly select the backend using the `IRIS_BACKEND` environment variable: + +```bash +# Use CUDA backend for NVIDIA GPUs +export IRIS_BACKEND=cuda +# or +export IRIS_BACKEND=nvidia + +# Use HIP backend for AMD GPUs (default) +export IRIS_BACKEND=hip +# or +export IRIS_BACKEND=amd +``` + +Alternatively, set the environment variable in your Python script: + +```python +import os +os.environ["IRIS_BACKEND"] = "cuda" # Must be set before importing iris +import iris +``` + +The backend detection priority is: +1. `IRIS_BACKEND` environment variable +2. Auto-detection based on available GPU libraries +3. Default to HIP for backward compatibility + ## Roadmap We plan to extend Iris with the following features: From c6a9aa3cf77fc7de3e892026c94f96779269c4ca Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 6 Oct 2025 00:54:32 +0000 Subject: [PATCH 05/22] Add CUDA backend example script Co-authored-by: mawad-amd <112003944+mawad-amd@users.noreply.github.com> --- examples/cuda_backend_example.py | 92 ++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 examples/cuda_backend_example.py diff --git a/examples/cuda_backend_example.py b/examples/cuda_backend_example.py new file mode 100644 index 00000000..ba343b2d --- /dev/null +++ b/examples/cuda_backend_example.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. + +""" +Example demonstrating how to use Iris with CUDA backend for NVIDIA GPUs. 
+ +This script shows how to set the backend and verify it's correctly loaded. +""" + +import os +import sys + +# Set CUDA backend before importing iris +# This must be done before importing iris to take effect +os.environ["IRIS_BACKEND"] = "cuda" + +# Now import iris - it will use the CUDA backend +import iris + +def main(): + """ + Demonstrate CUDA backend usage with Iris. + + This example shows: + 1. How to set the CUDA backend + 2. How to verify the backend is loaded + 3. Basic Iris operations with CUDA + """ + + print("=" * 60) + print("Iris CUDA Backend Example") + print("=" * 60) + + # Check which backend is being used + try: + backend = iris.hip.get_backend() + print(f"✓ Backend loaded: {backend}") + + if backend == "cuda": + print("✓ Successfully using CUDA backend for NVIDIA GPUs") + else: + print(f"! Note: Using {backend} backend instead of CUDA") + except Exception as e: + print(f"✗ Could not determine backend: {e}") + + # Initialize Iris with a symmetric heap + heap_size = 1 << 30 # 1 GB + print(f"\nInitializing Iris with {heap_size / (1024**3):.1f} GB heap...") + + try: + ctx = iris.iris(heap_size) + print(f"✓ Iris initialized successfully") + print(f" - Rank: {ctx.get_rank()}") + print(f" - Number of ranks: {ctx.get_num_ranks()}") + print(f" - Device: {ctx.get_device()}") + print(f" - Compute units: {ctx.get_cu_count()}") + + # Allocate a tensor on the symmetric heap + print("\nAllocating tensor on symmetric heap...") + tensor = ctx.zeros(1000, 1000, dtype=torch.float32) + print(f"✓ Tensor allocated: shape={tensor.shape}, dtype={tensor.dtype}") + print(f" - On symmetric heap: {ctx._Iris__on_symmetric_heap(tensor)}") + print(f" - Device: {tensor.device}") + + except Exception as e: + print(f"✗ Error initializing Iris: {e}") + print("\nNote: This example requires:") + print(" - NVIDIA GPU with CUDA support") + print(" - PyTorch with CUDA") + print(" - NCCL for distributed operations") + sys.exit(1) + + print("\n" + "=" * 60) + print("Example completed successfully!") + print("=" * 60) + + +if __name__ == "__main__": + import torch + import torch.distributed as dist + + # Check if CUDA is available + if not torch.cuda.is_available(): + print("Error: CUDA is not available. This example requires NVIDIA GPU.") + print("\nTo use Iris with AMD GPUs, use the default HIP backend:") + print(" python your_script.py # No IRIS_BACKEND needed") + sys.exit(1) + + # For this simple example, we'll run single-rank + # For multi-rank examples, see the examples/ directory + main() From 0ba8573ad4863cea0657bc1130a2a40aa9f66aee Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 6 Oct 2025 00:57:52 +0000 Subject: [PATCH 06/22] Fix linting issues in example script Co-authored-by: mawad-amd <112003944+mawad-amd@users.noreply.github.com> --- examples/cuda_backend_example.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/examples/cuda_backend_example.py b/examples/cuda_backend_example.py index ba343b2d..534e42f3 100644 --- a/examples/cuda_backend_example.py +++ b/examples/cuda_backend_example.py @@ -21,48 +21,48 @@ def main(): """ Demonstrate CUDA backend usage with Iris. - + This example shows: 1. How to set the CUDA backend 2. How to verify the backend is loaded 3. 
Basic Iris operations with CUDA """ - + print("=" * 60) print("Iris CUDA Backend Example") print("=" * 60) - + # Check which backend is being used try: backend = iris.hip.get_backend() print(f"✓ Backend loaded: {backend}") - + if backend == "cuda": print("✓ Successfully using CUDA backend for NVIDIA GPUs") else: print(f"! Note: Using {backend} backend instead of CUDA") except Exception as e: print(f"✗ Could not determine backend: {e}") - + # Initialize Iris with a symmetric heap heap_size = 1 << 30 # 1 GB print(f"\nInitializing Iris with {heap_size / (1024**3):.1f} GB heap...") - + try: ctx = iris.iris(heap_size) - print(f"✓ Iris initialized successfully") + print("✓ Iris initialized successfully") print(f" - Rank: {ctx.get_rank()}") print(f" - Number of ranks: {ctx.get_num_ranks()}") print(f" - Device: {ctx.get_device()}") print(f" - Compute units: {ctx.get_cu_count()}") - + # Allocate a tensor on the symmetric heap print("\nAllocating tensor on symmetric heap...") tensor = ctx.zeros(1000, 1000, dtype=torch.float32) print(f"✓ Tensor allocated: shape={tensor.shape}, dtype={tensor.dtype}") print(f" - On symmetric heap: {ctx._Iris__on_symmetric_heap(tensor)}") print(f" - Device: {tensor.device}") - + except Exception as e: print(f"✗ Error initializing Iris: {e}") print("\nNote: This example requires:") @@ -70,7 +70,7 @@ def main(): print(" - PyTorch with CUDA") print(" - NCCL for distributed operations") sys.exit(1) - + print("\n" + "=" * 60) print("Example completed successfully!") print("=" * 60) @@ -79,14 +79,14 @@ def main(): if __name__ == "__main__": import torch import torch.distributed as dist - + # Check if CUDA is available if not torch.cuda.is_available(): print("Error: CUDA is not available. This example requires NVIDIA GPU.") print("\nTo use Iris with AMD GPUs, use the default HIP backend:") print(" python your_script.py # No IRIS_BACKEND needed") sys.exit(1) - + # For this simple example, we'll run single-rank # For multi-rank examples, see the examples/ directory main() From 28965e69ea9c769f720d825c79209f6cd892b762 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 6 Oct 2025 01:08:29 +0000 Subject: [PATCH 07/22] Switch to build-time backend configuration with --config-settings - Add custom build backend (build_backend.py) to support --config-settings - Update pyproject.toml to use custom build backend - Modify iris/hip.py to prioritize build-time config over env vars - Update README with pip install --config-settings backend=nvidia examples - Add 3 new unit tests for build-time configuration (11 tests total pass) - Update example script to document both build-time and runtime methods - Add iris/.config/ to .gitignore Backend detection priority is now: 1. Build-time configuration (--config-settings backend=nvidia/hip) 2. IRIS_BACKEND environment variable (runtime) 3. Auto-detection based on available libraries 4. 
Default to HIP for backward compatibility Co-authored-by: mawad-amd <112003944+mawad-amd@users.noreply.github.com> --- .gitignore | 4 +- README.md | 41 ++++-- build_backend.py | 63 ++++++++++ examples/cuda_backend_example.py | 16 ++- iris/hip.py | 27 +++- pyproject.toml | 3 +- tests/unittests/test_backend_detection.py | 146 ++++++++++++++++++++++ 7 files changed, 278 insertions(+), 22 deletions(-) create mode 100644 build_backend.py diff --git a/.gitignore b/.gitignore index cbaac1bc..6242c995 100644 --- a/.gitignore +++ b/.gitignore @@ -27,4 +27,6 @@ slurm*.out *.egg-info examples/gemm/results/* -asm/ \ No newline at end of file +asm/ +# Backend configuration (generated at build time) +iris/.config/ diff --git a/README.md b/README.md index 774d0e42..09f7d7ad 100644 --- a/README.md +++ b/README.md @@ -113,19 +113,21 @@ For a quick installation directly from the repository: pip install git+https://github.com/ROCm/iris.git ``` -To use Iris with NVIDIA GPUs, set the backend before importing: +To use Iris with NVIDIA GPUs, install with the CUDA backend: -```bash -export IRIS_BACKEND=cuda +```shell +pip install git+https://github.com/ROCm/iris.git --config-settings backend=nvidia ``` -Or install with the backend specified: +To use Iris with AMD GPUs (default): ```shell -# Note: Backend selection is via environment variable, not install-time config -IRIS_BACKEND=cuda pip install git+https://github.com/ROCm/iris.git +pip install git+https://github.com/ROCm/iris.git --config-settings backend=hip ``` +> [!NOTE] +> The backend can also be controlled at runtime via the `IRIS_BACKEND` environment variable if not set at build time. + ### Docker Compose (Recommended for Development) The recommended way to get started is using Docker Compose, which provides a development environment with the Iris directory mounted inside the container. This allows you to make changes to the code outside the container and see them reflected inside. @@ -165,7 +167,23 @@ Iris currently supports: ### Backend Selection -Iris automatically detects the available GPU backend at runtime. You can also explicitly select the backend using the `IRIS_BACKEND` environment variable: +Iris supports two methods for backend selection: + +#### 1. Build-time Configuration (Recommended) + +Install Iris with the desired backend using `--config-settings`: + +```bash +# For NVIDIA GPUs +pip install git+https://github.com/ROCm/iris.git --config-settings backend=nvidia + +# For AMD GPUs +pip install git+https://github.com/ROCm/iris.git --config-settings backend=hip +``` + +#### 2. Runtime Environment Variable + +If no backend was specified at build time, you can control it via the `IRIS_BACKEND` environment variable: ```bash # Use CUDA backend for NVIDIA GPUs @@ -179,7 +197,7 @@ export IRIS_BACKEND=hip export IRIS_BACKEND=amd ``` -Alternatively, set the environment variable in your Python script: +Or set it in your Python script: ```python import os @@ -188,9 +206,10 @@ import iris ``` The backend detection priority is: -1. `IRIS_BACKEND` environment variable -2. Auto-detection based on available GPU libraries -3. Default to HIP for backward compatibility +1. Build-time configuration (set via `--config-settings`) +2. `IRIS_BACKEND` environment variable +3. Auto-detection based on available GPU libraries +4. 
Default to HIP for backward compatibility ## Roadmap diff --git a/build_backend.py b/build_backend.py new file mode 100644 index 00000000..185668cc --- /dev/null +++ b/build_backend.py @@ -0,0 +1,63 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. + +""" +Custom build backend to support backend selection via --config-settings. + +This allows users to install Iris with: + pip install . --config-settings backend=nvidia +or: + pip install . --config-settings backend=hip +""" + +import os +from setuptools import build_meta as _orig + +# Re-export all setuptools build_meta functions +prepare_metadata_for_build_wheel = _orig.prepare_metadata_for_build_wheel +get_requires_for_build_wheel = _orig.get_requires_for_build_wheel +get_requires_for_build_sdist = _orig.get_requires_for_build_sdist + + +def _write_backend_config(config_settings): + """Write backend configuration file based on --config-settings.""" + backend = None + + if config_settings: + backend = config_settings.get("backend", "").lower() + + # Normalize backend names + if backend in ("nvidia", "cuda"): + backend = "cuda" + elif backend in ("amd", "rocm", "hip"): + backend = "hip" + else: + backend = None # Auto-detect at runtime + + # Write configuration file + config_dir = os.path.join("iris", ".config") + os.makedirs(config_dir, exist_ok=True) + + config_file = os.path.join(config_dir, "backend.txt") + if backend: + with open(config_file, "w") as f: + f.write(backend) + print(f"Iris: Configured to use {backend} backend") + else: + # Remove config file if it exists (auto-detect mode) + if os.path.exists(config_file): + os.remove(config_file) + print("Iris: No backend specified, will auto-detect at runtime") + + +# Wrap build functions to inject backend configuration +def build_wheel(wheel_directory, config_settings=None, metadata_directory=None): + """Build wheel with backend configuration.""" + _write_backend_config(config_settings) + return _orig.build_wheel(wheel_directory, config_settings, metadata_directory) + + +def build_sdist(sdist_directory, config_settings=None): + """Build sdist with backend configuration.""" + _write_backend_config(config_settings) + return _orig.build_sdist(sdist_directory, config_settings) diff --git a/examples/cuda_backend_example.py b/examples/cuda_backend_example.py index 534e42f3..98670d24 100644 --- a/examples/cuda_backend_example.py +++ b/examples/cuda_backend_example.py @@ -5,19 +5,29 @@ """ Example demonstrating how to use Iris with CUDA backend for NVIDIA GPUs. -This script shows how to set the backend and verify it's correctly loaded. +This script shows how to use the CUDA backend and verify it's correctly loaded. + +Backend can be configured in two ways: +1. Build-time (recommended): + pip install git+https://github.com/ROCm/iris.git --config-settings backend=nvidia + +2. Runtime (if not set at build time): + export IRIS_BACKEND=cuda + python cuda_backend_example.py """ import os import sys -# Set CUDA backend before importing iris +# Set CUDA backend if not configured at build time # This must be done before importing iris to take effect -os.environ["IRIS_BACKEND"] = "cuda" +if "IRIS_BACKEND" not in os.environ: + os.environ["IRIS_BACKEND"] = "cuda" # Now import iris - it will use the CUDA backend import iris + def main(): """ Demonstrate CUDA backend usage with Iris. 
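For reference, a minimal sketch of how pip's `--config-settings` values reach the custom build backend introduced above (it calls the private `_write_backend_config` helper directly, as the unit tests added in this patch also do):

```python
# Sketch only: exercising build_backend._write_backend_config by hand.
# pip forwards `--config-settings backend=nvidia` to the build hooks as
# config_settings={"backend": "nvidia"}.
from build_backend import _write_backend_config

_write_backend_config({"backend": "nvidia"})  # writes "cuda" to iris/.config/backend.txt
_write_backend_config({"backend": "amd"})     # writes "hip"
_write_backend_config({})                     # no backend key: removes the file (auto-detect at runtime)
```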
diff --git a/iris/hip.py b/iris/hip.py index 5d820135..85c86b8c 100644 --- a/iris/hip.py +++ b/iris/hip.py @@ -6,11 +6,12 @@ This module provides a HIP-compatible interface that transparently redirects to either the HIP backend (AMD GPUs) or CUDA backend (NVIDIA GPUs) based on -runtime detection or configuration. +build-time configuration, runtime detection, or auto-detection. -The backend is selected based on: -1. IRIS_BACKEND environment variable (set to 'cuda' or 'hip') -2. Auto-detection based on available libraries +The backend is selected based on (in priority order): +1. Build-time configuration (set via pip install --config-settings backend=nvidia) +2. IRIS_BACKEND environment variable (set to 'cuda' or 'hip') +3. Auto-detection based on available libraries """ import os @@ -20,14 +21,28 @@ # Detect backend def _detect_backend(): - """Detect which backend to use based on environment and available libraries.""" + """Detect which backend to use based on build-time config, environment, and available libraries.""" + # 1. Check for build-time configuration file first + config_file = os.path.join(os.path.dirname(__file__), ".config", "backend.txt") + if os.path.exists(config_file): + try: + with open(config_file, "r") as f: + backend_config = f.read().strip().lower() + if backend_config in ("cuda", "nvidia"): + return "cuda" + elif backend_config in ("hip", "amd", "rocm"): + return "hip" + except (IOError, OSError): + pass + + # 2. Check environment variable backend_env = os.environ.get("IRIS_BACKEND", "").lower() if backend_env in ("cuda", "nvidia"): return "cuda" elif backend_env in ("hip", "amd", "rocm"): return "hip" - # Auto-detect by trying to load libraries + # 3. Auto-detect by trying to load libraries import ctypes try: diff --git a/pyproject.toml b/pyproject.toml index 88f7b2e3..e191a8c5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,8 @@ [build-system] requires = ["setuptools>=61", "wheel", "setuptools-scm>=8"] -build-backend = "setuptools.build_meta" +build-backend = "build_backend" +backend-path = ["."] [project] name = "iris" diff --git a/tests/unittests/test_backend_detection.py b/tests/unittests/test_backend_detection.py index ced3e850..cba15093 100644 --- a/tests/unittests/test_backend_detection.py +++ b/tests/unittests/test_backend_detection.py @@ -6,6 +6,12 @@ These tests verify that the backend detection logic works correctly and that the appropriate backend module is selected based on configuration. + +Backend selection priority: +1. Build-time configuration (--config-settings backend=nvidia) +2. IRIS_BACKEND environment variable +3. Auto-detection based on available libraries +4. 
Default to HIP """ import os @@ -231,3 +237,143 @@ def test_iris_py_uses_get_ipc_handle_size(): # Run tests pytest.main([__file__, "-v"]) + + +def test_build_time_config_cuda(): + """Test that build-time configuration for CUDA is respected.""" + import tempfile + import shutil + + # Create a temporary config + config_dir = os.path.join(os.path.dirname(__file__), "../../iris/.config") + os.makedirs(config_dir, exist_ok=True) + config_file = os.path.join(config_dir, "backend.txt") + + old_env = os.environ.get("IRIS_BACKEND") + + try: + # Write CUDA config + with open(config_file, "w") as f: + f.write("cuda") + + # Clear environment variable to test config priority + os.environ.pop("IRIS_BACKEND", None) + + # Load hip.py + spec = importlib.util.spec_from_file_location( + "hip_test_buildtime_cuda", + os.path.join(os.path.dirname(__file__), "../../iris/hip.py"), + ) + hip_module = importlib.util.module_from_spec(spec) + + try: + spec.loader.exec_module(hip_module) + except OSError: + # Expected - GPU library not found + pass + + # Check that backend was set to 'cuda' from config file + assert hasattr(hip_module, "_backend") + assert hip_module._backend == "cuda", "Build-time config should set backend to 'cuda'" + + finally: + # Clean up + if os.path.exists(config_file): + os.remove(config_file) + if os.path.exists(config_dir) and not os.listdir(config_dir): + os.rmdir(config_dir) + # Restore environment + if old_env is not None: + os.environ["IRIS_BACKEND"] = old_env + else: + os.environ.pop("IRIS_BACKEND", None) + + +def test_build_time_config_priority(): + """Test that build-time configuration takes priority over environment variable.""" + config_dir = os.path.join(os.path.dirname(__file__), "../../iris/.config") + os.makedirs(config_dir, exist_ok=True) + config_file = os.path.join(config_dir, "backend.txt") + + old_env = os.environ.get("IRIS_BACKEND") + + try: + # Write HIP config + with open(config_file, "w") as f: + f.write("hip") + + # Set environment to CUDA (should be overridden by config) + os.environ["IRIS_BACKEND"] = "cuda" + + # Load hip.py + spec = importlib.util.spec_from_file_location( + "hip_test_priority", + os.path.join(os.path.dirname(__file__), "../../iris/hip.py"), + ) + hip_module = importlib.util.module_from_spec(spec) + + try: + spec.loader.exec_module(hip_module) + except OSError: + # Expected - GPU library not found + pass + + # Check that config takes priority + assert hasattr(hip_module, "_backend") + assert hip_module._backend == "hip", "Build-time config should take priority over env var" + + finally: + # Clean up + if os.path.exists(config_file): + os.remove(config_file) + if os.path.exists(config_dir) and not os.listdir(config_dir): + os.rmdir(config_dir) + # Restore environment + if old_env is not None: + os.environ["IRIS_BACKEND"] = old_env + else: + os.environ.pop("IRIS_BACKEND", None) + + +def test_build_backend_module(): + """Test that the build_backend module can write configuration correctly.""" + + sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../..")) + + try: + from build_backend import _write_backend_config + + config_dir = os.path.join(os.path.dirname(__file__), "../../iris/.config") + config_file = os.path.join(config_dir, "backend.txt") + + # Test nvidia alias + _write_backend_config({"backend": "nvidia"}) + assert os.path.exists(config_file), "Config file should be created" + with open(config_file, "r") as f: + assert f.read() == "cuda", "nvidia should map to cuda" + + # Test hip + _write_backend_config({"backend": "hip"}) + with 
open(config_file, "r") as f: + assert f.read() == "hip", "hip should stay as hip" + + # Test amd alias + _write_backend_config({"backend": "amd"}) + with open(config_file, "r") as f: + assert f.read() == "hip", "amd should map to hip" + + # Test no config (should remove file) + _write_backend_config({}) + assert not os.path.exists(config_file), "Config file should be removed for auto-detect" + + finally: + # Clean up + if os.path.exists(config_file): + os.remove(config_file) + if os.path.exists(config_dir) and not os.listdir(config_dir): + os.rmdir(config_dir) + + +if __name__ == "__main__": + # Run tests + pytest.main([__file__, "-v"]) From 0a5992dec6ce1bf0cbb41d75b3b0f3e4b5b9d22a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 6 Oct 2025 02:43:59 +0000 Subject: [PATCH 08/22] Remove redundant line from cuda_backend_example.py docstring Co-authored-by: mawad-amd <112003944+mawad-amd@users.noreply.github.com> --- examples/cuda_backend_example.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/examples/cuda_backend_example.py b/examples/cuda_backend_example.py index 98670d24..e4ee8d34 100644 --- a/examples/cuda_backend_example.py +++ b/examples/cuda_backend_example.py @@ -5,8 +5,6 @@ """ Example demonstrating how to use Iris with CUDA backend for NVIDIA GPUs. -This script shows how to use the CUDA backend and verify it's correctly loaded. - Backend can be configured in two ways: 1. Build-time (recommended): pip install git+https://github.com/ROCm/iris.git --config-settings backend=nvidia From 698604c9817a82e6c40b0180f86265dce9a96d58 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 6 Oct 2025 02:50:39 +0000 Subject: [PATCH 09/22] Add build() function to setup.py for config_settings handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add build() hook in setup.py to handle --config-settings - Sets IRIS_BACKEND environment variable during build - Normalizes backend names (nvidia→cuda, amd/rocm→hip) - Updated build_backend.py to also set env var for consistency - All 11 unit tests pass Co-authored-by: mawad-amd <112003944+mawad-amd@users.noreply.github.com> --- build_backend.py | 4 ++++ setup.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/build_backend.py b/build_backend.py index 185668cc..06f7e2c9 100644 --- a/build_backend.py +++ b/build_backend.py @@ -34,6 +34,10 @@ def _write_backend_config(config_settings): else: backend = None # Auto-detect at runtime + # Also set environment variable as suggested in setup.py + if backend: + os.environ["IRIS_BACKEND"] = backend + # Write configuration file config_dir = os.path.join("iris", ".config") os.makedirs(config_dir, exist_ok=True) diff --git a/setup.py b/setup.py index 69832461..e7b4cef6 100644 --- a/setup.py +++ b/setup.py @@ -1,8 +1,36 @@ # SPDX-License-Identifier: MIT # Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +import os from setuptools import setup + +def build(setup_kwargs, config_settings=None): + """ + Build hook to configure backend via --config-settings. + + This function is called during the build process to handle + backend configuration from pip install --config-settings. 
+ """ + if config_settings is None: + config_settings = {} + + backend = config_settings.get("backend", "amd") + + # Normalize backend names + if backend.lower() in ("nvidia", "cuda"): + backend = "cuda" + elif backend.lower() in ("amd", "rocm", "hip"): + backend = "hip" + else: + backend = "hip" # Default to hip + + os.environ["IRIS_BACKEND"] = backend + print(f"Building Iris with backend={backend}") + + return setup_kwargs + + # This setup.py provides backward compatibility for legacy metadata fields # that don't map directly from pyproject.toml's modern PEP 621 format. setup( From fcfd0ccaafb4234603c820baae6f95730492a44c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 6 Oct 2025 02:55:51 +0000 Subject: [PATCH 10/22] Remove example and test files, rename cuda.py to _cuda.py - Remove examples/cuda_backend_example.py (per review feedback) - Remove tests/unittests/test_backend_detection.py (per review feedback) - Rename iris/cuda.py to iris/_cuda.py (per review feedback) - Update iris/hip.py to reference _cuda.py instead of cuda.py Co-authored-by: mawad-amd <112003944+mawad-amd@users.noreply.github.com> --- examples/cuda_backend_example.py | 100 ------ iris/{cuda.py => _cuda.py} | 0 iris/hip.py | 2 +- tests/unittests/test_backend_detection.py | 379 ---------------------- 4 files changed, 1 insertion(+), 480 deletions(-) delete mode 100644 examples/cuda_backend_example.py rename iris/{cuda.py => _cuda.py} (100%) delete mode 100644 tests/unittests/test_backend_detection.py diff --git a/examples/cuda_backend_example.py b/examples/cuda_backend_example.py deleted file mode 100644 index e4ee8d34..00000000 --- a/examples/cuda_backend_example.py +++ /dev/null @@ -1,100 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: MIT -# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. - -""" -Example demonstrating how to use Iris with CUDA backend for NVIDIA GPUs. - -Backend can be configured in two ways: -1. Build-time (recommended): - pip install git+https://github.com/ROCm/iris.git --config-settings backend=nvidia - -2. Runtime (if not set at build time): - export IRIS_BACKEND=cuda - python cuda_backend_example.py -""" - -import os -import sys - -# Set CUDA backend if not configured at build time -# This must be done before importing iris to take effect -if "IRIS_BACKEND" not in os.environ: - os.environ["IRIS_BACKEND"] = "cuda" - -# Now import iris - it will use the CUDA backend -import iris - - -def main(): - """ - Demonstrate CUDA backend usage with Iris. - - This example shows: - 1. How to set the CUDA backend - 2. How to verify the backend is loaded - 3. Basic Iris operations with CUDA - """ - - print("=" * 60) - print("Iris CUDA Backend Example") - print("=" * 60) - - # Check which backend is being used - try: - backend = iris.hip.get_backend() - print(f"✓ Backend loaded: {backend}") - - if backend == "cuda": - print("✓ Successfully using CUDA backend for NVIDIA GPUs") - else: - print(f"! 
Note: Using {backend} backend instead of CUDA") - except Exception as e: - print(f"✗ Could not determine backend: {e}") - - # Initialize Iris with a symmetric heap - heap_size = 1 << 30 # 1 GB - print(f"\nInitializing Iris with {heap_size / (1024**3):.1f} GB heap...") - - try: - ctx = iris.iris(heap_size) - print("✓ Iris initialized successfully") - print(f" - Rank: {ctx.get_rank()}") - print(f" - Number of ranks: {ctx.get_num_ranks()}") - print(f" - Device: {ctx.get_device()}") - print(f" - Compute units: {ctx.get_cu_count()}") - - # Allocate a tensor on the symmetric heap - print("\nAllocating tensor on symmetric heap...") - tensor = ctx.zeros(1000, 1000, dtype=torch.float32) - print(f"✓ Tensor allocated: shape={tensor.shape}, dtype={tensor.dtype}") - print(f" - On symmetric heap: {ctx._Iris__on_symmetric_heap(tensor)}") - print(f" - Device: {tensor.device}") - - except Exception as e: - print(f"✗ Error initializing Iris: {e}") - print("\nNote: This example requires:") - print(" - NVIDIA GPU with CUDA support") - print(" - PyTorch with CUDA") - print(" - NCCL for distributed operations") - sys.exit(1) - - print("\n" + "=" * 60) - print("Example completed successfully!") - print("=" * 60) - - -if __name__ == "__main__": - import torch - import torch.distributed as dist - - # Check if CUDA is available - if not torch.cuda.is_available(): - print("Error: CUDA is not available. This example requires NVIDIA GPU.") - print("\nTo use Iris with AMD GPUs, use the default HIP backend:") - print(" python your_script.py # No IRIS_BACKEND needed") - sys.exit(1) - - # For this simple example, we'll run single-rank - # For multi-rank examples, see the examples/ directory - main() diff --git a/iris/cuda.py b/iris/_cuda.py similarity index 100% rename from iris/cuda.py rename to iris/_cuda.py diff --git a/iris/hip.py b/iris/hip.py index 85c86b8c..d7645c39 100644 --- a/iris/hip.py +++ b/iris/hip.py @@ -66,7 +66,7 @@ def _detect_backend(): # Load the appropriate backend module directly without triggering __init__.py _module_dir = os.path.dirname(__file__) if _backend == "cuda": - _module_path = os.path.join(_module_dir, "cuda.py") + _module_path = os.path.join(_module_dir, "_cuda.py") _spec = importlib.util.spec_from_file_location("iris._cuda_backend", _module_path) else: _module_path = os.path.join(_module_dir, "_hip.py") diff --git a/tests/unittests/test_backend_detection.py b/tests/unittests/test_backend_detection.py deleted file mode 100644 index cba15093..00000000 --- a/tests/unittests/test_backend_detection.py +++ /dev/null @@ -1,379 +0,0 @@ -# SPDX-License-Identifier: MIT -# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. - -""" -Unit tests for backend detection and runtime module selection. - -These tests verify that the backend detection logic works correctly -and that the appropriate backend module is selected based on configuration. - -Backend selection priority: -1. Build-time configuration (--config-settings backend=nvidia) -2. IRIS_BACKEND environment variable -3. Auto-detection based on available libraries -4. 
Default to HIP -""" - -import os -import sys -import pytest -import importlib -import importlib.util - - -def test_backend_detection_default(): - """Test that default backend is HIP when no environment variable is set.""" - # Clear any existing IRIS_BACKEND setting - old_env = os.environ.pop("IRIS_BACKEND", None) - - try: - # Load hip.py directly to test detection logic - spec = importlib.util.spec_from_file_location( - "hip_test", - os.path.join(os.path.dirname(__file__), "../../iris/hip.py") - ) - hip_module = importlib.util.module_from_spec(spec) - - # Execute the module - detection happens before trying to load backend - try: - spec.loader.exec_module(hip_module) - except OSError: - # Expected - GPU library not found, but detection logic runs first - pass - - # Check that backend was set to 'hip' (default) - assert hasattr(hip_module, '_backend') - assert hip_module._backend == 'hip', "Default backend should be 'hip'" - - finally: - # Restore environment - if old_env is not None: - os.environ["IRIS_BACKEND"] = old_env - - -def test_backend_detection_cuda_env(): - """Test that CUDA backend is selected when IRIS_BACKEND=cuda.""" - old_env = os.environ.get("IRIS_BACKEND") - - try: - os.environ["IRIS_BACKEND"] = "cuda" - - # Load hip.py directly - spec = importlib.util.spec_from_file_location( - "hip_test_cuda", - os.path.join(os.path.dirname(__file__), "../../iris/hip.py") - ) - hip_module = importlib.util.module_from_spec(spec) - - try: - spec.loader.exec_module(hip_module) - except OSError: - # Expected - GPU library not found - pass - - # Check that backend was set to 'cuda' - assert hasattr(hip_module, '_backend') - assert hip_module._backend == 'cuda', "Backend should be 'cuda' when IRIS_BACKEND=cuda" - - finally: - # Restore environment - if old_env is not None: - os.environ["IRIS_BACKEND"] = old_env - else: - os.environ.pop("IRIS_BACKEND", None) - - -def test_backend_detection_nvidia_alias(): - """Test that CUDA backend is selected when IRIS_BACKEND=nvidia.""" - old_env = os.environ.get("IRIS_BACKEND") - - try: - os.environ["IRIS_BACKEND"] = "nvidia" - - # Load hip.py directly - spec = importlib.util.spec_from_file_location( - "hip_test_nvidia", - os.path.join(os.path.dirname(__file__), "../../iris/hip.py") - ) - hip_module = importlib.util.module_from_spec(spec) - - try: - spec.loader.exec_module(hip_module) - except OSError: - # Expected - GPU library not found - pass - - # Check that backend was set to 'cuda' - assert hasattr(hip_module, '_backend') - assert hip_module._backend == 'cuda', "Backend should be 'cuda' when IRIS_BACKEND=nvidia" - - finally: - # Restore environment - if old_env is not None: - os.environ["IRIS_BACKEND"] = old_env - else: - os.environ.pop("IRIS_BACKEND", None) - - -def test_backend_detection_hip_env(): - """Test that HIP backend is selected when IRIS_BACKEND=hip.""" - old_env = os.environ.get("IRIS_BACKEND") - - try: - os.environ["IRIS_BACKEND"] = "hip" - - # Load hip.py directly - spec = importlib.util.spec_from_file_location( - "hip_test_hip", - os.path.join(os.path.dirname(__file__), "../../iris/hip.py") - ) - hip_module = importlib.util.module_from_spec(spec) - - try: - spec.loader.exec_module(hip_module) - except OSError: - # Expected - GPU library not found - pass - - # Check that backend was set to 'hip' - assert hasattr(hip_module, '_backend') - assert hip_module._backend == 'hip', "Backend should be 'hip' when IRIS_BACKEND=hip" - - finally: - # Restore environment - if old_env is not None: - os.environ["IRIS_BACKEND"] = old_env - else: - 
os.environ.pop("IRIS_BACKEND", None) - - -def test_backend_detection_amd_alias(): - """Test that HIP backend is selected when IRIS_BACKEND=amd.""" - old_env = os.environ.get("IRIS_BACKEND") - - try: - os.environ["IRIS_BACKEND"] = "amd" - - # Load hip.py directly - spec = importlib.util.spec_from_file_location( - "hip_test_amd", - os.path.join(os.path.dirname(__file__), "../../iris/hip.py") - ) - hip_module = importlib.util.module_from_spec(spec) - - try: - spec.loader.exec_module(hip_module) - except OSError: - # Expected - GPU library not found - pass - - # Check that backend was set to 'hip' - assert hasattr(hip_module, '_backend') - assert hip_module._backend == 'hip', "Backend should be 'hip' when IRIS_BACKEND=amd" - - finally: - # Restore environment - if old_env is not None: - os.environ["IRIS_BACKEND"] = old_env - else: - os.environ.pop("IRIS_BACKEND", None) - - -def test_ipc_handle_size_definition(): - """Test that IPC handle size functions are defined correctly in source files.""" - import re - - # Check _hip.py defines get_ipc_handle_size returning 64 - hip_file = os.path.join(os.path.dirname(__file__), "../../iris/_hip.py") - with open(hip_file, 'r') as f: - hip_content = f.read() - - assert 'def get_ipc_handle_size()' in hip_content, "_hip.py should define get_ipc_handle_size" - assert 'return 64' in hip_content, "_hip.py should return 64 for IPC handle size" - - # Check cuda.py defines get_ipc_handle_size returning 128 - cuda_file = os.path.join(os.path.dirname(__file__), "../../iris/cuda.py") - with open(cuda_file, 'r') as f: - cuda_content = f.read() - - assert 'def get_ipc_handle_size()' in cuda_content, "cuda.py should define get_ipc_handle_size" - assert 'return 128' in cuda_content, "cuda.py should return 128 for IPC handle size" - - -def test_hip_module_structure(): - """Test that hip.py has the expected structure for backend redirection.""" - hip_file = os.path.join(os.path.dirname(__file__), "../../iris/hip.py") - with open(hip_file, 'r') as f: - hip_content = f.read() - - # Check for backend detection function - assert '_detect_backend' in hip_content, "hip.py should have _detect_backend function" - - # Check for get_backend function - assert 'def get_backend():' in hip_content, "hip.py should have get_backend function" - - # Check for environment variable handling - assert 'IRIS_BACKEND' in hip_content, "hip.py should check IRIS_BACKEND environment variable" - - # Check for backend aliases - assert 'cuda' in hip_content and 'nvidia' in hip_content, "hip.py should support cuda/nvidia aliases" - assert 'hip' in hip_content and 'amd' in hip_content, "hip.py should support hip/amd aliases" - - -def test_iris_py_uses_get_ipc_handle_size(): - """Test that iris.py uses get_ipc_handle_size from hip module.""" - iris_file = os.path.join(os.path.dirname(__file__), "../../iris/iris.py") - with open(iris_file, 'r') as f: - iris_content = f.read() - - # Check that get_ipc_handle_size is imported - assert 'get_ipc_handle_size' in iris_content, "iris.py should import get_ipc_handle_size" - - # Check that it's used instead of hardcoded 64 - assert 'ipc_handle_size = get_ipc_handle_size()' in iris_content, "iris.py should call get_ipc_handle_size()" - - -if __name__ == "__main__": - # Run tests - pytest.main([__file__, "-v"]) - - - -def test_build_time_config_cuda(): - """Test that build-time configuration for CUDA is respected.""" - import tempfile - import shutil - - # Create a temporary config - config_dir = os.path.join(os.path.dirname(__file__), "../../iris/.config") - 
os.makedirs(config_dir, exist_ok=True) - config_file = os.path.join(config_dir, "backend.txt") - - old_env = os.environ.get("IRIS_BACKEND") - - try: - # Write CUDA config - with open(config_file, "w") as f: - f.write("cuda") - - # Clear environment variable to test config priority - os.environ.pop("IRIS_BACKEND", None) - - # Load hip.py - spec = importlib.util.spec_from_file_location( - "hip_test_buildtime_cuda", - os.path.join(os.path.dirname(__file__), "../../iris/hip.py"), - ) - hip_module = importlib.util.module_from_spec(spec) - - try: - spec.loader.exec_module(hip_module) - except OSError: - # Expected - GPU library not found - pass - - # Check that backend was set to 'cuda' from config file - assert hasattr(hip_module, "_backend") - assert hip_module._backend == "cuda", "Build-time config should set backend to 'cuda'" - - finally: - # Clean up - if os.path.exists(config_file): - os.remove(config_file) - if os.path.exists(config_dir) and not os.listdir(config_dir): - os.rmdir(config_dir) - # Restore environment - if old_env is not None: - os.environ["IRIS_BACKEND"] = old_env - else: - os.environ.pop("IRIS_BACKEND", None) - - -def test_build_time_config_priority(): - """Test that build-time configuration takes priority over environment variable.""" - config_dir = os.path.join(os.path.dirname(__file__), "../../iris/.config") - os.makedirs(config_dir, exist_ok=True) - config_file = os.path.join(config_dir, "backend.txt") - - old_env = os.environ.get("IRIS_BACKEND") - - try: - # Write HIP config - with open(config_file, "w") as f: - f.write("hip") - - # Set environment to CUDA (should be overridden by config) - os.environ["IRIS_BACKEND"] = "cuda" - - # Load hip.py - spec = importlib.util.spec_from_file_location( - "hip_test_priority", - os.path.join(os.path.dirname(__file__), "../../iris/hip.py"), - ) - hip_module = importlib.util.module_from_spec(spec) - - try: - spec.loader.exec_module(hip_module) - except OSError: - # Expected - GPU library not found - pass - - # Check that config takes priority - assert hasattr(hip_module, "_backend") - assert hip_module._backend == "hip", "Build-time config should take priority over env var" - - finally: - # Clean up - if os.path.exists(config_file): - os.remove(config_file) - if os.path.exists(config_dir) and not os.listdir(config_dir): - os.rmdir(config_dir) - # Restore environment - if old_env is not None: - os.environ["IRIS_BACKEND"] = old_env - else: - os.environ.pop("IRIS_BACKEND", None) - - -def test_build_backend_module(): - """Test that the build_backend module can write configuration correctly.""" - - sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../..")) - - try: - from build_backend import _write_backend_config - - config_dir = os.path.join(os.path.dirname(__file__), "../../iris/.config") - config_file = os.path.join(config_dir, "backend.txt") - - # Test nvidia alias - _write_backend_config({"backend": "nvidia"}) - assert os.path.exists(config_file), "Config file should be created" - with open(config_file, "r") as f: - assert f.read() == "cuda", "nvidia should map to cuda" - - # Test hip - _write_backend_config({"backend": "hip"}) - with open(config_file, "r") as f: - assert f.read() == "hip", "hip should stay as hip" - - # Test amd alias - _write_backend_config({"backend": "amd"}) - with open(config_file, "r") as f: - assert f.read() == "hip", "amd should map to hip" - - # Test no config (should remove file) - _write_backend_config({}) - assert not os.path.exists(config_file), "Config file should be removed for 
auto-detect" - - finally: - # Clean up - if os.path.exists(config_file): - os.remove(config_file) - if os.path.exists(config_dir) and not os.listdir(config_dir): - os.rmdir(config_dir) - - -if __name__ == "__main__": - # Run tests - pytest.main([__file__, "-v"]) From a162b9e4df17a53d12089d621f820fb2c928af83 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 6 Oct 2025 03:01:34 +0000 Subject: [PATCH 11/22] Replace env var approach with _backend_selected.py file - Change build() in setup.py to write iris/_backend_selected.py instead of setting env var - Update build_backend.py to write _backend_selected.py instead of .config/backend.txt - Update iris/hip.py to import _backend_selected module instead of reading file - Update .gitignore to track _backend_selected.py instead of .config/ - Change default backend from "amd" to "hip" in setup.py - Remove environment variable usage per feedback Co-authored-by: mawad-amd <112003944+mawad-amd@users.noreply.github.com> --- .gitignore | 3 ++- build_backend.py | 20 ++++++-------------- iris/hip.py | 21 ++++++++++----------- setup.py | 6 ++++-- 4 files changed, 22 insertions(+), 28 deletions(-) diff --git a/.gitignore b/.gitignore index 6242c995..5ce8808f 100644 --- a/.gitignore +++ b/.gitignore @@ -28,5 +28,6 @@ slurm*.out examples/gemm/results/* asm/ + # Backend configuration (generated at build time) -iris/.config/ +iris/_backend_selected.py diff --git a/build_backend.py b/build_backend.py index 06f7e2c9..e2c21c04 100644 --- a/build_backend.py +++ b/build_backend.py @@ -34,23 +34,15 @@ def _write_backend_config(config_settings): else: backend = None # Auto-detect at runtime - # Also set environment variable as suggested in setup.py + # Write backend selection to a Python file if backend: - os.environ["IRIS_BACKEND"] = backend - - # Write configuration file - config_dir = os.path.join("iris", ".config") - os.makedirs(config_dir, exist_ok=True) - - config_file = os.path.join(config_dir, "backend.txt") - if backend: - with open(config_file, "w") as f: - f.write(backend) + with open("iris/_backend_selected.py", "w") as f: + f.write(f'BACKEND = "{backend}"\n') print(f"Iris: Configured to use {backend} backend") else: - # Remove config file if it exists (auto-detect mode) - if os.path.exists(config_file): - os.remove(config_file) + # Remove file if it exists (auto-detect mode) + if os.path.exists("iris/_backend_selected.py"): + os.remove("iris/_backend_selected.py") print("Iris: No backend specified, will auto-detect at runtime") diff --git a/iris/hip.py b/iris/hip.py index d7645c39..26db03b9 100644 --- a/iris/hip.py +++ b/iris/hip.py @@ -23,17 +23,16 @@ def _detect_backend(): """Detect which backend to use based on build-time config, environment, and available libraries.""" # 1. Check for build-time configuration file first - config_file = os.path.join(os.path.dirname(__file__), ".config", "backend.txt") - if os.path.exists(config_file): - try: - with open(config_file, "r") as f: - backend_config = f.read().strip().lower() - if backend_config in ("cuda", "nvidia"): - return "cuda" - elif backend_config in ("hip", "amd", "rocm"): - return "hip" - except (IOError, OSError): - pass + try: + from . import _backend_selected + + backend_config = _backend_selected.BACKEND.lower() + if backend_config in ("cuda", "nvidia"): + return "cuda" + elif backend_config in ("hip", "amd", "rocm"): + return "hip" + except (ImportError, AttributeError): + pass # 2. 
Check environment variable backend_env = os.environ.get("IRIS_BACKEND", "").lower() diff --git a/setup.py b/setup.py index e7b4cef6..49454359 100644 --- a/setup.py +++ b/setup.py @@ -15,7 +15,7 @@ def build(setup_kwargs, config_settings=None): if config_settings is None: config_settings = {} - backend = config_settings.get("backend", "amd") + backend = config_settings.get("backend", "hip") # Normalize backend names if backend.lower() in ("nvidia", "cuda"): @@ -25,7 +25,9 @@ def build(setup_kwargs, config_settings=None): else: backend = "hip" # Default to hip - os.environ["IRIS_BACKEND"] = backend + # Write backend selection to a Python file + with open("iris/_backend_selected.py", "w") as f: + f.write(f'BACKEND = "{backend}"\n') print(f"Building Iris with backend={backend}") return setup_kwargs From eaf980fbe3609ed4d80504e3a39c1be973b52213 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 6 Oct 2025 03:06:40 +0000 Subject: [PATCH 12/22] Remove get_rocm_version() function from _cuda.py - Removed get_rocm_version() function as it's not used in CUDA backend - Function is ROCm-specific and not needed for NVIDIA GPUs - get_num_xcc() in CUDA backend returns 1 directly without version check Co-authored-by: mawad-amd <112003944+mawad-amd@users.noreply.github.com> --- iris/_cuda.py | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/iris/_cuda.py b/iris/_cuda.py index aa7a04ea..db227d3a 100644 --- a/iris/_cuda.py +++ b/iris/_cuda.py @@ -97,36 +97,6 @@ def get_cu_count(device_id=None): return cu_count.value -def get_rocm_version(): - # Return CUDA version instead - major, minor = -1, -1 - - # Try nvcc --version - try: - result = subprocess.run(["nvcc", "--version"], capture_output=True, text=True, check=True) - # Parse version from output like "release 12.0, V12.0.76" - for line in result.stdout.split("\n"): - if "release" in line.lower(): - version_part = line.split("release")[1].strip().split(",")[0] - parts = version_part.split(".") - if len(parts) >= 2: - major = int(parts[0]) - minor = int(parts[1]) - break - except (subprocess.CalledProcessError, FileNotFoundError, ValueError, IndexError): - # If we can't get CUDA version, try environment variable - cuda_version = os.environ.get("CUDA_VERSION") - if cuda_version: - try: - parts = cuda_version.split(".") - major = int(parts[0]) - minor = int(parts[1]) if len(parts) > 1 else 0 - except (ValueError, IndexError): - pass - - return (major, minor) - - def get_wall_clock_rate(device_id): cudaDevAttrMemoryClockRate = 36 wall_clock_rate = ctypes.c_int() From ee8a18af7d2619a4c6f5287e1d0451d96971dcc8 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 6 Oct 2025 03:11:41 +0000 Subject: [PATCH 13/22] Simplify backend loading and remove unused functions - Remove malloc/free functions from _cuda.py (not used) - Simplify iris/hip.py to use direct imports instead of dynamic loading - Remove importlib.util dependency - Cleaner and more readable code structure Co-authored-by: mawad-amd <112003944+mawad-amd@users.noreply.github.com> --- iris/_cuda.py | 14 -------------- iris/hip.py | 19 +++---------------- 2 files changed, 3 insertions(+), 30 deletions(-) diff --git a/iris/_cuda.py b/iris/_cuda.py index db227d3a..958a5d74 100644 --- a/iris/_cuda.py +++ b/iris/_cuda.py @@ -118,17 +118,3 @@ def get_arch_string(device_id=None): def get_num_xcc(device_id=None): # XCC is AMD-specific, return 1 for CUDA return 1 - - 
-def malloc_fine_grained(size): - return hip_malloc(size) - - -def hip_malloc(size): - ptr = ctypes.c_void_p() - hip_try(cuda_runtime.cudaMalloc(ctypes.byref(ptr), size)) - return ptr - - -def hip_free(ptr): - hip_try(cuda_runtime.cudaFree(ptr)) diff --git a/iris/hip.py b/iris/hip.py index 26db03b9..1415c293 100644 --- a/iris/hip.py +++ b/iris/hip.py @@ -15,8 +15,6 @@ """ import os -import sys -import importlib.util # Detect backend @@ -62,22 +60,11 @@ def _detect_backend(): _backend = _detect_backend() -# Load the appropriate backend module directly without triggering __init__.py -_module_dir = os.path.dirname(__file__) +# Import all public symbols from the appropriate backend module if _backend == "cuda": - _module_path = os.path.join(_module_dir, "_cuda.py") - _spec = importlib.util.spec_from_file_location("iris._cuda_backend", _module_path) + from iris._cuda import * # noqa: F403, F401 else: - _module_path = os.path.join(_module_dir, "_hip.py") - _spec = importlib.util.spec_from_file_location("iris._hip_backend", _module_path) - -_runtime_module = importlib.util.module_from_spec(_spec) -_spec.loader.exec_module(_runtime_module) - -# Export all public symbols from the backend module -for _name in dir(_runtime_module): - if not _name.startswith("_"): - globals()[_name] = getattr(_runtime_module, _name) + from iris._hip import * # noqa: F403, F401 # Make backend information available From cde3ab8b8758fb943762cc65c30cb361521a40d2 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 6 Oct 2025 04:14:51 +0000 Subject: [PATCH 14/22] Remove build_backend.py and all environment variable support - Delete build_backend.py (setup.py handles --config-settings) - Update pyproject.toml to use setuptools.build_meta directly - Remove IRIS_BACKEND environment variable checking from iris/hip.py - Remove all env var documentation from README.md - Backend now selected via: 1) build-time config, 2) auto-detection, 3) default to HIP Co-authored-by: mawad-amd <112003944+mawad-amd@users.noreply.github.com> --- README.md | 39 ++------------------------------ build_backend.py | 59 ------------------------------------------------ iris/hip.py | 20 ++++------------ pyproject.toml | 3 +-- 4 files changed, 8 insertions(+), 113 deletions(-) delete mode 100644 build_backend.py diff --git a/README.md b/README.md index 09f7d7ad..8773f19f 100644 --- a/README.md +++ b/README.md @@ -125,9 +125,6 @@ To use Iris with AMD GPUs (default): pip install git+https://github.com/ROCm/iris.git --config-settings backend=hip ``` -> [!NOTE] -> The backend can also be controlled at runtime via the `IRIS_BACKEND` environment variable if not set at build time. - ### Docker Compose (Recommended for Development) The recommended way to get started is using Docker Compose, which provides a development environment with the Iris directory mounted inside the container. This allows you to make changes to the code outside the container and see them reflected inside. @@ -167,11 +164,7 @@ Iris currently supports: ### Backend Selection -Iris supports two methods for backend selection: - -#### 1. Build-time Configuration (Recommended) - -Install Iris with the desired backend using `--config-settings`: +Iris supports backend selection at build time using `--config-settings`: ```bash # For NVIDIA GPUs @@ -181,35 +174,7 @@ pip install git+https://github.com/ROCm/iris.git --config-settings backend=nvidi pip install git+https://github.com/ROCm/iris.git --config-settings backend=hip ``` -#### 2. 
Runtime Environment Variable - -If no backend was specified at build time, you can control it via the `IRIS_BACKEND` environment variable: - -```bash -# Use CUDA backend for NVIDIA GPUs -export IRIS_BACKEND=cuda -# or -export IRIS_BACKEND=nvidia - -# Use HIP backend for AMD GPUs (default) -export IRIS_BACKEND=hip -# or -export IRIS_BACKEND=amd -``` - -Or set it in your Python script: - -```python -import os -os.environ["IRIS_BACKEND"] = "cuda" # Must be set before importing iris -import iris -``` - -The backend detection priority is: -1. Build-time configuration (set via `--config-settings`) -2. `IRIS_BACKEND` environment variable -3. Auto-detection based on available GPU libraries -4. Default to HIP for backward compatibility +If no backend is specified at build time, Iris will auto-detect the appropriate backend based on available GPU libraries (libamdhip64.so for AMD, libcudart.so for NVIDIA), defaulting to HIP if neither is found. ## Roadmap diff --git a/build_backend.py b/build_backend.py deleted file mode 100644 index e2c21c04..00000000 --- a/build_backend.py +++ /dev/null @@ -1,59 +0,0 @@ -# SPDX-License-Identifier: MIT -# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. - -""" -Custom build backend to support backend selection via --config-settings. - -This allows users to install Iris with: - pip install . --config-settings backend=nvidia -or: - pip install . --config-settings backend=hip -""" - -import os -from setuptools import build_meta as _orig - -# Re-export all setuptools build_meta functions -prepare_metadata_for_build_wheel = _orig.prepare_metadata_for_build_wheel -get_requires_for_build_wheel = _orig.get_requires_for_build_wheel -get_requires_for_build_sdist = _orig.get_requires_for_build_sdist - - -def _write_backend_config(config_settings): - """Write backend configuration file based on --config-settings.""" - backend = None - - if config_settings: - backend = config_settings.get("backend", "").lower() - - # Normalize backend names - if backend in ("nvidia", "cuda"): - backend = "cuda" - elif backend in ("amd", "rocm", "hip"): - backend = "hip" - else: - backend = None # Auto-detect at runtime - - # Write backend selection to a Python file - if backend: - with open("iris/_backend_selected.py", "w") as f: - f.write(f'BACKEND = "{backend}"\n') - print(f"Iris: Configured to use {backend} backend") - else: - # Remove file if it exists (auto-detect mode) - if os.path.exists("iris/_backend_selected.py"): - os.remove("iris/_backend_selected.py") - print("Iris: No backend specified, will auto-detect at runtime") - - -# Wrap build functions to inject backend configuration -def build_wheel(wheel_directory, config_settings=None, metadata_directory=None): - """Build wheel with backend configuration.""" - _write_backend_config(config_settings) - return _orig.build_wheel(wheel_directory, config_settings, metadata_directory) - - -def build_sdist(sdist_directory, config_settings=None): - """Build sdist with backend configuration.""" - _write_backend_config(config_settings) - return _orig.build_sdist(sdist_directory, config_settings) diff --git a/iris/hip.py b/iris/hip.py index 1415c293..6f7a9782 100644 --- a/iris/hip.py +++ b/iris/hip.py @@ -6,20 +6,19 @@ This module provides a HIP-compatible interface that transparently redirects to either the HIP backend (AMD GPUs) or CUDA backend (NVIDIA GPUs) based on -build-time configuration, runtime detection, or auto-detection. +build-time configuration or auto-detection. 
The backend is selected based on (in priority order): 1. Build-time configuration (set via pip install --config-settings backend=nvidia) -2. IRIS_BACKEND environment variable (set to 'cuda' or 'hip') -3. Auto-detection based on available libraries +2. Auto-detection based on available libraries """ -import os +import ctypes # Detect backend def _detect_backend(): - """Detect which backend to use based on build-time config, environment, and available libraries.""" + """Detect which backend to use based on build-time config and available libraries.""" # 1. Check for build-time configuration file first try: from . import _backend_selected @@ -32,16 +31,7 @@ def _detect_backend(): except (ImportError, AttributeError): pass - # 2. Check environment variable - backend_env = os.environ.get("IRIS_BACKEND", "").lower() - if backend_env in ("cuda", "nvidia"): - return "cuda" - elif backend_env in ("hip", "amd", "rocm"): - return "hip" - - # 3. Auto-detect by trying to load libraries - import ctypes - + # 2. Auto-detect by trying to load libraries try: ctypes.cdll.LoadLibrary("libamdhip64.so") return "hip" diff --git a/pyproject.toml b/pyproject.toml index e191a8c5..88f7b2e3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,8 +3,7 @@ [build-system] requires = ["setuptools>=61", "wheel", "setuptools-scm>=8"] -build-backend = "build_backend" -backend-path = ["."] +build-backend = "setuptools.build_meta" [project] name = "iris" From 96f462d42251450cb23d33f20a4ed688d974f8f9 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 6 Oct 2025 04:24:00 +0000 Subject: [PATCH 15/22] Revert README.md to original state - Restore README.md to state before PR changes - Keep only code changes without documentation updates - All linting checks pass Co-authored-by: mawad-amd <112003944+mawad-amd@users.noreply.github.com> --- README.md | 34 +--------------------------------- 1 file changed, 1 insertion(+), 33 deletions(-) diff --git a/README.md b/README.md index 8773f19f..2131d3f1 100644 --- a/README.md +++ b/README.md @@ -103,9 +103,7 @@ if __name__ == "__main__": ### Quick Installation > [!NOTE] -> **Requirements**: Python 3.10+, PyTorch 2.0+, and Triton -> - For AMD GPUs: ROCm 6.3.1+ HIP runtime and PyTorch ROCm version -> - For NVIDIA GPUs: CUDA 11.0+ runtime and PyTorch CUDA version +> **Requirements**: Python 3.10+, PyTorch 2.0+ (ROCm version), ROCm 6.3.1+ HIP runtime, and Triton For a quick installation directly from the repository: @@ -113,18 +111,6 @@ For a quick installation directly from the repository: pip install git+https://github.com/ROCm/iris.git ``` -To use Iris with NVIDIA GPUs, install with the CUDA backend: - -```shell -pip install git+https://github.com/ROCm/iris.git --config-settings backend=nvidia -``` - -To use Iris with AMD GPUs (default): - -```shell -pip install git+https://github.com/ROCm/iris.git --config-settings backend=hip -``` - ### Docker Compose (Recommended for Development) The recommended way to get started is using Docker Compose, which provides a development environment with the Iris directory mounted inside the container. This allows you to make changes to the code outside the container and see them reflected inside. 
@@ -153,29 +139,11 @@ Check out our [examples](examples/) directory for ready-to-run scripts and usage Iris currently supports: -### AMD GPUs (HIP Backend) - MI300X, MI350X & MI355X -### NVIDIA GPUs (CUDA Backend) -- All CUDA-capable GPUs with compute capability 7.0+ - > [!NOTE] > Iris may work on other AMD GPUs with ROCm compatibility. -### Backend Selection - -Iris supports backend selection at build time using `--config-settings`: - -```bash -# For NVIDIA GPUs -pip install git+https://github.com/ROCm/iris.git --config-settings backend=nvidia - -# For AMD GPUs -pip install git+https://github.com/ROCm/iris.git --config-settings backend=hip -``` - -If no backend is specified at build time, Iris will auto-detect the appropriate backend based on available GPU libraries (libamdhip64.so for AMD, libcudart.so for NVIDIA), defaulting to HIP if neither is found. - ## Roadmap We plan to extend Iris with the following features: From c64e4ed65fec2a4288be1d4bb2a8f009294be06a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 6 Oct 2025 05:21:05 +0000 Subject: [PATCH 16/22] Simplify to auto-detection only, remove build() hook - Remove build() function from setup.py (back to original) - Simplify iris/hip.py to use only auto-detection (no build-time config) - Remove _backend_selected.py from .gitignore (not used) - Backend selection now purely based on library availability - Minimal changes - keeps only essential backend auto-detection Co-authored-by: mawad-amd <112003944+mawad-amd@users.noreply.github.com> --- .gitignore | 3 +-- iris/hip.py | 22 +++------------------- setup.py | 30 ------------------------------ 3 files changed, 4 insertions(+), 51 deletions(-) diff --git a/.gitignore b/.gitignore index 5ce8808f..78645c5a 100644 --- a/.gitignore +++ b/.gitignore @@ -29,5 +29,4 @@ slurm*.out examples/gemm/results/* asm/ -# Backend configuration (generated at build time) -iris/_backend_selected.py +# Backend configuration (generated \ No newline at end of file diff --git a/iris/hip.py b/iris/hip.py index 6f7a9782..a731292c 100644 --- a/iris/hip.py +++ b/iris/hip.py @@ -6,11 +6,7 @@ This module provides a HIP-compatible interface that transparently redirects to either the HIP backend (AMD GPUs) or CUDA backend (NVIDIA GPUs) based on -build-time configuration or auto-detection. - -The backend is selected based on (in priority order): -1. Build-time configuration (set via pip install --config-settings backend=nvidia) -2. Auto-detection based on available libraries +auto-detection. """ import ctypes @@ -18,20 +14,8 @@ # Detect backend def _detect_backend(): - """Detect which backend to use based on build-time config and available libraries.""" - # 1. Check for build-time configuration file first - try: - from . import _backend_selected - - backend_config = _backend_selected.BACKEND.lower() - if backend_config in ("cuda", "nvidia"): - return "cuda" - elif backend_config in ("hip", "amd", "rocm"): - return "hip" - except (ImportError, AttributeError): - pass - - # 2. Auto-detect by trying to load libraries + """Detect which backend to use based on available libraries.""" + # Auto-detect by trying to load libraries try: ctypes.cdll.LoadLibrary("libamdhip64.so") return "hip" diff --git a/setup.py b/setup.py index 49454359..69832461 100644 --- a/setup.py +++ b/setup.py @@ -1,38 +1,8 @@ # SPDX-License-Identifier: MIT # Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. 
-import os from setuptools import setup - -def build(setup_kwargs, config_settings=None): - """ - Build hook to configure backend via --config-settings. - - This function is called during the build process to handle - backend configuration from pip install --config-settings. - """ - if config_settings is None: - config_settings = {} - - backend = config_settings.get("backend", "hip") - - # Normalize backend names - if backend.lower() in ("nvidia", "cuda"): - backend = "cuda" - elif backend.lower() in ("amd", "rocm", "hip"): - backend = "hip" - else: - backend = "hip" # Default to hip - - # Write backend selection to a Python file - with open("iris/_backend_selected.py", "w") as f: - f.write(f'BACKEND = "{backend}"\n') - print(f"Building Iris with backend={backend}") - - return setup_kwargs - - # This setup.py provides backward compatibility for legacy metadata fields # that don't map directly from pyproject.toml's modern PEP 621 format. setup( From da69d1bfa1773bbb4ec959aa806427d4488f7f5b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 6 Oct 2025 05:39:19 +0000 Subject: [PATCH 17/22] Consolidate backends into single hip.py with conditional branching - Remove separate _hip.py and _cuda.py files - Add backend auto-detection at module load time (_is_amd_backend flag) - All functions now branch internally based on _is_amd_backend - Minimal changes - keeps original hip.py structure - Dynamic IPC handle size (64 for HIP, 128 for CUDA) - Zero configuration required - works out of the box Co-authored-by: mawad-amd <112003944+mawad-amd@users.noreply.github.com> --- iris/_cuda.py | 120 ---------------------- iris/_hip.py | 170 ------------------------------- iris/hip.py | 274 ++++++++++++++++++++++++++++++++++++++++++++------ 3 files changed, 244 insertions(+), 320 deletions(-) delete mode 100644 iris/_cuda.py delete mode 100644 iris/_hip.py diff --git a/iris/_cuda.py b/iris/_cuda.py deleted file mode 100644 index 958a5d74..00000000 --- a/iris/_cuda.py +++ /dev/null @@ -1,120 +0,0 @@ -# SPDX-License-Identifier: MIT -# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. 
- -import ctypes -import numpy as np -import sys -import torch -import subprocess -import os - -rt_path = "libcudart.so" -cuda_runtime = ctypes.cdll.LoadLibrary(rt_path) - - -def hip_try(err): - if err != 0: - cuda_runtime.cudaGetErrorString.restype = ctypes.c_char_p - error_string = cuda_runtime.cudaGetErrorString(ctypes.c_int(err)).decode("utf-8") - raise RuntimeError(f"CUDA error code {err}: {error_string}") - - -class hipIpcMemHandle_t(ctypes.Structure): - _fields_ = [("internal", ctypes.c_byte * 128)] - - -def get_ipc_handle_size(): - """Return the size of IPC handle in bytes (128 for CUDA).""" - return 128 - - -def open_ipc_handle(ipc_handle_data, rank): - ptr = ctypes.c_void_p() - cudaIpcMemLazyEnablePeerAccess = ctypes.c_uint(1) - cuda_runtime.cudaIpcOpenMemHandle.argtypes = [ - ctypes.POINTER(ctypes.c_void_p), - hipIpcMemHandle_t, - ctypes.c_uint, - ] - if isinstance(ipc_handle_data, np.ndarray): - if ipc_handle_data.dtype != np.uint8 or ipc_handle_data.size != 128: - raise ValueError("ipc_handle_data must be a 128-element uint8 numpy array") - ipc_handle_bytes = ipc_handle_data.tobytes() - ipc_handle_data = (ctypes.c_char * 128).from_buffer_copy(ipc_handle_bytes) - else: - raise TypeError("ipc_handle_data must be a numpy.ndarray of dtype uint8 with 128 elements") - - raw_memory = ctypes.create_string_buffer(128) - ctypes.memset(raw_memory, 0x00, 128) - ipc_handle_struct = hipIpcMemHandle_t.from_buffer(raw_memory) - ipc_handle_data_bytes = bytes(ipc_handle_data) - ctypes.memmove(raw_memory, ipc_handle_data_bytes, 128) - - hip_try( - cuda_runtime.cudaIpcOpenMemHandle( - ctypes.byref(ptr), - ipc_handle_struct, - cudaIpcMemLazyEnablePeerAccess, - ) - ) - - return ptr.value - - -def get_ipc_handle(ptr, rank): - ipc_handle = hipIpcMemHandle_t() - hip_try(cuda_runtime.cudaIpcGetMemHandle(ctypes.byref(ipc_handle), ptr)) - return ipc_handle - - -def count_devices(): - device_count = ctypes.c_int() - hip_try(cuda_runtime.cudaGetDeviceCount(ctypes.byref(device_count))) - return device_count.value - - -def set_device(gpu_id): - hip_try(cuda_runtime.cudaSetDevice(gpu_id)) - - -def get_device_id(): - device_id = ctypes.c_int() - hip_try(cuda_runtime.cudaGetDevice(ctypes.byref(device_id))) - return device_id.value - - -def get_cu_count(device_id=None): - if device_id is None: - device_id = get_device_id() - - cudaDeviceAttributeMultiprocessorCount = 16 - cu_count = ctypes.c_int() - - hip_try( - cuda_runtime.cudaDeviceGetAttribute(ctypes.byref(cu_count), cudaDeviceAttributeMultiprocessorCount, device_id) - ) - - return cu_count.value - - -def get_wall_clock_rate(device_id): - cudaDevAttrMemoryClockRate = 36 - wall_clock_rate = ctypes.c_int() - status = cuda_runtime.cudaDeviceGetAttribute(ctypes.byref(wall_clock_rate), cudaDevAttrMemoryClockRate, device_id) - hip_try(status) - return wall_clock_rate.value - - -def get_arch_string(device_id=None): - if device_id is None: - device_id = get_device_id() - # For CUDA, get compute capability - device_props = torch.cuda.get_device_properties(device_id) - major = device_props.major - minor = device_props.minor - return f"sm_{major}{minor}" - - -def get_num_xcc(device_id=None): - # XCC is AMD-specific, return 1 for CUDA - return 1 diff --git a/iris/_hip.py b/iris/_hip.py deleted file mode 100644 index f6f4d8ff..00000000 --- a/iris/_hip.py +++ /dev/null @@ -1,170 +0,0 @@ -# SPDX-License-Identifier: MIT -# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. 
- -import ctypes -import numpy as np -import sys -import torch -import subprocess -import os - -rt_path = "libamdhip64.so" -hip_runtime = ctypes.cdll.LoadLibrary(rt_path) - - -def hip_try(err): - if err != 0: - hip_runtime.hipGetErrorString.restype = ctypes.c_char_p - error_string = hip_runtime.hipGetErrorString(ctypes.c_int(err)).decode("utf-8") - raise RuntimeError(f"HIP error code {err}: {error_string}") - - -class hipIpcMemHandle_t(ctypes.Structure): - _fields_ = [("reserved", ctypes.c_char * 64)] - - -def get_ipc_handle_size(): - """Return the size of IPC handle in bytes (64 for HIP).""" - return 64 - - -def open_ipc_handle(ipc_handle_data, rank): - ptr = ctypes.c_void_p() - hipIpcMemLazyEnablePeerAccess = ctypes.c_uint(1) - hip_runtime.hipIpcOpenMemHandle.argtypes = [ - ctypes.POINTER(ctypes.c_void_p), - hipIpcMemHandle_t, - ctypes.c_uint, - ] - if isinstance(ipc_handle_data, np.ndarray): - if ipc_handle_data.dtype != np.uint8 or ipc_handle_data.size != 64: - raise ValueError("ipc_handle_data must be a 64-element uint8 numpy array") - ipc_handle_bytes = ipc_handle_data.tobytes() - ipc_handle_data = (ctypes.c_char * 64).from_buffer_copy(ipc_handle_bytes) - else: - raise TypeError("ipc_handle_data must be a numpy.ndarray of dtype uint8 with 64 elements") - - raw_memory = ctypes.create_string_buffer(64) - ctypes.memset(raw_memory, 0x00, 64) - ipc_handle_struct = hipIpcMemHandle_t.from_buffer(raw_memory) - ipc_handle_data_bytes = bytes(ipc_handle_data) - ctypes.memmove(raw_memory, ipc_handle_data_bytes, 64) - - hip_try( - hip_runtime.hipIpcOpenMemHandle( - ctypes.byref(ptr), - ipc_handle_struct, - hipIpcMemLazyEnablePeerAccess, - ) - ) - - return ptr.value - - -def get_ipc_handle(ptr, rank): - ipc_handle = hipIpcMemHandle_t() - hip_try(hip_runtime.hipIpcGetMemHandle(ctypes.byref(ipc_handle), ptr)) - return ipc_handle - - -def count_devices(): - device_count = ctypes.c_int() - hip_try(hip_runtime.hipGetDeviceCount(ctypes.byref(device_count))) - return device_count.value - - -def set_device(gpu_id): - hip_try(hip_runtime.hipSetDevice(gpu_id)) - - -def get_device_id(): - device_id = ctypes.c_int() - hip_try(hip_runtime.hipGetDevice(ctypes.byref(device_id))) - return device_id.value - - -def get_cu_count(device_id=None): - if device_id is None: - device_id = get_device_id() - - hipDeviceAttributeMultiprocessorCount = 63 - cu_count = ctypes.c_int() - - hip_try(hip_runtime.hipDeviceGetAttribute(ctypes.byref(cu_count), hipDeviceAttributeMultiprocessorCount, device_id)) - - return cu_count.value - - -def get_rocm_version(): - major, minor = -1, -1 - - # Try hipconfig --path first - try: - result = subprocess.run(["hipconfig", "--path"], capture_output=True, text=True, check=True) - rocm_path = result.stdout.strip() - except (subprocess.CalledProcessError, FileNotFoundError): - # Then look for $ROCM_PATH environment variable - rocm_path = os.environ.get("ROCM_PATH") - if not rocm_path: - # Finally, try default location - rocm_path = "/opt/rocm" - - # Try to read version from .info/version file - try: - version_file_path = os.path.join(rocm_path, ".info", "version") - with open(version_file_path, "r") as version_file: - version = version_file.readline().strip() - major = int(version.split(".")[0]) - minor = int(version.split(".")[1]) - except (FileNotFoundError, IOError, ValueError, IndexError): - # If we can't read the version file, return -1, -1 - pass - - return (major, minor) - - -def get_wall_clock_rate(device_id): - hipDeviceAttributeWallClockRate = 10017 - wall_clock_rate = ctypes.c_int() - 
status = hip_runtime.hipDeviceGetAttribute( - ctypes.byref(wall_clock_rate), hipDeviceAttributeWallClockRate, device_id - ) - hip_try(status) - return wall_clock_rate.value - - -def get_arch_string(device_id=None): - if device_id is None: - device_id = get_device_id() - arch_full = torch.cuda.get_device_properties(device_id).gcnArchName - arch_name = arch_full.split(":")[0] - return arch_name - - -def get_num_xcc(device_id=None): - if device_id is None: - device_id = get_device_id() - rocm_major, _ = get_rocm_version() - if rocm_major < 7: - return 8 - hipDeviceAttributeNumberOfXccs = 10018 - xcc_count = ctypes.c_int() - hip_try(hip_runtime.hipDeviceGetAttribute(ctypes.byref(xcc_count), hipDeviceAttributeNumberOfXccs, device_id)) - return xcc_count.value - - -def malloc_fine_grained(size): - hipDeviceMallocFinegrained = 0x1 - ptr = ctypes.c_void_p() - hip_try(hip_runtime.hipExtMallocWithFlags(ctypes.byref(ptr), size, hipDeviceMallocFinegrained)) - return ptr - - -def hip_malloc(size): - ptr = ctypes.c_void_p() - hip_try(hip_runtime.hipMalloc(ctypes.byref(ptr), size)) - return ptr - - -def hip_free(ptr): - hip_try(hip_runtime.hipFree(ptr)) diff --git a/iris/hip.py b/iris/hip.py index a731292c..a13acd07 100644 --- a/iris/hip.py +++ b/iris/hip.py @@ -1,47 +1,261 @@ # SPDX-License-Identifier: MIT # Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. -""" -HIP-compatible API facade for Iris. +import ctypes +import numpy as np +import sys +import torch +import subprocess +import os -This module provides a HIP-compatible interface that transparently redirects -to either the HIP backend (AMD GPUs) or CUDA backend (NVIDIA GPUs) based on -auto-detection. -""" +# Auto-detect backend +_is_amd_backend = True +try: + rt_path = "libamdhip64.so" + hip_runtime = ctypes.cdll.LoadLibrary(rt_path) +except OSError: + try: + rt_path = "libcudart.so" + hip_runtime = ctypes.cdll.LoadLibrary(rt_path) + _is_amd_backend = False + except OSError: + # Default to HIP for backward compatibility + rt_path = "libamdhip64.so" + hip_runtime = ctypes.cdll.LoadLibrary(rt_path) -import ctypes + +def hip_try(err): + if err != 0: + if _is_amd_backend: + hip_runtime.hipGetErrorString.restype = ctypes.c_char_p + error_string = hip_runtime.hipGetErrorString(ctypes.c_int(err)).decode("utf-8") + raise RuntimeError(f"HIP error code {err}: {error_string}") + else: + hip_runtime.cudaGetErrorString.restype = ctypes.c_char_p + error_string = hip_runtime.cudaGetErrorString(ctypes.c_int(err)).decode("utf-8") + raise RuntimeError(f"CUDA error code {err}: {error_string}") + + +class hipIpcMemHandle_t(ctypes.Structure): + if _is_amd_backend: + _fields_ = [("reserved", ctypes.c_char * 64)] + else: + _fields_ = [("reserved", ctypes.c_char * 128)] + + +def get_ipc_handle_size(): + """Return the IPC handle size for the current backend.""" + return 64 if _is_amd_backend else 128 + + +def open_ipc_handle(ipc_handle_data, rank): + ptr = ctypes.c_void_p() + handle_size = get_ipc_handle_size() + + if _is_amd_backend: + hipIpcMemLazyEnablePeerAccess = ctypes.c_uint(1) + hip_runtime.hipIpcOpenMemHandle.argtypes = [ + ctypes.POINTER(ctypes.c_void_p), + hipIpcMemHandle_t, + ctypes.c_uint, + ] + else: + hip_runtime.cudaIpcOpenMemHandle.argtypes = [ + ctypes.POINTER(ctypes.c_void_p), + hipIpcMemHandle_t, + ctypes.c_uint, + ] + cudaIpcMemLazyEnablePeerAccess = ctypes.c_uint(1) + + if isinstance(ipc_handle_data, np.ndarray): + if ipc_handle_data.dtype != np.uint8 or ipc_handle_data.size != handle_size: + raise ValueError(f"ipc_handle_data must 
be a {handle_size}-element uint8 numpy array") + ipc_handle_bytes = ipc_handle_data.tobytes() + ipc_handle_data = (ctypes.c_char * handle_size).from_buffer_copy(ipc_handle_bytes) + else: + raise TypeError(f"ipc_handle_data must be a numpy.ndarray of dtype uint8 with {handle_size} elements") + + raw_memory = ctypes.create_string_buffer(handle_size) + ctypes.memset(raw_memory, 0x00, handle_size) + ipc_handle_struct = hipIpcMemHandle_t.from_buffer(raw_memory) + ipc_handle_data_bytes = bytes(ipc_handle_data) + ctypes.memmove(raw_memory, ipc_handle_data_bytes, handle_size) + + if _is_amd_backend: + hip_try( + hip_runtime.hipIpcOpenMemHandle( + ctypes.byref(ptr), + ipc_handle_struct, + hipIpcMemLazyEnablePeerAccess, + ) + ) + else: + hip_try( + hip_runtime.cudaIpcOpenMemHandle( + ctypes.byref(ptr), + ipc_handle_struct, + cudaIpcMemLazyEnablePeerAccess, + ) + ) + + return ptr.value + + +def get_ipc_handle(ptr, rank): + ipc_handle = hipIpcMemHandle_t() + if _is_amd_backend: + hip_try(hip_runtime.hipIpcGetMemHandle(ctypes.byref(ipc_handle), ptr)) + else: + hip_try(hip_runtime.cudaIpcGetMemHandle(ctypes.byref(ipc_handle), ptr)) + return ipc_handle + + +def count_devices(): + device_count = ctypes.c_int() + if _is_amd_backend: + hip_try(hip_runtime.hipGetDeviceCount(ctypes.byref(device_count))) + else: + hip_try(hip_runtime.cudaGetDeviceCount(ctypes.byref(device_count))) + return device_count.value -# Detect backend -def _detect_backend(): - """Detect which backend to use based on available libraries.""" - # Auto-detect by trying to load libraries +def set_device(gpu_id): + if _is_amd_backend: + hip_try(hip_runtime.hipSetDevice(gpu_id)) + else: + hip_try(hip_runtime.cudaSetDevice(gpu_id)) + + +def get_device_id(): + device_id = ctypes.c_int() + if _is_amd_backend: + hip_try(hip_runtime.hipGetDevice(ctypes.byref(device_id))) + else: + hip_try(hip_runtime.cudaGetDevice(ctypes.byref(device_id))) + return device_id.value + + +def get_cu_count(device_id=None): + if device_id is None: + device_id = get_device_id() + + cu_count = ctypes.c_int() + + if _is_amd_backend: + hipDeviceAttributeMultiprocessorCount = 63 + hip_try(hip_runtime.hipDeviceGetAttribute(ctypes.byref(cu_count), hipDeviceAttributeMultiprocessorCount, device_id)) + else: + cudaDevAttrMultiProcessorCount = 16 + hip_try(hip_runtime.cudaDeviceGetAttribute(ctypes.byref(cu_count), cudaDevAttrMultiProcessorCount, device_id)) + + return cu_count.value + + +def get_rocm_version(): + if not _is_amd_backend: + # Not applicable for CUDA + return (-1, -1) + + major, minor = -1, -1 + + # Try hipconfig --path first try: - ctypes.cdll.LoadLibrary("libamdhip64.so") - return "hip" - except (OSError, FileNotFoundError): - pass + result = subprocess.run(["hipconfig", "--path"], capture_output=True, text=True, check=True) + rocm_path = result.stdout.strip() + except (subprocess.CalledProcessError, FileNotFoundError): + # Then look for $ROCM_PATH environment variable + rocm_path = os.environ.get("ROCM_PATH") + if not rocm_path: + # Finally, try default location + rocm_path = "/opt/rocm" + # Try to read version from .info/version file try: - ctypes.cdll.LoadLibrary("libcudart.so") - return "cuda" - except (OSError, FileNotFoundError): + version_file_path = os.path.join(rocm_path, ".info", "version") + with open(version_file_path, "r") as version_file: + version = version_file.readline().strip() + major = int(version.split(".")[0]) + minor = int(version.split(".")[1]) + except (FileNotFoundError, IOError, ValueError, IndexError): + # If we can't read the version 
file, return -1, -1 pass - # Default to hip for backward compatibility - return "hip" + return (major, minor) + + +def get_wall_clock_rate(device_id): + wall_clock_rate = ctypes.c_int() + + if _is_amd_backend: + hipDeviceAttributeWallClockRate = 10017 + status = hip_runtime.hipDeviceGetAttribute( + ctypes.byref(wall_clock_rate), hipDeviceAttributeWallClockRate, device_id + ) + else: + cudaDevAttrClockRate = 13 + status = hip_runtime.cudaDeviceGetAttribute( + ctypes.byref(wall_clock_rate), cudaDevAttrClockRate, device_id + ) + + hip_try(status) + return wall_clock_rate.value + + +def get_arch_string(device_id=None): + if device_id is None: + device_id = get_device_id() + + if _is_amd_backend: + arch_full = torch.cuda.get_device_properties(device_id).gcnArchName + arch_name = arch_full.split(":")[0] + return arch_name + else: + # For CUDA, return compute capability + props = torch.cuda.get_device_properties(device_id) + return f"sm_{props.major}{props.minor}" + + +def get_num_xcc(device_id=None): + if device_id is None: + device_id = get_device_id() + + if not _is_amd_backend: + # XCC is AMD-specific, return 1 for CUDA + return 1 + + rocm_major, _ = get_rocm_version() + if rocm_major < 7: + return 8 + hipDeviceAttributeNumberOfXccs = 10018 + xcc_count = ctypes.c_int() + hip_try(hip_runtime.hipDeviceGetAttribute(ctypes.byref(xcc_count), hipDeviceAttributeNumberOfXccs, device_id)) + return xcc_count.value + +def malloc_fine_grained(size): + ptr = ctypes.c_void_p() + + if _is_amd_backend: + hipDeviceMallocFinegrained = 0x1 + hip_try(hip_runtime.hipExtMallocWithFlags(ctypes.byref(ptr), size, hipDeviceMallocFinegrained)) + else: + # CUDA doesn't have direct equivalent, use regular malloc + hip_try(hip_runtime.cudaMalloc(ctypes.byref(ptr), size)) + + return ptr -_backend = _detect_backend() -# Import all public symbols from the appropriate backend module -if _backend == "cuda": - from iris._cuda import * # noqa: F403, F401 -else: - from iris._hip import * # noqa: F403, F401 +def hip_malloc(size): + ptr = ctypes.c_void_p() + if _is_amd_backend: + hip_try(hip_runtime.hipMalloc(ctypes.byref(ptr), size)) + else: + hip_try(hip_runtime.cudaMalloc(ctypes.byref(ptr), size)) + return ptr -# Make backend information available -def get_backend(): - """Get the currently active backend name ('hip' or 'cuda').""" - return _backend +def hip_free(ptr): + if _is_amd_backend: + hip_try(hip_runtime.hipFree(ptr)) + else: + hip_try(hip_runtime.cudaFree(ptr)) From 5f53ebe08e6ade6ea6ffe395b3320e5cb81ad0f1 Mon Sep 17 00:00:00 2001 From: Muhammad Awad Date: Wed, 8 Oct 2025 12:55:19 -0700 Subject: [PATCH 18/22] Add `setuptools` requirements --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 2131d3f1..27eed85e 100644 --- a/README.md +++ b/README.md @@ -103,7 +103,7 @@ if __name__ == "__main__": ### Quick Installation > [!NOTE] -> **Requirements**: Python 3.10+, PyTorch 2.0+ (ROCm version), ROCm 6.3.1+ HIP runtime, and Triton +> **Requirements**: Python 3.10+, PyTorch 2.0+ (ROCm version), ROCm 6.3.1+ HIP runtime, Triton, and setuptools>=61 For a quick installation directly from the repository: From f06972ad313619265b36bcf9142687758ffb6685 Mon Sep 17 00:00:00 2001 From: Muhammad Awad Date: Wed, 8 Oct 2025 12:55:43 -0700 Subject: [PATCH 19/22] Name generic functions` gpu*` --- iris/hip.py | 87 +++++++++++++++++++++++++---------------------------- 1 file changed, 41 insertions(+), 46 deletions(-) diff --git a/iris/hip.py b/iris/hip.py index a13acd07..423ef88d 100644 --- 
a/iris/hip.py +++ b/iris/hip.py @@ -12,41 +12,36 @@ _is_amd_backend = True try: rt_path = "libamdhip64.so" - hip_runtime = ctypes.cdll.LoadLibrary(rt_path) + gpu_runtime = ctypes.cdll.LoadLibrary(rt_path) except OSError: try: rt_path = "libcudart.so" - hip_runtime = ctypes.cdll.LoadLibrary(rt_path) + gpu_runtime = ctypes.cdll.LoadLibrary(rt_path) _is_amd_backend = False except OSError: - # Default to HIP for backward compatibility rt_path = "libamdhip64.so" - hip_runtime = ctypes.cdll.LoadLibrary(rt_path) + gpu_runtime = ctypes.cdll.LoadLibrary(rt_path) -def hip_try(err): +def gpu_try(err): if err != 0: if _is_amd_backend: - hip_runtime.hipGetErrorString.restype = ctypes.c_char_p - error_string = hip_runtime.hipGetErrorString(ctypes.c_int(err)).decode("utf-8") + gpu_runtime.hipGetErrorString.restype = ctypes.c_char_p + error_string = gpu_runtime.hipGetErrorString(ctypes.c_int(err)).decode("utf-8") raise RuntimeError(f"HIP error code {err}: {error_string}") else: - hip_runtime.cudaGetErrorString.restype = ctypes.c_char_p - error_string = hip_runtime.cudaGetErrorString(ctypes.c_int(err)).decode("utf-8") + gpu_runtime.cudaGetErrorString.restype = ctypes.c_char_p + error_string = gpu_runtime.cudaGetErrorString(ctypes.c_int(err)).decode("utf-8") raise RuntimeError(f"CUDA error code {err}: {error_string}") -class hipIpcMemHandle_t(ctypes.Structure): - if _is_amd_backend: - _fields_ = [("reserved", ctypes.c_char * 64)] - else: - _fields_ = [("reserved", ctypes.c_char * 128)] - - def get_ipc_handle_size(): """Return the IPC handle size for the current backend.""" return 64 if _is_amd_backend else 128 +class gpuIpcMemHandle_t(ctypes.Structure): + _fields_ = [("reserved", ctypes.c_char * get_ipc_handle_size())] + def open_ipc_handle(ipc_handle_data, rank): ptr = ctypes.c_void_p() @@ -54,15 +49,15 @@ def open_ipc_handle(ipc_handle_data, rank): if _is_amd_backend: hipIpcMemLazyEnablePeerAccess = ctypes.c_uint(1) - hip_runtime.hipIpcOpenMemHandle.argtypes = [ + gpu_runtime.hipIpcOpenMemHandle.argtypes = [ ctypes.POINTER(ctypes.c_void_p), - hipIpcMemHandle_t, + gpuIpcMemHandle_t, ctypes.c_uint, ] else: - hip_runtime.cudaIpcOpenMemHandle.argtypes = [ + gpu_runtime.cudaIpcOpenMemHandle.argtypes = [ ctypes.POINTER(ctypes.c_void_p), - hipIpcMemHandle_t, + gpuIpcMemHandle_t, ctypes.c_uint, ] cudaIpcMemLazyEnablePeerAccess = ctypes.c_uint(1) @@ -77,21 +72,21 @@ def open_ipc_handle(ipc_handle_data, rank): raw_memory = ctypes.create_string_buffer(handle_size) ctypes.memset(raw_memory, 0x00, handle_size) - ipc_handle_struct = hipIpcMemHandle_t.from_buffer(raw_memory) + ipc_handle_struct = gpuIpcMemHandle_t.from_buffer(raw_memory) ipc_handle_data_bytes = bytes(ipc_handle_data) ctypes.memmove(raw_memory, ipc_handle_data_bytes, handle_size) if _is_amd_backend: - hip_try( - hip_runtime.hipIpcOpenMemHandle( + gpu_try( + gpu_runtime.hipIpcOpenMemHandle( ctypes.byref(ptr), ipc_handle_struct, hipIpcMemLazyEnablePeerAccess, ) ) else: - hip_try( - hip_runtime.cudaIpcOpenMemHandle( + gpu_try( + gpu_runtime.cudaIpcOpenMemHandle( ctypes.byref(ptr), ipc_handle_struct, cudaIpcMemLazyEnablePeerAccess, @@ -102,36 +97,36 @@ def open_ipc_handle(ipc_handle_data, rank): def get_ipc_handle(ptr, rank): - ipc_handle = hipIpcMemHandle_t() + ipc_handle = gpuIpcMemHandle_t() if _is_amd_backend: - hip_try(hip_runtime.hipIpcGetMemHandle(ctypes.byref(ipc_handle), ptr)) + gpu_try(gpu_runtime.hipIpcGetMemHandle(ctypes.byref(ipc_handle), ptr)) else: - hip_try(hip_runtime.cudaIpcGetMemHandle(ctypes.byref(ipc_handle), ptr)) + 
gpu_try(gpu_runtime.cudaIpcGetMemHandle(ctypes.byref(ipc_handle), ptr)) return ipc_handle def count_devices(): device_count = ctypes.c_int() if _is_amd_backend: - hip_try(hip_runtime.hipGetDeviceCount(ctypes.byref(device_count))) + gpu_try(gpu_runtime.hipGetDeviceCount(ctypes.byref(device_count))) else: - hip_try(hip_runtime.cudaGetDeviceCount(ctypes.byref(device_count))) + gpu_try(gpu_runtime.cudaGetDeviceCount(ctypes.byref(device_count))) return device_count.value def set_device(gpu_id): if _is_amd_backend: - hip_try(hip_runtime.hipSetDevice(gpu_id)) + gpu_try(gpu_runtime.hipSetDevice(gpu_id)) else: - hip_try(hip_runtime.cudaSetDevice(gpu_id)) + gpu_try(gpu_runtime.cudaSetDevice(gpu_id)) def get_device_id(): device_id = ctypes.c_int() if _is_amd_backend: - hip_try(hip_runtime.hipGetDevice(ctypes.byref(device_id))) + gpu_try(gpu_runtime.hipGetDevice(ctypes.byref(device_id))) else: - hip_try(hip_runtime.cudaGetDevice(ctypes.byref(device_id))) + gpu_try(gpu_runtime.cudaGetDevice(ctypes.byref(device_id))) return device_id.value @@ -143,10 +138,10 @@ def get_cu_count(device_id=None): if _is_amd_backend: hipDeviceAttributeMultiprocessorCount = 63 - hip_try(hip_runtime.hipDeviceGetAttribute(ctypes.byref(cu_count), hipDeviceAttributeMultiprocessorCount, device_id)) + gpu_try(gpu_runtime.hipDeviceGetAttribute(ctypes.byref(cu_count), hipDeviceAttributeMultiprocessorCount, device_id)) else: cudaDevAttrMultiProcessorCount = 16 - hip_try(hip_runtime.cudaDeviceGetAttribute(ctypes.byref(cu_count), cudaDevAttrMultiProcessorCount, device_id)) + gpu_try(gpu_runtime.cudaDeviceGetAttribute(ctypes.byref(cu_count), cudaDevAttrMultiProcessorCount, device_id)) return cu_count.value @@ -188,16 +183,16 @@ def get_wall_clock_rate(device_id): if _is_amd_backend: hipDeviceAttributeWallClockRate = 10017 - status = hip_runtime.hipDeviceGetAttribute( + status = gpu_runtime.hipDeviceGetAttribute( ctypes.byref(wall_clock_rate), hipDeviceAttributeWallClockRate, device_id ) else: cudaDevAttrClockRate = 13 - status = hip_runtime.cudaDeviceGetAttribute( + status = gpu_runtime.cudaDeviceGetAttribute( ctypes.byref(wall_clock_rate), cudaDevAttrClockRate, device_id ) - hip_try(status) + gpu_try(status) return wall_clock_rate.value @@ -228,7 +223,7 @@ def get_num_xcc(device_id=None): return 8 hipDeviceAttributeNumberOfXccs = 10018 xcc_count = ctypes.c_int() - hip_try(hip_runtime.hipDeviceGetAttribute(ctypes.byref(xcc_count), hipDeviceAttributeNumberOfXccs, device_id)) + gpu_try(gpu_runtime.hipDeviceGetAttribute(ctypes.byref(xcc_count), hipDeviceAttributeNumberOfXccs, device_id)) return xcc_count.value @@ -237,10 +232,10 @@ def malloc_fine_grained(size): if _is_amd_backend: hipDeviceMallocFinegrained = 0x1 - hip_try(hip_runtime.hipExtMallocWithFlags(ctypes.byref(ptr), size, hipDeviceMallocFinegrained)) + gpu_try(gpu_runtime.hipExtMallocWithFlags(ctypes.byref(ptr), size, hipDeviceMallocFinegrained)) else: # CUDA doesn't have direct equivalent, use regular malloc - hip_try(hip_runtime.cudaMalloc(ctypes.byref(ptr), size)) + gpu_try(gpu_runtime.cudaMalloc(ctypes.byref(ptr), size)) return ptr @@ -248,14 +243,14 @@ def malloc_fine_grained(size): def hip_malloc(size): ptr = ctypes.c_void_p() if _is_amd_backend: - hip_try(hip_runtime.hipMalloc(ctypes.byref(ptr), size)) + gpu_try(gpu_runtime.hipMalloc(ctypes.byref(ptr), size)) else: - hip_try(hip_runtime.cudaMalloc(ctypes.byref(ptr), size)) + gpu_try(gpu_runtime.cudaMalloc(ctypes.byref(ptr), size)) return ptr def hip_free(ptr): if _is_amd_backend: - hip_try(hip_runtime.hipFree(ptr)) + 
From caf6ed0b6977957d481d97add595b4235b9baefb Mon Sep 17 00:00:00 2001
From: Muhammad Awad
Date: Wed, 8 Oct 2025 12:56:12 -0700
Subject: [PATCH 20/22] Add necessary conversion for RCCL

---
 iris/iris.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/iris/iris.py b/iris/iris.py
index 5a9b5142..22627058 100644
--- a/iris/iris.py
+++ b/iris/iris.py
@@ -96,8 +96,10 @@ def __init__(self, heap_size=1 << 30):

         distributed_barrier()

-        all_ipc_handles = distributed_allgather(np.frombuffer(ipc_handle, dtype=np.uint8))
-        all_heap_bases = distributed_allgather(np.array([heap_bases[cur_rank]], dtype=np.uint64))
+        all_ipc_handles = distributed_allgather(np.frombuffer(ipc_handle, dtype=np.uint8).copy())
+        heap_base_bytes = np.array([heap_bases[cur_rank]], dtype=np.uint64).tobytes()
+        all_heap_bases_bytes = distributed_allgather(np.frombuffer(heap_base_bytes, dtype=np.uint8).copy())
+        all_heap_bases = np.frombuffer(all_heap_bases_bytes.tobytes(), dtype=np.uint64).reshape(num_ranks, -1)

         distributed_barrier()
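The .copy() calls in this patch matter because np.frombuffer over a bytes object returns a read-only view; handing that view to the RCCL-backed allgather requires an owning, writable array, and the uint64 heap bases are round-tripped through uint8 bytes so every gathered payload shares one dtype. A small sketch of the round trip under those assumptions (np.stack stands in for distributed_allgather, which is not defined here):

    import numpy as np

    heap_base = np.array([0x7F0000000000], dtype=np.uint64)

    view = np.frombuffer(heap_base.tobytes(), dtype=np.uint8)
    assert not view.flags.writeable          # frombuffer over bytes is read-only

    payload = view.copy()                    # owning, writable buffer, safe to gather
    gathered = np.stack([payload, payload])  # stand-in for a 2-rank allgather

    recovered = np.frombuffer(gathered.tobytes(), dtype=np.uint64).reshape(2, -1)
    assert recovered[0, 0] == heap_base[0]   # heap base survives the byte round trip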
From 73b0835c16163e7f5aef4a50689c20f4dc13a255 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
Date: Wed, 8 Oct 2025 19:57:16 +0000
Subject: [PATCH 21/22] Apply Ruff auto-fixes

---
 iris/hip.py | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/iris/hip.py b/iris/hip.py
index 423ef88d..89807860 100644
--- a/iris/hip.py
+++ b/iris/hip.py
@@ -39,6 +39,7 @@ def get_ipc_handle_size():
     """Return the IPC handle size for the current backend."""
     return 64 if _is_amd_backend else 128

+
 class gpuIpcMemHandle_t(ctypes.Structure):
     _fields_ = [("reserved", ctypes.c_char * get_ipc_handle_size())]

@@ -46,7 +47,7 @@ class gpuIpcMemHandle_t(ctypes.Structure):
 def open_ipc_handle(ipc_handle_data, rank):
     ptr = ctypes.c_void_p()
     handle_size = get_ipc_handle_size()
-
+
     if _is_amd_backend:
         hipIpcMemLazyEnablePeerAccess = ctypes.c_uint(1)
         gpu_runtime.hipIpcOpenMemHandle.argtypes = [
@@ -61,7 +62,7 @@ def open_ipc_handle(ipc_handle_data, rank):
             ctypes.c_uint,
         ]
         cudaIpcMemLazyEnablePeerAccess = ctypes.c_uint(1)
-
+
     if isinstance(ipc_handle_data, np.ndarray):
         if ipc_handle_data.dtype != np.uint8 or ipc_handle_data.size != handle_size:
             raise ValueError(f"ipc_handle_data must be a {handle_size}-element uint8 numpy array")
@@ -135,10 +136,12 @@ def get_cu_count(device_id=None):
         device_id = get_device_id()

     cu_count = ctypes.c_int()
-
+
     if _is_amd_backend:
         hipDeviceAttributeMultiprocessorCount = 63
-        gpu_try(gpu_runtime.hipDeviceGetAttribute(ctypes.byref(cu_count), hipDeviceAttributeMultiprocessorCount, device_id))
+        gpu_try(
+            gpu_runtime.hipDeviceGetAttribute(ctypes.byref(cu_count), hipDeviceAttributeMultiprocessorCount, device_id)
+        )
     else:
         cudaDevAttrMultiProcessorCount = 16
         gpu_try(gpu_runtime.cudaDeviceGetAttribute(ctypes.byref(cu_count), cudaDevAttrMultiProcessorCount, device_id))
@@ -150,7 +153,7 @@ def get_rocm_version():
     if not _is_amd_backend:
         # Not applicable for CUDA
         return (-1, -1)
-
+
     major, minor = -1, -1

     # Try hipconfig --path first
@@ -180,7 +183,7 @@ def get_rocm_version():

 def get_wall_clock_rate(device_id):
     wall_clock_rate = ctypes.c_int()
-
+
     if _is_amd_backend:
         hipDeviceAttributeWallClockRate = 10017
         status = gpu_runtime.hipDeviceGetAttribute(
@@ -188,10 +191,8 @@ def get_wall_clock_rate(device_id):
         )
     else:
         cudaDevAttrClockRate = 13
-        status = gpu_runtime.cudaDeviceGetAttribute(
-            ctypes.byref(wall_clock_rate), cudaDevAttrClockRate, device_id
-        )
-
+        status = gpu_runtime.cudaDeviceGetAttribute(ctypes.byref(wall_clock_rate), cudaDevAttrClockRate, device_id)
+
     gpu_try(status)
     return wall_clock_rate.value

@@ -199,7 +200,7 @@ def get_wall_clock_rate(device_id):
 def get_arch_string(device_id=None):
     if device_id is None:
         device_id = get_device_id()
-
+
     if _is_amd_backend:
         arch_full = torch.cuda.get_device_properties(device_id).gcnArchName
         arch_name = arch_full.split(":")[0]
@@ -213,11 +214,11 @@ def get_arch_string(device_id=None):
 def get_num_xcc(device_id=None):
     if device_id is None:
         device_id = get_device_id()
-
+
     if not _is_amd_backend:
         # XCC is AMD-specific, return 1 for CUDA
         return 1
-
+
     rocm_major, _ = get_rocm_version()
     if rocm_major < 7:
         return 8
@@ -229,14 +230,14 @@ def get_num_xcc(device_id=None):

 def malloc_fine_grained(size):
     ptr = ctypes.c_void_p()
-
+
     if _is_amd_backend:
         hipDeviceMallocFinegrained = 0x1
         gpu_try(gpu_runtime.hipExtMallocWithFlags(ctypes.byref(ptr), size, hipDeviceMallocFinegrained))
     else:
         # CUDA doesn't have direct equivalent, use regular malloc
         gpu_try(gpu_runtime.cudaMalloc(ctypes.byref(ptr), size))
-
+
     return ptr

From abaa8539999b5d386acabda1e1ab3229b027db24 Mon Sep 17 00:00:00 2001
From: Muhammad Awad
Date: Wed, 8 Oct 2025 13:27:46 -0700
Subject: [PATCH 22/22] Remove the git ignore comment

---
 .gitignore | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/.gitignore b/.gitignore
index 78645c5a..cbaac1bc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -27,6 +27,4 @@ slurm*.out
 *.egg-info
 examples/gemm/results/*
-asm/
-
-# Backend configuration (generated
\ No newline at end of file
+asm/
\ No newline at end of file
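With the series applied, iris/hip.py presents one backend-agnostic surface regardless of which runtime was found. A rough usage sketch of those helpers follows; the from iris import hip path and the handle sizes in the comment are assumptions taken from this series, not verified against the installed package:

    from iris import hip

    hip.set_device(0)
    print("devices:", hip.count_devices())
    print("IPC handle bytes:", hip.get_ipc_handle_size())  # 64 on HIP, 128 on CUDA in this series

    buf = hip.hip_malloc(1 << 20)        # hipMalloc or cudaMalloc underneath
    handle = hip.get_ipc_handle(buf, 0)  # handle another rank could pass to open_ipc_handle
    hip.hip_free(buf)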