Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
210 changes: 210 additions & 0 deletions cmake/thirdparty/get_pmix.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
# =============================================================================
# cmake-format: off
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: Apache-2.0
# cmake-format: on
# =============================================================================

#[=======================================================================[.rst:
get_pmix
--------

Find the PMIx (Process Management Interface - Exascale) library.

This module finds the PMIx library, which is typically provided by Slurm
or OpenPMIx installations. PMIx enables scalable process coordination
without requiring a shared filesystem.

Imported Targets
^^^^^^^^^^^^^^^^

This module provides the following imported targets, if found:

``PMIx::PMIx``
The PMIx library

Result Variables
^^^^^^^^^^^^^^^^

This will define the following variables:

``PMIx_FOUND``
True if the system has the PMIx library.
``PMIx_VERSION``
The version of the PMIx library which was found.
``PMIx_INCLUDE_DIRS``
Include directories needed to use PMIx.
``PMIx_LIBRARIES``
Libraries needed to link to PMIx.

Hints
^^^^^

The following variables can be set to help find PMIx:

``PMIx_ROOT``
Root directory of PMIx installation.
``PMIX_ROOT``
Alternative root directory variable.
``SLURM_ROOT``
Slurm installation directory (PMIx may be bundled with Slurm).

#]=======================================================================]

# Extract the PMIx version from ``<include_dir>/pmix_version.h``.
#
# Arguments:
#   include_dir - directory expected to contain ``pmix_version.h``.
#
# Result:
#   Sets PMIx_VERSION in the caller's scope to "<major>.<minor>.<release>", or to
#   "<major>.<minor>" when the header carries no release component. PMIx_VERSION is left untouched
#   when the header does not exist or does not define at least the major and minor components.
function(_pmix_extract_version include_dir)
  set(_pmix_version_header "${include_dir}/pmix_version.h")
  if(NOT EXISTS "${_pmix_version_header}")
    return()
  endif()

  # Accept both macro spellings: PMIX_VERSION_<PART> and PMIX_<PART>_VERSION.
  # NOTE(review): OpenPMIx's installed pmix_version.h appears to use PMIX_VERSION_<PART> with an
  # `L` suffix on the value (e.g. `5L`) -- confirm against the headers this project targets. Only
  # the leading digits are captured, so a suffix is harmless either way.
  set(_pmix_parts "MAJOR|MINOR|RELEASE")
  file(
    STRINGS "${_pmix_version_header}" _pmix_version_lines
    REGEX "#[ \t]*define[ \t]+PMIX_(VERSION_(${_pmix_parts})|(${_pmix_parts})_VERSION)[ \t]"
  )

  foreach(_part IN ITEMS MAJOR MINOR RELEASE)
    # A function scope inherits the caller's variables; clear any stale value so the DEFINED checks
    # below reflect only what this header provides.
    unset(_pmix_${_part})
    foreach(_line IN LISTS _pmix_version_lines)
      if(_line MATCHES "#[ \t]*define[ \t]+PMIX_(VERSION_${_part}|${_part}_VERSION)[ \t]+([0-9]+)")
        set(_pmix_${_part} "${CMAKE_MATCH_2}")
      endif()
    endforeach()
  endforeach()

  if(DEFINED _pmix_MAJOR AND DEFINED _pmix_MINOR)
    set(_pmix_version "${_pmix_MAJOR}.${_pmix_MINOR}")
    if(DEFINED _pmix_RELEASE)
      string(APPEND _pmix_version ".${_pmix_RELEASE}")
    endif()
    set(PMIx_VERSION
        "${_pmix_version}"
        PARENT_SCOPE
    )
  endif()
endfunction()

# Define the PMIx::PMIx imported target unless it already exists.
#
# Arguments:
#   library     - path to the PMIx library, stored as the target's IMPORTED_LOCATION.
#   include_dir - PMIx header directory, stored as the target's INTERFACE_INCLUDE_DIRECTORIES.
#
# Also searches for the optional libevent (event_core, event_pthreads) and hwloc libraries and
# appends each one that is found to the target's INTERFACE_LINK_LIBRARIES.
function(_pmix_create_target library include_dir)
  if(NOT TARGET PMIx::PMIx)
    add_library(PMIx::PMIx UNKNOWN IMPORTED)
    set_property(TARGET PMIx::PMIx PROPERTY IMPORTED_LOCATION "${library}")
    set_property(TARGET PMIx::PMIx PROPERTY INTERFACE_INCLUDE_DIRECTORIES "${include_dir}")

    # PMIx may depend on libevent or hwloc; look them up so they can be linked when present.
    find_library(EVENT_CORE_LIBRARY event_core)
    find_library(EVENT_PTHREADS_LIBRARY event_pthreads)
    find_library(HWLOC_LIBRARY hwloc)

    # Append in a fixed order (event_core, event_pthreads, hwloc), skipping anything not found.
    if(EVENT_CORE_LIBRARY)
      set_property(
        TARGET PMIx::PMIx
        APPEND
        PROPERTY INTERFACE_LINK_LIBRARIES "${EVENT_CORE_LIBRARY}"
      )
    endif()
    if(EVENT_PTHREADS_LIBRARY)
      set_property(
        TARGET PMIx::PMIx
        APPEND
        PROPERTY INTERFACE_LINK_LIBRARIES "${EVENT_PTHREADS_LIBRARY}"
      )
    endif()
    if(HWLOC_LIBRARY)
      set_property(
        TARGET PMIx::PMIx
        APPEND
        PROPERTY INTERFACE_LINK_LIBRARIES "${HWLOC_LIBRARY}"
      )
    endif()

    # Keep the search-result cache entries out of the default cache view.
    mark_as_advanced(
      PMIx_INCLUDE_DIR PMIx_LIBRARY EVENT_CORE_LIBRARY EVENT_PTHREADS_LIBRARY HWLOC_LIBRARY
    )
  endif()
endfunction()

# Find and configure the PMIx library.
#
# Search order: pkg-config results (when pkg-config is available), then the user-supplied roots
# PMIx_ROOT, PMIX_ROOT and SLURM_ROOT (each as a CMake variable and as an environment variable),
# then a short list of conventional install locations.
#
# Result (set in the caller's scope):
#   PMIx_FOUND        - whether both the library and the headers were located.
#   PMIx_VERSION      - version parsed from pmix_version.h, or the pkg-config version as a
#                       fallback; only set when one of the two is available.
#   PMIx_INCLUDE_DIRS - value of PMIx_INCLUDE_DIR.
#   PMIx_LIBRARIES    - value of PMIx_LIBRARY.
#
# Creates the PMIx::PMIx imported target when PMIx is found.
function(find_and_configure_pmix)
  # The target already exists (e.g. from an earlier call). Skip the search, but re-publish whatever
  # earlier search results are still visible so repeat callers see the documented variables too.
  if(TARGET PMIx::PMIx)
    set(PMIx_FOUND
        TRUE
        PARENT_SCOPE
    )
    if(PMIx_INCLUDE_DIR)
      _pmix_extract_version("${PMIx_INCLUDE_DIR}")
      set(PMIx_INCLUDE_DIRS
          "${PMIx_INCLUDE_DIR}"
          PARENT_SCOPE
      )
    endif()
    if(PMIx_LIBRARY)
      set(PMIx_LIBRARIES
          "${PMIx_LIBRARY}"
          PARENT_SCOPE
      )
    endif()
    if(NOT PMIx_VERSION AND PC_PMIx_VERSION)
      set(PMIx_VERSION "${PC_PMIx_VERSION}")
    endif()
    if(PMIx_VERSION)
      set(PMIx_VERSION
          "${PMIx_VERSION}"
          PARENT_SCOPE
      )
    endif()
    return()
  endif()

  # First try pkg-config; its results are only used as hints for the searches below.
  find_package(PkgConfig QUIET)
  if(PKG_CONFIG_FOUND)
    pkg_check_modules(PC_PMIx QUIET pmix)
  endif()

  # Collect the user-supplied roots, dropping unset/empty ones so they do not degrade into bogus
  # "/include", "/lib" and "/lib64" hints. PMIX_ROOT is honoured as a CMake variable as well as an
  # environment variable, matching the documented hints.
  set(_pmix_roots "")
  foreach(_root IN ITEMS "${PMIx_ROOT}" "$ENV{PMIx_ROOT}" "${PMIX_ROOT}" "$ENV{PMIX_ROOT}"
                         "${SLURM_ROOT}" "$ENV{SLURM_ROOT}"
  )
    if(NOT "${_root}" STREQUAL "")
      list(APPEND _pmix_roots "${_root}")
    endif()
  endforeach()

  set(_pmix_include_hints ${PC_PMIx_INCLUDEDIR} ${PC_PMIx_INCLUDE_DIRS})
  set(_pmix_library_hints ${PC_PMIx_LIBDIR} ${PC_PMIx_LIBRARY_DIRS})
  foreach(_root IN LISTS _pmix_roots)
    list(APPEND _pmix_include_hints "${_root}/include")
    list(APPEND _pmix_library_hints "${_root}/lib" "${_root}/lib64")
  endforeach()

  # Find include directory
  find_path(
    PMIx_INCLUDE_DIR
    NAMES pmix.h
    HINTS ${_pmix_include_hints}
    PATHS /usr/include /usr/local/include /opt/pmix/include /usr/include/slurm
          /usr/local/include/slurm
  )

  # Find library
  find_library(
    PMIx_LIBRARY
    NAMES pmix
    HINTS ${_pmix_library_hints}
    PATHS /usr/lib /usr/lib64 /usr/local/lib /usr/local/lib64 /opt/pmix/lib /opt/pmix/lib64
  )

  # Prefer the version recorded in the header that was actually found.
  if(PMIx_INCLUDE_DIR)
    _pmix_extract_version("${PMIx_INCLUDE_DIR}")
  endif()
  # Best-effort fallback: use the version pkg-config reported when the header yields none.
  if(NOT PMIx_VERSION AND PC_PMIx_VERSION)
    set(PMIx_VERSION "${PC_PMIx_VERSION}")
  endif()

  include(FindPackageHandleStandardArgs)
  find_package_handle_standard_args(
    PMIx
    REQUIRED_VARS PMIx_LIBRARY PMIx_INCLUDE_DIR
    VERSION_VAR PMIx_VERSION
  )

  if(PMIx_FOUND)
    _pmix_create_target("${PMIx_LIBRARY}" "${PMIx_INCLUDE_DIR}")
  endif()

  # Export results to parent scope
  set(PMIx_FOUND
      "${PMIx_FOUND}"
      PARENT_SCOPE
  )
  if(DEFINED PMIx_VERSION)
    set(PMIx_VERSION
        "${PMIx_VERSION}"
        PARENT_SCOPE
    )
  endif()
  set(PMIx_INCLUDE_DIRS
      "${PMIx_INCLUDE_DIR}"
      PARENT_SCOPE
  )
  set(PMIx_LIBRARIES
      "${PMIx_LIBRARY}"
      PARENT_SCOPE
  )
endfunction()
1 change: 1 addition & 0 deletions conda/environments/all_cuda-129_arch-aarch64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ dependencies:
- ipython
- libcudf==26.4.*,>=0.0.0a0
- libnuma
- libpmix-devel >=5.0,<6.0
- librmm==26.4.*,>=0.0.0a0
- libucxx==0.49.*,>=0.0.0a0
- make
Expand Down
1 change: 1 addition & 0 deletions conda/environments/all_cuda-129_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ dependencies:
- ipython
- libcudf==26.4.*,>=0.0.0a0
- libnuma
- libpmix-devel >=5.0,<6.0
- librmm==26.4.*,>=0.0.0a0
- libucxx==0.49.*,>=0.0.0a0
- make
Expand Down
1 change: 1 addition & 0 deletions conda/environments/all_cuda-131_arch-aarch64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ dependencies:
- ipython
- libcudf==26.4.*,>=0.0.0a0
- libnuma
- libpmix-devel >=5.0,<6.0
- librmm==26.4.*,>=0.0.0a0
- libucxx==0.49.*,>=0.0.0a0
- make
Expand Down
1 change: 1 addition & 0 deletions conda/environments/all_cuda-131_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ dependencies:
- ipython
- libcudf==26.4.*,>=0.0.0a0
- libnuma
- libpmix-devel >=5.0,<6.0
- librmm==26.4.*,>=0.0.0a0
- libucxx==0.49.*,>=0.0.0a0
- make
Expand Down
5 changes: 4 additions & 1 deletion conda/recipes/librapidsmpf/recipe.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
# SPDX-License-Identifier: Apache-2.0
schema_version: 1

Expand Down Expand Up @@ -79,6 +79,7 @@ cache:
- cuda-nvml-dev
- libcudf =${{ minor_version }}
- libnuma
- libpmix-devel >=5.0,<6.0
- librmm =${{ minor_version }}
- openmpi >=5.0 # See <https://github.com/rapidsai/rapidsmpf/issues/17>
- ucxx ${{ ucxx_version }}
Expand Down Expand Up @@ -115,6 +116,7 @@ outputs:
- cuda-cupti-dev
- cuda-nvml-dev
- libcudf =${{ minor_version }}
- libpmix-devel >=5.0,<6.0
- openmpi >=5.0
- ucxx ${{ ucxx_version }}
run:
Expand All @@ -123,6 +125,7 @@ outputs:
- cuda-cupti
- librmm =${{ minor_version }}
- libcudf =${{ minor_version }}
- libpmix >=5.0,<6.0
- openmpi >=5.0 # See <https://github.com/rapidsai/rapidsmpf/issues/17>
- ucxx ${{ ucxx_version }}
ignore_run_exports:
Expand Down
21 changes: 21 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ option(BUILD_UCXX_SUPPORT "Build RapidsMPF with UCXX support" ON)
option(BUILD_STREAMING_SUPPORT "Build RapidsMPF with streaming support" ON)
option(BUILD_CUPTI_SUPPORT "Build RapidsMPF with CUPTI support" OFF)
option(BUILD_NUMA_SUPPORT "Build RapidsMPF with NUMA support" ON)
option(BUILD_SLURM_SUPPORT "Build RapidsMPF with Slurm/PMIx bootstrap support" ON)
option(BUILD_TESTS "Configure CMake to build tests" ON)
option(BUILD_BENCHMARKS "Configure CMake to build benchmarks" ON)
option(BUILD_EXAMPLES "Configure CMake to build examples" ON)
Expand All @@ -62,6 +63,7 @@ message(STATUS " BUILD_UCXX_SUPPORT : ${BUILD_UCXX_SUPPORT}")
message(STATUS " BUILD_STREAMING_SUPPORT : ${BUILD_STREAMING_SUPPORT}")
message(STATUS " BUILD_CUPTI_SUPPORT : ${BUILD_CUPTI_SUPPORT}")
message(STATUS " BUILD_NUMA_SUPPORT : ${BUILD_NUMA_SUPPORT}")
message(STATUS " BUILD_SLURM_SUPPORT : ${BUILD_SLURM_SUPPORT}")
message(STATUS " BUILD_TESTS : ${BUILD_TESTS}")
message(STATUS " BUILD_BENCHMARKS : ${BUILD_BENCHMARKS}")
message(STATUS " BUILD_EXAMPLES : ${BUILD_EXAMPLES}")
Expand All @@ -77,6 +79,7 @@ set(RAPIDSMPF_HAVE_UCXX ${BUILD_UCXX_SUPPORT})
set(RAPIDSMPF_HAVE_STREAMING ${BUILD_STREAMING_SUPPORT})
set(RAPIDSMPF_HAVE_CUPTI ${BUILD_CUPTI_SUPPORT})
set(RAPIDSMPF_HAVE_NUMA ${BUILD_NUMA_SUPPORT})
set(RAPIDSMPF_HAVE_SLURM OFF) # Will be set to ON if PMIx is found
set(RAPIDSMPF_BUILD_TESTS ${BUILD_TESTS})
set(RAPIDSMPF_BUILD_BENCHMARKS ${BUILD_BENCHMARKS})
set(RAPIDSMPF_BUILD_EXAMPLES ${BUILD_EXAMPLES})
Expand Down Expand Up @@ -156,6 +159,18 @@ endif()
if(RAPIDSMPF_HAVE_STREAMING)
include(../cmake/thirdparty/get_libcoro.cmake)
endif()
if(BUILD_SLURM_SUPPORT)
include(../cmake/thirdparty/get_pmix.cmake)
find_and_configure_pmix()
if(PMIx_FOUND)
set(RAPIDSMPF_HAVE_SLURM ON)
message(STATUS "PMIx found (version ${PMIx_VERSION}) - Slurm bootstrap backend enabled")
else()
message(WARNING "PMIx not found - Slurm bootstrap backend will be disabled. "
"Set PMIx_ROOT or PMIX_ROOT to the PMIx installation directory."
)
endif()
endif()

# ##################################################################################################
# * library targets --------------------------------------------------------------------------------
Expand Down Expand Up @@ -236,6 +251,9 @@ endif()
if(RAPIDSMPF_HAVE_CUPTI)
target_sources(rapidsmpf PRIVATE src/cupti.cpp)
endif()
if(RAPIDSMPF_HAVE_SLURM)
target_sources(rapidsmpf PRIVATE src/bootstrap/slurm_backend.cpp)
endif()

set_target_properties(
rapidsmpf
Expand Down Expand Up @@ -300,6 +318,7 @@ target_link_libraries(
$<$<BOOL:${RAPIDSMPF_HAVE_NUMA}>:numa>
$<TARGET_NAME_IF_EXISTS:MPI::MPI_C>
$<$<BOOL:${RAPIDSMPF_HAVE_CUPTI}>:CUDA::cupti>
$<TARGET_NAME_IF_EXISTS:PMIx::PMIx>
$<TARGET_NAME_IF_EXISTS:conda_env>
maybe_asan
$<TARGET_NAME_IF_EXISTS:CCCL::cudax>
Expand All @@ -315,6 +334,7 @@ target_compile_definitions(
$<$<BOOL:${RAPIDSMPF_HAVE_STREAMING}>:RAPIDSMPF_HAVE_STREAMING>
$<$<BOOL:${RAPIDSMPF_HAVE_CUPTI}>:RAPIDSMPF_HAVE_CUPTI>
$<$<BOOL:${RAPIDSMPF_HAVE_NUMA}>:RAPIDSMPF_HAVE_NUMA>
$<$<BOOL:${RAPIDSMPF_HAVE_SLURM}>:RAPIDSMPF_HAVE_SLURM>
$<$<BOOL:${RAPIDSMPF_VERBOSE_INFO}>:RAPIDSMPF_VERBOSE_INFO>
)

Expand Down Expand Up @@ -434,6 +454,7 @@ string(
"set(RAPIDSMPF_HAVE_UCXX [=[${RAPIDSMPF_HAVE_UCXX}]=])"
"set(RAPIDSMPF_HAVE_STREAMING [=[${RAPIDSMPF_HAVE_STREAMING}]=])"
"set(RAPIDSMPF_HAVE_CUPTI [=[${RAPIDSMPF_HAVE_CUPTI}]=])"
"set(RAPIDSMPF_HAVE_SLURM [=[${RAPIDSMPF_HAVE_SLURM}]=])"
)

rapids_export(
Expand Down
2 changes: 1 addition & 1 deletion cpp/benchmarks/bench_comm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ int main(int argc, char** argv) {
if (use_bootstrap) {
// Launched with rrun - use bootstrap backend
comm = rapidsmpf::bootstrap::create_ucxx_comm(
rapidsmpf::bootstrap::Backend::AUTO, options
rapidsmpf::bootstrap::BackendType::AUTO, options
);
} else {
// Launched with mpirun - use MPI bootstrap
Expand Down
2 changes: 1 addition & 1 deletion cpp/benchmarks/bench_shuffle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -614,7 +614,7 @@ int main(int argc, char** argv) {
if (use_bootstrap) {
// Launched with rrun - use bootstrap backend
comm = rapidsmpf::bootstrap::create_ucxx_comm(
rapidsmpf::bootstrap::Backend::AUTO, options
rapidsmpf::bootstrap::BackendType::AUTO, options
);
} else {
// Launched with mpirun - use MPI bootstrap
Expand Down
2 changes: 1 addition & 1 deletion cpp/benchmarks/streaming/bench_streaming_shuffle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,7 @@ int main(int argc, char** argv) {
if (use_bootstrap) {
// Launched with rrun - use bootstrap backend
comm = rapidsmpf::bootstrap::create_ucxx_comm(
rapidsmpf::bootstrap::Backend::AUTO, options
rapidsmpf::bootstrap::BackendType::AUTO, options
);
} else {
// Launched with mpirun - use MPI bootstrap
Expand Down
2 changes: 1 addition & 1 deletion cpp/benchmarks/streaming/ndsh/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ std::shared_ptr<streaming::Context> create_context(
break;
case CommType::UCXX:
if (bootstrap::is_running_with_rrun()) {
comm = bootstrap::create_ucxx_comm(bootstrap::Backend::AUTO, options);
comm = bootstrap::create_ucxx_comm(bootstrap::BackendType::AUTO, options);
} else {
mpi::init(nullptr, nullptr);
comm = ucxx::init_using_mpi(MPI_COMM_WORLD, options);
Expand Down
Loading
Loading