From 040514f4e96d699c1e2751df6f9c09aeccb3cfbe Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Wed, 25 Mar 2026 10:53:03 +0100 Subject: [PATCH 01/68] Add cicd-ext CI configuration --- .cscs-ci/container/build.Containerfile | 8 ++ .cscs-ci/container/deps.Containerfile | 24 ++++ .cscs-ci/default.yaml | 191 +++++++++++++++++++++++++ .cscs-ci/spack/libfabric.yaml | 6 + .cscs-ci/spack/mpi.yaml | 6 + .cscs-ci/spack/nccl.yaml | 6 + .cscs-ci/spack/ucx.yaml | 6 + test/CMakeLists.txt | 3 +- test/bindings/fortran/CMakeLists.txt | 3 +- 9 files changed, 251 insertions(+), 2 deletions(-) create mode 100644 .cscs-ci/container/build.Containerfile create mode 100644 .cscs-ci/container/deps.Containerfile create mode 100644 .cscs-ci/default.yaml create mode 100644 .cscs-ci/spack/libfabric.yaml create mode 100644 .cscs-ci/spack/mpi.yaml create mode 100644 .cscs-ci/spack/nccl.yaml create mode 100644 .cscs-ci/spack/ucx.yaml diff --git a/.cscs-ci/container/build.Containerfile b/.cscs-ci/container/build.Containerfile new file mode 100644 index 00000000..784221cb --- /dev/null +++ b/.cscs-ci/container/build.Containerfile @@ -0,0 +1,8 @@ +ARG DEPS_IMAGE +FROM $DEPS_IMAGE + +COPY . 
/oomph +WORKDIR /oomph + +RUN spack -e ci build-env oomph -- cmake -B build -DOOMPH_WITH_TESTING=ON -DMPIEXEC_EXECUTABLE="" -DMPIEXEC_NUMPROC_FLAG="" -DMPIEXEC_PREFLAGS="" -DMPIEXEC_POSTFLAGS="" && \ + spack -e ci build-env oomph -- cmake --build build -j$(nproc) diff --git a/.cscs-ci/container/deps.Containerfile b/.cscs-ci/container/deps.Containerfile new file mode 100644 index 00000000..73c225c1 --- /dev/null +++ b/.cscs-ci/container/deps.Containerfile @@ -0,0 +1,24 @@ +FROM ghcr.io/eth-cscs/alps-images:py26.01-alps3-base + +ARG SPACK_SHA=develop +ARG SPACK_PACKAGES_SHA=main +ARG SPACK_ENV_FILE + +ENV DEBIAN_FRONTEND=noninteractive + +RUN mkdir -p /opt/spack && \ + curl -Ls "https://api.github.com/repos/spack/spack/tarball/$SPACK_SHA" | tar --strip-components=1 -xz -C /opt/spack + +ENV PATH="/opt/spack/bin:$PATH" + +RUN mkdir -p /opt/spack-packages && \ + curl -Ls "https://api.github.com/repos/spack/spack-packages/tarball/$SPACK_PACKAGES_SHA" | tar --strip-components=1 -xz -C /opt/spack-packages + +RUN spack repo remove --scope defaults:base builtin && \ + spack repo add --scope site /opt/spack-packages/repos/spack_repo/builtin + +COPY $SPACK_ENV_FILE /spack_environment/spack.yaml + +RUN spack env create ci /spack_environment/spack.yaml && \ + spack -e ci concretize -f && \ + spack -e ci install --jobs $(nproc) --fail-fast --only=dependencies diff --git a/.cscs-ci/default.yaml b/.cscs-ci/default.yaml new file mode 100644 index 00000000..959ba3d4 --- /dev/null +++ b/.cscs-ci/default.yaml @@ -0,0 +1,191 @@ +include: + - remote: 'https://gitlab.com/cscs-ci/recipes/-/raw/master/templates/v2/.ci-ext.yml' + +stages: + - build_deps + - build + - test + +variables: + # The base image is the py26.01 alps3 image from docs.cscs.ch + BASE_IMAGE: ghcr.io/eth-cscs/alps-images:py26.01-alps3-base + SPACK_SHA: develop + SPACK_PACKAGES_SHA: main + FF_TIMESTAMPS: true + +.build_deps_template: + stage: build_deps + timeout: 1 hours + before_script: + - echo $DOCKERHUB_TOKEN | podman 
login docker.io -u $DOCKERHUB_USERNAME --password-stdin || true + - export DOCKERFILE_SHA=`sha256sum .cscs-ci/container/deps.Containerfile | head -c 16` + - export ENV_FILE_SHA=`sha256sum ${SPACK_ENV_FILE} | head -c 16` + - export CONFIG_TAG=`echo $DOCKERFILE_SHA-$BASE_IMAGE-$SPACK_SHA-$SPACK_PACKAGES_SHA-$ENV_FILE_SHA | sha256sum - | head -c 16` + - export PERSIST_IMAGE_NAME=$CSCS_REGISTRY_PATH/oomph-spack-deps-$BACKEND:$CONFIG_TAG + - echo -e "CONFIG_TAG=$CONFIG_TAG" >> base-${BACKEND}.env + - echo -e "DEPS_IMAGE=$PERSIST_IMAGE_NAME" >> base-${BACKEND}.env + variables: + DOCKERFILE: .cscs-ci/container/deps.Containerfile + DOCKER_BUILD_ARGS: '["SPACK_SHA", "SPACK_PACKAGES_SHA", "SPACK_ENV_FILE"]' + artifacts: + reports: + dotenv: base-${BACKEND}.env + +build_deps_nccl: + extends: + - .container-builder-cscs-gh200 + - .build_deps_template + variables: + BACKEND: nccl + SPACK_ENV_FILE: .cscs-ci/spack/nccl.yaml + +build_deps_mpi: + extends: + - .container-builder-cscs-gh200 + - .build_deps_template + variables: + BACKEND: mpi + SPACK_ENV_FILE: .cscs-ci/spack/mpi.yaml + +build_deps_ucx: + extends: + - .container-builder-cscs-gh200 + - .build_deps_template + variables: + BACKEND: ucx + SPACK_ENV_FILE: .cscs-ci/spack/ucx.yaml + +build_deps_libfabric: + extends: + - .container-builder-cscs-gh200 + - .build_deps_template + variables: + BACKEND: libfabric + SPACK_ENV_FILE: .cscs-ci/spack/libfabric.yaml + +.build_template: + stage: build + extends: .container-builder-cscs-gh200 + timeout: 1 hours + before_script: + - echo $DOCKERHUB_TOKEN | podman login docker.io -u $DOCKERHUB_USERNAME --password-stdin || true + - export PERSIST_IMAGE_NAME=$CSCS_REGISTRY_PATH/oomph-build-$BACKEND:$CI_COMMIT_SHA + - echo -e "BUILD_IMAGE=$PERSIST_IMAGE_NAME" >> build-${BACKEND}.env + variables: + DOCKERFILE: .cscs-ci/container/build.Containerfile + DOCKER_BUILD_ARGS: '["DEPS_IMAGE"]' + artifacts: + reports: + dotenv: build-${BACKEND}.env + +build_nccl: + extends: .build_template + needs: + - 
job: build_deps_nccl + artifacts: true + variables: + BACKEND: nccl + +build_mpi: + extends: .build_template + needs: + - job: build_deps_mpi + artifacts: true + variables: + BACKEND: mpi + +build_ucx: + extends: .build_template + needs: + - job: build_deps_ucx + artifacts: true + variables: + BACKEND: ucx + +build_libfabric: + extends: .build_template + needs: + - job: build_deps_libfabric + artifacts: true + variables: + BACKEND: libfabric + +.test_serial_template: + stage: test + extends: .container-runner-clariden-gh200 + variables: + SLURM_JOB_NUM_NODES: 1 + SLURM_NTASKS: 1 + SLURM_TIMELIMIT: '00:15:00' + SLURM_PARTITION: normal + script: + - ctest --test-dir build -L "serial" --output-on-failure + +.test_parallel_template: + stage: test + extends: .container-runner-clariden-gh200 + variables: + SLURM_JOB_NUM_NODES: 1 + SLURM_NTASKS: 4 + SLURM_TIMELIMIT: '00:15:00' + SLURM_PARTITION: normal + SLURM_MPI: pmix + MPICH_GPU_SUPPORT_ENABLED: 1 + script: + - srun -n 4 ctest --test-dir build -L "parallel-ranks-4" --output-on-failure + +test_serial_nccl: + extends: .test_serial_template + needs: + - job: build_nccl + artifacts: true + image: $BUILD_IMAGE + +test_parallel_nccl: + extends: .test_parallel_template + needs: + - job: build_nccl + artifacts: true + image: $BUILD_IMAGE + +test_serial_mpi: + extends: .test_serial_template + needs: + - job: build_mpi + artifacts: true + image: $BUILD_IMAGE + +test_parallel_mpi: + extends: .test_parallel_template + needs: + - job: build_mpi + artifacts: true + image: $BUILD_IMAGE + +test_serial_ucx: + extends: .test_serial_template + needs: + - job: build_ucx + artifacts: true + image: $BUILD_IMAGE + +test_parallel_ucx: + extends: .test_parallel_template + needs: + - job: build_ucx + artifacts: true + image: $BUILD_IMAGE + +test_serial_libfabric: + extends: .test_serial_template + needs: + - job: build_libfabric + artifacts: true + image: $BUILD_IMAGE + +test_parallel_libfabric: + extends: .test_parallel_template + needs: + - 
job: build_libfabric + artifacts: true + image: $BUILD_IMAGE diff --git a/.cscs-ci/spack/libfabric.yaml b/.cscs-ci/spack/libfabric.yaml new file mode 100644 index 00000000..f659f278 --- /dev/null +++ b/.cscs-ci/spack/libfabric.yaml @@ -0,0 +1,6 @@ +spack: + specs: + - oomph backend=libfabric +cuda +python + view: false + concretizer: + unify: true diff --git a/.cscs-ci/spack/mpi.yaml b/.cscs-ci/spack/mpi.yaml new file mode 100644 index 00000000..696d894d --- /dev/null +++ b/.cscs-ci/spack/mpi.yaml @@ -0,0 +1,6 @@ +spack: + specs: + - oomph backend=mpi +cuda +python + view: false + concretizer: + unify: true diff --git a/.cscs-ci/spack/nccl.yaml b/.cscs-ci/spack/nccl.yaml new file mode 100644 index 00000000..2dc59834 --- /dev/null +++ b/.cscs-ci/spack/nccl.yaml @@ -0,0 +1,6 @@ +spack: + specs: + - oomph backend=nccl +cuda +python + view: false + concretizer: + unify: true diff --git a/.cscs-ci/spack/ucx.yaml b/.cscs-ci/spack/ucx.yaml new file mode 100644 index 00000000..76100e29 --- /dev/null +++ b/.cscs-ci/spack/ucx.yaml @@ -0,0 +1,6 @@ +spack: + specs: + - oomph backend=ucx +cuda +python + view: false + concretizer: + unify: true diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 5217bbaf..31fea066 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -48,6 +48,7 @@ function(reg_serial_test t) add_test( NAME ${t} COMMAND $) + set_tests_properties(${t} PROPERTIES LABELS "serial") endfunction() foreach(t ${serial_tests}) @@ -65,7 +66,7 @@ function(reg_parallel_test t_ lib n) NAME ${t} COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} ${n} ${MPIEXEC_PREFLAGS} $ ${MPIEXEC_POSTFLAGS}) - set_tests_properties(${t} PROPERTIES RUN_SERIAL TRUE) + set_tests_properties(${t} PROPERTIES RUN_SERIAL TRUE LABELS "parallel-ranks-${n}") endfunction() if (OOMPH_WITH_MPI) diff --git a/test/bindings/fortran/CMakeLists.txt b/test/bindings/fortran/CMakeLists.txt index 974d2f7c..10e69e15 100644 --- a/test/bindings/fortran/CMakeLists.txt +++ 
b/test/bindings/fortran/CMakeLists.txt @@ -30,7 +30,8 @@ function(reg_parallel_test_f t_ lib n nthr) COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} ${n} ${MPIEXEC_PREFLAGS} $ ${MPIEXEC_POSTFLAGS}) set_tests_properties(${t} PROPERTIES - ENVIRONMENT OMP_NUM_THREADS=${nthr}) + ENVIRONMENT OMP_NUM_THREADS=${nthr} + LABELS "parallel-ranks-${n}") endfunction() if (OOMPH_WITH_MPI) From ba1784c191cc247422c4d6c2a7ae159482f5af9c Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Wed, 25 Mar 2026 16:45:03 +0100 Subject: [PATCH 02/68] Apply suggestions from code review Co-authored-by: Mikael Simberg --- .cscs-ci/container/deps.Containerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.cscs-ci/container/deps.Containerfile b/.cscs-ci/container/deps.Containerfile index 73c225c1..a07e4797 100644 --- a/.cscs-ci/container/deps.Containerfile +++ b/.cscs-ci/container/deps.Containerfile @@ -1,7 +1,7 @@ FROM ghcr.io/eth-cscs/alps-images:py26.01-alps3-base -ARG SPACK_SHA=develop -ARG SPACK_PACKAGES_SHA=main +ARG SPACK_SHA=v1.1.1 +ARG SPACK_PACKAGES_SHA=bc93746ce936d6653271b6e98f6df6ee28f64e84 # develop on 2026-03-25 ARG SPACK_ENV_FILE ENV DEBIAN_FRONTEND=noninteractive From 8cf50769349d3f432e602a44f2c48a94e56d769d Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Wed, 25 Mar 2026 16:45:54 +0100 Subject: [PATCH 03/68] Apply suggestion from @msimberg --- .cscs-ci/container/deps.Containerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.cscs-ci/container/deps.Containerfile b/.cscs-ci/container/deps.Containerfile index a07e4797..32920e3d 100644 --- a/.cscs-ci/container/deps.Containerfile +++ b/.cscs-ci/container/deps.Containerfile @@ -19,6 +19,7 @@ RUN spack repo remove --scope defaults:base builtin && \ COPY $SPACK_ENV_FILE /spack_environment/spack.yaml -RUN spack env create ci /spack_environment/spack.yaml && \ +RUN spack external find --all && \ + spack env create ci /spack_environment/spack.yaml && \ spack -e ci concretize -f && 
\ spack -e ci install --jobs $(nproc) --fail-fast --only=dependencies From 5423771993a2b6bec66453337d9ba61a9bd3e0a1 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Wed, 25 Mar 2026 21:59:35 +0100 Subject: [PATCH 04/68] Fix CI container build args --- .cscs-ci/container/deps.Containerfile | 12 +++++------- .cscs-ci/default.yaml | 7 +++---- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/.cscs-ci/container/deps.Containerfile b/.cscs-ci/container/deps.Containerfile index 32920e3d..bcba848f 100644 --- a/.cscs-ci/container/deps.Containerfile +++ b/.cscs-ci/container/deps.Containerfile @@ -1,22 +1,20 @@ -FROM ghcr.io/eth-cscs/alps-images:py26.01-alps3-base - -ARG SPACK_SHA=v1.1.1 -ARG SPACK_PACKAGES_SHA=bc93746ce936d6653271b6e98f6df6ee28f64e84 # develop on 2026-03-25 -ARG SPACK_ENV_FILE - -ENV DEBIAN_FRONTEND=noninteractive +ARG BASE_IMAGE +FROM BASE_IMAGE +ARG SPACK_SHA RUN mkdir -p /opt/spack && \ curl -Ls "https://api.github.com/repos/spack/spack/tarball/$SPACK_SHA" | tar --strip-components=1 -xz -C /opt/spack ENV PATH="/opt/spack/bin:$PATH" +ARG SPACK_PACKAGES_SHA RUN mkdir -p /opt/spack-packages && \ curl -Ls "https://api.github.com/repos/spack/spack-packages/tarball/$SPACK_PACKAGES_SHA" | tar --strip-components=1 -xz -C /opt/spack-packages RUN spack repo remove --scope defaults:base builtin && \ spack repo add --scope site /opt/spack-packages/repos/spack_repo/builtin +ARG SPACK_ENV_FILE COPY $SPACK_ENV_FILE /spack_environment/spack.yaml RUN spack external find --all && \ diff --git a/.cscs-ci/default.yaml b/.cscs-ci/default.yaml index 959ba3d4..cc1818b4 100644 --- a/.cscs-ci/default.yaml +++ b/.cscs-ci/default.yaml @@ -7,10 +7,9 @@ stages: - test variables: - # The base image is the py26.01 alps3 image from docs.cscs.ch - BASE_IMAGE: ghcr.io/eth-cscs/alps-images:py26.01-alps3-base - SPACK_SHA: develop - SPACK_PACKAGES_SHA: main + BASE_IMAGE: jfrog.svc.ccs.ch/docker-group-csstaff/alps-images/ngc-pytorch:26.01-py3-alps3 + SPACK_SHA: v1.1.1 + 
SPACK_PACKAGES_SHA: bc93746ce936d6653271b6e98f6df6ee28f64e84 # develop on 2026-03-25 FF_TIMESTAMPS: true .build_deps_template: From 3f15f1f207e5a5c4c73a80f267255c9fd88e54ab Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Wed, 25 Mar 2026 22:01:56 +0100 Subject: [PATCH 05/68] Specify oomph@main in spack environments --- .cscs-ci/spack/libfabric.yaml | 2 +- .cscs-ci/spack/mpi.yaml | 2 +- .cscs-ci/spack/nccl.yaml | 2 +- .cscs-ci/spack/ucx.yaml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.cscs-ci/spack/libfabric.yaml b/.cscs-ci/spack/libfabric.yaml index f659f278..27fdfb08 100644 --- a/.cscs-ci/spack/libfabric.yaml +++ b/.cscs-ci/spack/libfabric.yaml @@ -1,6 +1,6 @@ spack: specs: - - oomph backend=libfabric +cuda +python + - oomph@main backend=libfabric +cuda +python view: false concretizer: unify: true diff --git a/.cscs-ci/spack/mpi.yaml b/.cscs-ci/spack/mpi.yaml index 696d894d..90e45ff8 100644 --- a/.cscs-ci/spack/mpi.yaml +++ b/.cscs-ci/spack/mpi.yaml @@ -1,6 +1,6 @@ spack: specs: - - oomph backend=mpi +cuda +python + - oomph@main backend=mpi +cuda +python view: false concretizer: unify: true diff --git a/.cscs-ci/spack/nccl.yaml b/.cscs-ci/spack/nccl.yaml index 2dc59834..4c08a383 100644 --- a/.cscs-ci/spack/nccl.yaml +++ b/.cscs-ci/spack/nccl.yaml @@ -1,6 +1,6 @@ spack: specs: - - oomph backend=nccl +cuda +python + - oomph@main backend=nccl +cuda +python view: false concretizer: unify: true diff --git a/.cscs-ci/spack/ucx.yaml b/.cscs-ci/spack/ucx.yaml index 76100e29..251a4ec9 100644 --- a/.cscs-ci/spack/ucx.yaml +++ b/.cscs-ci/spack/ucx.yaml @@ -1,6 +1,6 @@ spack: specs: - - oomph backend=ucx +cuda +python + - oomph@main backend=ucx +cuda +python view: false concretizer: unify: true From b832fe622e928f849e6a132b2ac191880eac4acb Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Wed, 25 Mar 2026 22:02:26 +0100 Subject: [PATCH 06/68] Remove +python from spack specs --- .cscs-ci/spack/libfabric.yaml | 2 +- .cscs-ci/spack/mpi.yaml | 2 +- 
.cscs-ci/spack/nccl.yaml | 2 +- .cscs-ci/spack/ucx.yaml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.cscs-ci/spack/libfabric.yaml b/.cscs-ci/spack/libfabric.yaml index 27fdfb08..fac7f88f 100644 --- a/.cscs-ci/spack/libfabric.yaml +++ b/.cscs-ci/spack/libfabric.yaml @@ -1,6 +1,6 @@ spack: specs: - - oomph@main backend=libfabric +cuda +python + - oomph@main backend=libfabric +cuda view: false concretizer: unify: true diff --git a/.cscs-ci/spack/mpi.yaml b/.cscs-ci/spack/mpi.yaml index 90e45ff8..d59aab13 100644 --- a/.cscs-ci/spack/mpi.yaml +++ b/.cscs-ci/spack/mpi.yaml @@ -1,6 +1,6 @@ spack: specs: - - oomph@main backend=mpi +cuda +python + - oomph@main backend=mpi +cuda view: false concretizer: unify: true diff --git a/.cscs-ci/spack/nccl.yaml b/.cscs-ci/spack/nccl.yaml index 4c08a383..94f0dd31 100644 --- a/.cscs-ci/spack/nccl.yaml +++ b/.cscs-ci/spack/nccl.yaml @@ -1,6 +1,6 @@ spack: specs: - - oomph@main backend=nccl +cuda +python + - oomph@main backend=nccl +cuda view: false concretizer: unify: true diff --git a/.cscs-ci/spack/ucx.yaml b/.cscs-ci/spack/ucx.yaml index 251a4ec9..51377dd8 100644 --- a/.cscs-ci/spack/ucx.yaml +++ b/.cscs-ci/spack/ucx.yaml @@ -1,6 +1,6 @@ spack: specs: - - oomph@main backend=ucx +cuda +python + - oomph@main backend=ucx +cuda view: false concretizer: unify: true From 1e851df257cb97ba664ec392f0c472bcb9c3a615 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Wed, 25 Mar 2026 22:10:23 +0100 Subject: [PATCH 07/68] Remove stages --- .cscs-ci/default.yaml | 9 --------- 1 file changed, 9 deletions(-) diff --git a/.cscs-ci/default.yaml b/.cscs-ci/default.yaml index cc1818b4..c3e80df3 100644 --- a/.cscs-ci/default.yaml +++ b/.cscs-ci/default.yaml @@ -1,11 +1,6 @@ include: - remote: 'https://gitlab.com/cscs-ci/recipes/-/raw/master/templates/v2/.ci-ext.yml' -stages: - - build_deps - - build - - test - variables: BASE_IMAGE: jfrog.svc.ccs.ch/docker-group-csstaff/alps-images/ngc-pytorch:26.01-py3-alps3 SPACK_SHA: v1.1.1 
@@ -13,7 +8,6 @@ variables: FF_TIMESTAMPS: true .build_deps_template: - stage: build_deps timeout: 1 hours before_script: - echo $DOCKERHUB_TOKEN | podman login docker.io -u $DOCKERHUB_USERNAME --password-stdin || true @@ -63,7 +57,6 @@ build_deps_libfabric: SPACK_ENV_FILE: .cscs-ci/spack/libfabric.yaml .build_template: - stage: build extends: .container-builder-cscs-gh200 timeout: 1 hours before_script: @@ -110,7 +103,6 @@ build_libfabric: BACKEND: libfabric .test_serial_template: - stage: test extends: .container-runner-clariden-gh200 variables: SLURM_JOB_NUM_NODES: 1 @@ -121,7 +113,6 @@ build_libfabric: - ctest --test-dir build -L "serial" --output-on-failure .test_parallel_template: - stage: test extends: .container-runner-clariden-gh200 variables: SLURM_JOB_NUM_NODES: 1 From 0398d59869d80c6a002ceb37497820bc384b50a9 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Wed, 25 Mar 2026 22:13:40 +0100 Subject: [PATCH 08/68] Refactor ci config --- .cscs-ci/default.yaml | 39 ++++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/.cscs-ci/default.yaml b/.cscs-ci/default.yaml index c3e80df3..2a0bc830 100644 --- a/.cscs-ci/default.yaml +++ b/.cscs-ci/default.yaml @@ -19,42 +19,39 @@ variables: - echo -e "DEPS_IMAGE=$PERSIST_IMAGE_NAME" >> base-${BACKEND}.env variables: DOCKERFILE: .cscs-ci/container/deps.Containerfile - DOCKER_BUILD_ARGS: '["SPACK_SHA", "SPACK_PACKAGES_SHA", "SPACK_ENV_FILE"]' + DOCKER_BUILD_ARGS: '["BASE_IMAGE", "SPACK_SHA", "SPACK_PACKAGES_SHA", "SPACK_ENV_FILE"]' + SPACK_ENV_FILE: .cscs./spack/$BACKEND.yaml artifacts: reports: dotenv: base-${BACKEND}.env build_deps_nccl: + variables: + BACKEND: nccl extends: - .container-builder-cscs-gh200 - .build_deps_template - variables: - BACKEND: nccl - SPACK_ENV_FILE: .cscs-ci/spack/nccl.yaml build_deps_mpi: + variables: + BACKEND: mpi extends: - .container-builder-cscs-gh200 - .build_deps_template - variables: - BACKEND: mpi - SPACK_ENV_FILE: 
.cscs-ci/spack/mpi.yaml build_deps_ucx: + variables: + BACKEND: ucx extends: - .container-builder-cscs-gh200 - .build_deps_template - variables: - BACKEND: ucx - SPACK_ENV_FILE: .cscs-ci/spack/ucx.yaml build_deps_libfabric: + variables: + BACKEND: libfabric extends: - .container-builder-cscs-gh200 - .build_deps_template - variables: - BACKEND: libfabric - SPACK_ENV_FILE: .cscs-ci/spack/libfabric.yaml .build_template: extends: .container-builder-cscs-gh200 @@ -71,36 +68,36 @@ build_deps_libfabric: dotenv: build-${BACKEND}.env build_nccl: + variables: + BACKEND: nccl extends: .build_template needs: - job: build_deps_nccl artifacts: true - variables: - BACKEND: nccl build_mpi: + variables: + BACKEND: mpi extends: .build_template needs: - job: build_deps_mpi artifacts: true - variables: - BACKEND: mpi build_ucx: + variables: + BACKEND: ucx extends: .build_template needs: - job: build_deps_ucx artifacts: true - variables: - BACKEND: ucx build_libfabric: + variables: + BACKEND: libfabric extends: .build_template needs: - job: build_deps_libfabric artifacts: true - variables: - BACKEND: libfabric .test_serial_template: extends: .container-runner-clariden-gh200 From b1297aca28222bdc614d04851e4d5d3f6bd129e8 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 26 Mar 2026 09:13:10 +0100 Subject: [PATCH 09/68] Fix base image --- .cscs-ci/container/deps.Containerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cscs-ci/container/deps.Containerfile b/.cscs-ci/container/deps.Containerfile index bcba848f..50570529 100644 --- a/.cscs-ci/container/deps.Containerfile +++ b/.cscs-ci/container/deps.Containerfile @@ -1,5 +1,5 @@ ARG BASE_IMAGE -FROM BASE_IMAGE +FROM $BASE_IMAGE ARG SPACK_SHA RUN mkdir -p /opt/spack && \ From 34847222c671735ee2ec057d9fad716162bb866b Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 26 Mar 2026 09:26:27 +0100 Subject: [PATCH 10/68] Fix typo --- .cscs-ci/default.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/.cscs-ci/default.yaml b/.cscs-ci/default.yaml index 2a0bc830..2f3e3db2 100644 --- a/.cscs-ci/default.yaml +++ b/.cscs-ci/default.yaml @@ -2,7 +2,7 @@ include: - remote: 'https://gitlab.com/cscs-ci/recipes/-/raw/master/templates/v2/.ci-ext.yml' variables: - BASE_IMAGE: jfrog.svc.ccs.ch/docker-group-csstaff/alps-images/ngc-pytorch:26.01-py3-alps3 + BASE_IMAGE: jfrog.svc.cscs.ch/docker-group-csstaff/alps-images/ngc-pytorch:26.01-py3-alps3 SPACK_SHA: v1.1.1 SPACK_PACKAGES_SHA: bc93746ce936d6653271b6e98f6df6ee28f64e84 # develop on 2026-03-25 FF_TIMESTAMPS: true From 33624ef1261b5d0ef305f77e015d6ac6d2f68212 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 26 Mar 2026 09:31:11 +0100 Subject: [PATCH 11/68] Fix env file path --- .cscs-ci/default.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cscs-ci/default.yaml b/.cscs-ci/default.yaml index 2f3e3db2..14baa148 100644 --- a/.cscs-ci/default.yaml +++ b/.cscs-ci/default.yaml @@ -20,7 +20,7 @@ variables: variables: DOCKERFILE: .cscs-ci/container/deps.Containerfile DOCKER_BUILD_ARGS: '["BASE_IMAGE", "SPACK_SHA", "SPACK_PACKAGES_SHA", "SPACK_ENV_FILE"]' - SPACK_ENV_FILE: .cscs./spack/$BACKEND.yaml + SPACK_ENV_FILE: .cscs-ci/spack/$BACKEND.yaml artifacts: reports: dotenv: base-${BACKEND}.env From a3e950dfbf5e5cc4340cb1af96ee3809778f4801 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 26 Mar 2026 09:45:54 +0100 Subject: [PATCH 12/68] Update cmake config in CI --- .cscs-ci/container/build.Containerfile | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/.cscs-ci/container/build.Containerfile b/.cscs-ci/container/build.Containerfile index 784221cb..1f010a87 100644 --- a/.cscs-ci/container/build.Containerfile +++ b/.cscs-ci/container/build.Containerfile @@ -4,5 +4,16 @@ FROM $DEPS_IMAGE COPY . 
/oomph WORKDIR /oomph -RUN spack -e ci build-env oomph -- cmake -B build -DOOMPH_WITH_TESTING=ON -DMPIEXEC_EXECUTABLE="" -DMPIEXEC_NUMPROC_FLAG="" -DMPIEXEC_PREFLAGS="" -DMPIEXEC_POSTFLAGS="" && \ +ARG BACKEND +RUN spack -e ci build-env oomph -- \ + cmake -G Ninja -B build \ + -DOOMPH_WITH_TESTING=ON \ + # Converte BACKEND to uppercase + -DOOMPH_WITH_$(echo $BACKEND | tr '[:lower:]' '[:upper:]')=ON \ + -DOOMPH_USE_BUNDLED_LIBS=ON \ + -DOOMPH_USE_BUNDLED_HWMALLOC=OFF \ + -DMPIEXEC_EXECUTABLE="" \ + -DMPIEXEC_NUMPROC_FLAG="" \ + -DMPIEXEC_PREFLAGS="" \ + -DMPIEXEC_POSTFLAGS="" && \ spack -e ci build-env oomph -- cmake --build build -j$(nproc) From 9871e880437958381c6d0280aeee276f66f5a760 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 26 Mar 2026 10:18:52 +0100 Subject: [PATCH 13/68] Use NUM_PROCS instead of nproc --- .cscs-ci/container/build.Containerfile | 3 ++- .cscs-ci/container/deps.Containerfile | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.cscs-ci/container/build.Containerfile b/.cscs-ci/container/build.Containerfile index 1f010a87..c16ce28d 100644 --- a/.cscs-ci/container/build.Containerfile +++ b/.cscs-ci/container/build.Containerfile @@ -5,6 +5,7 @@ COPY . 
/oomph WORKDIR /oomph ARG BACKEND +ARG NUM_PROCS RUN spack -e ci build-env oomph -- \ cmake -G Ninja -B build \ -DOOMPH_WITH_TESTING=ON \ @@ -16,4 +17,4 @@ RUN spack -e ci build-env oomph -- \ -DMPIEXEC_NUMPROC_FLAG="" \ -DMPIEXEC_PREFLAGS="" \ -DMPIEXEC_POSTFLAGS="" && \ - spack -e ci build-env oomph -- cmake --build build -j$(nproc) + spack -e ci build-env oomph -- cmake --build build $NUM_PROCS diff --git a/.cscs-ci/container/deps.Containerfile b/.cscs-ci/container/deps.Containerfile index 50570529..5fc530bd 100644 --- a/.cscs-ci/container/deps.Containerfile +++ b/.cscs-ci/container/deps.Containerfile @@ -17,7 +17,8 @@ RUN spack repo remove --scope defaults:base builtin && \ ARG SPACK_ENV_FILE COPY $SPACK_ENV_FILE /spack_environment/spack.yaml +ARG NUM_PROCS RUN spack external find --all && \ spack env create ci /spack_environment/spack.yaml && \ spack -e ci concretize -f && \ - spack -e ci install --jobs $(nproc) --fail-fast --only=dependencies + spack -e ci install --jobs $NUM_PROCS --fail-fast --only=dependencies From 177592ddd10994449c066a517852f1a94295798c Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 26 Mar 2026 10:47:21 +0100 Subject: [PATCH 14/68] Fix num procs --- .cscs-ci/container/build.Containerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cscs-ci/container/build.Containerfile b/.cscs-ci/container/build.Containerfile index c16ce28d..66a8ae69 100644 --- a/.cscs-ci/container/build.Containerfile +++ b/.cscs-ci/container/build.Containerfile @@ -17,4 +17,4 @@ RUN spack -e ci build-env oomph -- \ -DMPIEXEC_NUMPROC_FLAG="" \ -DMPIEXEC_PREFLAGS="" \ -DMPIEXEC_POSTFLAGS="" && \ - spack -e ci build-env oomph -- cmake --build build $NUM_PROCS + spack -e ci build-env oomph -- cmake --build build -j$NUM_PROCS From 110d4eb869544c8d1e2efda08083a8f80443fe47 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 26 Mar 2026 13:01:55 +0100 Subject: [PATCH 15/68] Update test job config --- .cscs-ci/default.yaml | 6 ++++-- 1 file 
changed, 4 insertions(+), 2 deletions(-) diff --git a/.cscs-ci/default.yaml b/.cscs-ci/default.yaml index 14baa148..65467789 100644 --- a/.cscs-ci/default.yaml +++ b/.cscs-ci/default.yaml @@ -106,8 +106,9 @@ build_libfabric: SLURM_NTASKS: 1 SLURM_TIMELIMIT: '00:15:00' SLURM_PARTITION: normal + SLURM_NETWORK=disable_rdzv_get script: - - ctest --test-dir build -L "serial" --output-on-failure + - ctest --test-dir /oomph/build -L "serial" --output-on-failure .test_parallel_template: extends: .container-runner-clariden-gh200 @@ -117,9 +118,10 @@ build_libfabric: SLURM_TIMELIMIT: '00:15:00' SLURM_PARTITION: normal SLURM_MPI: pmix + SLURM_NETWORK=disable_rdzv_get MPICH_GPU_SUPPORT_ENABLED: 1 script: - - srun -n 4 ctest --test-dir build -L "parallel-ranks-4" --output-on-failure + - srun -n 4 ctest --test-dir /oomph/build -L "parallel-ranks-4" --output-on-failure test_serial_nccl: extends: .test_serial_template From 95183544bfbe219d59d1e1eb0b7337c5e7acfd8b Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 26 Mar 2026 13:04:31 +0100 Subject: [PATCH 16/68] Fix syntax --- .cscs-ci/default.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.cscs-ci/default.yaml b/.cscs-ci/default.yaml index 65467789..3b649519 100644 --- a/.cscs-ci/default.yaml +++ b/.cscs-ci/default.yaml @@ -106,7 +106,7 @@ build_libfabric: SLURM_NTASKS: 1 SLURM_TIMELIMIT: '00:15:00' SLURM_PARTITION: normal - SLURM_NETWORK=disable_rdzv_get + SLURM_NETWORK: disable_rdzv_get script: - ctest --test-dir /oomph/build -L "serial" --output-on-failure @@ -118,7 +118,7 @@ build_libfabric: SLURM_TIMELIMIT: '00:15:00' SLURM_PARTITION: normal SLURM_MPI: pmix - SLURM_NETWORK=disable_rdzv_get + SLURM_NETWORK: disable_rdzv_get MPICH_GPU_SUPPORT_ENABLED: 1 script: - srun -n 4 ctest --test-dir /oomph/build -L "parallel-ranks-4" --output-on-failure From 5419c5be656be431f5658ebd3072bf0990869cd2 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 26 Mar 2026 13:39:34 +0100 Subject: [PATCH 17/68] 
Fix parallel testing --- .cscs-ci/default.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.cscs-ci/default.yaml b/.cscs-ci/default.yaml index 3b649519..c09ce896 100644 --- a/.cscs-ci/default.yaml +++ b/.cscs-ci/default.yaml @@ -119,9 +119,10 @@ build_libfabric: SLURM_PARTITION: normal SLURM_MPI: pmix SLURM_NETWORK: disable_rdzv_get + SLURM_LABELIO: 1 MPICH_GPU_SUPPORT_ENABLED: 1 script: - - srun -n 4 ctest --test-dir /oomph/build -L "parallel-ranks-4" --output-on-failure + - ctest --test-dir /oomph/build -L "parallel-ranks-4" --output-on-failure test_serial_nccl: extends: .test_serial_template From ffd02a45ce35f0da1ba76d38001415fee5b070b2 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 26 Mar 2026 14:22:49 +0100 Subject: [PATCH 18/68] Explicitly ask for one gpu per task --- .cscs-ci/default.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.cscs-ci/default.yaml b/.cscs-ci/default.yaml index c09ce896..55780273 100644 --- a/.cscs-ci/default.yaml +++ b/.cscs-ci/default.yaml @@ -114,6 +114,7 @@ build_libfabric: extends: .container-runner-clariden-gh200 variables: SLURM_JOB_NUM_NODES: 1 + SLURM_GPUS_PER_TASK: 1 SLURM_NTASKS: 4 SLURM_TIMELIMIT: '00:15:00' SLURM_PARTITION: normal From a81f37f4b47d0f5e3e338c1c5c5e7417aad838e4 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 26 Mar 2026 14:22:58 +0100 Subject: [PATCH 19/68] Verbose ctest output --- .cscs-ci/default.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cscs-ci/default.yaml b/.cscs-ci/default.yaml index 55780273..bcab0e30 100644 --- a/.cscs-ci/default.yaml +++ b/.cscs-ci/default.yaml @@ -123,7 +123,7 @@ build_libfabric: SLURM_LABELIO: 1 MPICH_GPU_SUPPORT_ENABLED: 1 script: - - ctest --test-dir /oomph/build -L "parallel-ranks-4" --output-on-failure + - ctest --test-dir /oomph/build -L "parallel-ranks-4" --verbose test_serial_nccl: extends: .test_serial_template From 7b35569ffdc33c346d4ea0578dd8689990babb21 Mon Sep 17 00:00:00 2001 From: Mikael 
Simberg Date: Thu, 26 Mar 2026 14:48:05 +0100 Subject: [PATCH 20/68] Explicitly set debug build for CI --- .cscs-ci/container/build.Containerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cscs-ci/container/build.Containerfile b/.cscs-ci/container/build.Containerfile index 66a8ae69..fe3e707f 100644 --- a/.cscs-ci/container/build.Containerfile +++ b/.cscs-ci/container/build.Containerfile @@ -8,8 +8,8 @@ ARG BACKEND ARG NUM_PROCS RUN spack -e ci build-env oomph -- \ cmake -G Ninja -B build \ + -DCMAKE_BUILD_TYPE=Debug \ -DOOMPH_WITH_TESTING=ON \ - # Converte BACKEND to uppercase -DOOMPH_WITH_$(echo $BACKEND | tr '[:lower:]' '[:upper:]')=ON \ -DOOMPH_USE_BUNDLED_LIBS=ON \ -DOOMPH_USE_BUNDLED_HWMALLOC=OFF \ From 60e0e25cf9f1c3b3df23fb097184bc5c3af3a67f Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 26 Mar 2026 14:50:50 +0100 Subject: [PATCH 21/68] Don't set any mpiexec options if MPIEXEC_EXECUTABLE is empty --- test/CMakeLists.txt | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 31fea066..e645a636 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -62,10 +62,15 @@ function(reg_parallel_test t_ lib n) oomph_target_compile_options(${t}) target_link_libraries(${t} PRIVATE gtest_main_mpi) target_link_libraries(${t} PRIVATE oomph_${lib}) - add_test( - NAME ${t} - COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} ${n} ${MPIEXEC_PREFLAGS} - $ ${MPIEXEC_POSTFLAGS}) + # If not empty + if("${MPIEXEC_EXECUTABLE}" STREQUAL "") + add_test(NAME ${t} COMMAND $) + else() + add_test( + NAME ${t} + COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} ${n} ${MPIEXEC_PREFLAGS} + $ ${MPIEXEC_POSTFLAGS}) + endif() set_tests_properties(${t} PROPERTIES RUN_SERIAL TRUE LABELS "parallel-ranks-${n}") endfunction() From c3ea5689c8a4e1c3ea7ce06d0aea3a8500749e01 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 26 Mar 2026 16:37:53 +0100 Subject: [PATCH 22/68] 
Don't buffer test output --- .cscs-ci/default.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.cscs-ci/default.yaml b/.cscs-ci/default.yaml index bcab0e30..3b93bf25 100644 --- a/.cscs-ci/default.yaml +++ b/.cscs-ci/default.yaml @@ -107,6 +107,7 @@ build_libfabric: SLURM_TIMELIMIT: '00:15:00' SLURM_PARTITION: normal SLURM_NETWORK: disable_rdzv_get + SLURM_UNBUFFERED: 1 script: - ctest --test-dir /oomph/build -L "serial" --output-on-failure @@ -121,6 +122,7 @@ build_libfabric: SLURM_MPI: pmix SLURM_NETWORK: disable_rdzv_get SLURM_LABELIO: 1 + SLURM_UNBUFFERED: 1 MPICH_GPU_SUPPORT_ENABLED: 1 script: - ctest --test-dir /oomph/build -L "parallel-ranks-4" --verbose From 521011e320f203e8cfc5427f24d00360fefa9ca3 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 26 Mar 2026 16:38:18 +0100 Subject: [PATCH 23/68] Skip cancel test --- .cscs-ci/default.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.cscs-ci/default.yaml b/.cscs-ci/default.yaml index 3b93bf25..7110ce50 100644 --- a/.cscs-ci/default.yaml +++ b/.cscs-ci/default.yaml @@ -125,7 +125,8 @@ build_libfabric: SLURM_UNBUFFERED: 1 MPICH_GPU_SUPPORT_ENABLED: 1 script: - - ctest --test-dir /oomph/build -L "parallel-ranks-4" --verbose + # TODO: test_cancel hanging? 
+ - ctest --test-dir /oomph/build -L "parallel-ranks-4" --verbose -E test_cancel test_serial_nccl: extends: .test_serial_template From abb418899d225730b6394df8c9fe3a9327f26466 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 26 Mar 2026 16:46:42 +0100 Subject: [PATCH 24/68] Fix slurm variables --- .cscs-ci/default.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.cscs-ci/default.yaml b/.cscs-ci/default.yaml index 7110ce50..9f2b60d7 100644 --- a/.cscs-ci/default.yaml +++ b/.cscs-ci/default.yaml @@ -107,7 +107,7 @@ build_libfabric: SLURM_TIMELIMIT: '00:15:00' SLURM_PARTITION: normal SLURM_NETWORK: disable_rdzv_get - SLURM_UNBUFFERED: 1 + SLURM_UNBUFFEREDIO: 1 script: - ctest --test-dir /oomph/build -L "serial" --output-on-failure @@ -119,10 +119,10 @@ build_libfabric: SLURM_NTASKS: 4 SLURM_TIMELIMIT: '00:15:00' SLURM_PARTITION: normal - SLURM_MPI: pmix + SLURM_MPI_TYPE: pmix SLURM_NETWORK: disable_rdzv_get SLURM_LABELIO: 1 - SLURM_UNBUFFERED: 1 + SLURM_UNBUFFEREDIO: 1 MPICH_GPU_SUPPORT_ENABLED: 1 script: # TODO: test_cancel hanging? 
From 3689ebee5882b7e08f81c51bc508245ee61f72ad Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 26 Mar 2026 16:49:39 +0100 Subject: [PATCH 25/68] Shorten timeouts --- .cscs-ci/default.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.cscs-ci/default.yaml b/.cscs-ci/default.yaml index 9f2b60d7..2d831960 100644 --- a/.cscs-ci/default.yaml +++ b/.cscs-ci/default.yaml @@ -55,7 +55,7 @@ build_deps_libfabric: .build_template: extends: .container-builder-cscs-gh200 - timeout: 1 hours + timeout: 15 minutes before_script: - echo $DOCKERHUB_TOKEN | podman login docker.io -u $DOCKERHUB_USERNAME --password-stdin || true - export PERSIST_IMAGE_NAME=$CSCS_REGISTRY_PATH/oomph-build-$BACKEND:$CI_COMMIT_SHA @@ -117,7 +117,7 @@ build_libfabric: SLURM_JOB_NUM_NODES: 1 SLURM_GPUS_PER_TASK: 1 SLURM_NTASKS: 4 - SLURM_TIMELIMIT: '00:15:00' + SLURM_TIMELIMIT: '5:00' SLURM_PARTITION: normal SLURM_MPI_TYPE: pmix SLURM_NETWORK: disable_rdzv_get From 841d97bc758a1fe532b03700f17ae118a66ae55b Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 26 Mar 2026 16:56:37 +0100 Subject: [PATCH 26/68] Don't load cxi hooks in CI --- .cscs-ci/default.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.cscs-ci/default.yaml b/.cscs-ci/default.yaml index 2d831960..0f48134c 100644 --- a/.cscs-ci/default.yaml +++ b/.cscs-ci/default.yaml @@ -108,6 +108,7 @@ build_libfabric: SLURM_PARTITION: normal SLURM_NETWORK: disable_rdzv_get SLURM_UNBUFFEREDIO: 1 + USE_MPI: NO script: - ctest --test-dir /oomph/build -L "serial" --output-on-failure @@ -124,6 +125,7 @@ build_libfabric: SLURM_LABELIO: 1 SLURM_UNBUFFEREDIO: 1 MPICH_GPU_SUPPORT_ENABLED: 1 + USE_MPI: NO script: # TODO: test_cancel hanging? 
- ctest --test-dir /oomph/build -L "parallel-ranks-4" --verbose -E test_cancel From d7995af0c2fe34da458e3192434050ed06a79464 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 26 Mar 2026 17:05:17 +0100 Subject: [PATCH 27/68] Update slurm and ctest options --- .cscs-ci/default.yaml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.cscs-ci/default.yaml b/.cscs-ci/default.yaml index 0f48134c..98ccacff 100644 --- a/.cscs-ci/default.yaml +++ b/.cscs-ci/default.yaml @@ -108,9 +108,11 @@ build_libfabric: SLURM_PARTITION: normal SLURM_NETWORK: disable_rdzv_get SLURM_UNBUFFEREDIO: 1 + PMIX_MCA_psec: native + PMIX_MCA_gds: "^shmem2" USE_MPI: NO script: - - ctest --test-dir /oomph/build -L "serial" --output-on-failure + - ctest --test-dir /oomph/build -L "serial" --output-on-failure --timeout 60 .test_parallel_template: extends: .container-runner-clariden-gh200 @@ -125,10 +127,12 @@ build_libfabric: SLURM_LABELIO: 1 SLURM_UNBUFFEREDIO: 1 MPICH_GPU_SUPPORT_ENABLED: 1 + PMIX_MCA_psec: native + PMIX_MCA_gds: "^shmem2" USE_MPI: NO script: # TODO: test_cancel hanging? - - ctest --test-dir /oomph/build -L "parallel-ranks-4" --verbose -E test_cancel + - ctest --test-dir /oomph/build -L "parallel-ranks-4" --verbose --timeout 60 test_serial_nccl: extends: .test_serial_template From c91ae1cfa92c42ef332d867a31b9e9796b79dd99 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 9 Apr 2026 15:03:20 +0200 Subject: [PATCH 28/68] List libfabric and ucx info in CI --- .cscs-ci/default.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.cscs-ci/default.yaml b/.cscs-ci/default.yaml index 98ccacff..06165d12 100644 --- a/.cscs-ci/default.yaml +++ b/.cscs-ci/default.yaml @@ -132,6 +132,8 @@ build_libfabric: USE_MPI: NO script: # TODO: test_cancel hanging? 
+ - fi_info + - ucx_info -d - ctest --test-dir /oomph/build -L "parallel-ranks-4" --verbose --timeout 60 test_serial_nccl: From b74e96d5742d1628e325679a1f087ae4e9795dd3 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 9 Apr 2026 15:14:47 +0200 Subject: [PATCH 29/68] Clean up test templates --- .cscs-ci/default.yaml | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/.cscs-ci/default.yaml b/.cscs-ci/default.yaml index 06165d12..c8dccdc7 100644 --- a/.cscs-ci/default.yaml +++ b/.cscs-ci/default.yaml @@ -99,41 +99,37 @@ build_libfabric: - job: build_deps_libfabric artifacts: true -.test_serial_template: +.test_template_base: extends: .container-runner-clariden-gh200 variables: SLURM_JOB_NUM_NODES: 1 - SLURM_NTASKS: 1 - SLURM_TIMELIMIT: '00:15:00' + SLURM_GPUS_PER_TASK: 1 + SLURM_TIMELIMIT: '5:00' SLURM_PARTITION: normal + SLURM_MPI_TYPE: pmix SLURM_NETWORK: disable_rdzv_get + SLURM_LABELIO: 1 SLURM_UNBUFFEREDIO: 1 PMIX_MCA_psec: native PMIX_MCA_gds: "^shmem2" USE_MPI: NO + +.test_serial_template: + extends: + - .container-runner-clariden-gh200 + - .test_template_base + variables: + SLURM_NTASKS: 1 script: - ctest --test-dir /oomph/build -L "serial" --output-on-failure --timeout 60 .test_parallel_template: - extends: .container-runner-clariden-gh200 + extends: + - .container-runner-clariden-gh200 + - .test_template_base variables: - SLURM_JOB_NUM_NODES: 1 - SLURM_GPUS_PER_TASK: 1 SLURM_NTASKS: 4 - SLURM_TIMELIMIT: '5:00' - SLURM_PARTITION: normal - SLURM_MPI_TYPE: pmix - SLURM_NETWORK: disable_rdzv_get - SLURM_LABELIO: 1 - SLURM_UNBUFFEREDIO: 1 - MPICH_GPU_SUPPORT_ENABLED: 1 - PMIX_MCA_psec: native - PMIX_MCA_gds: "^shmem2" - USE_MPI: NO script: - # TODO: test_cancel hanging? 
- - fi_info - - ucx_info -d - ctest --test-dir /oomph/build -L "parallel-ranks-4" --verbose --timeout 60 test_serial_nccl: From 80ce06cec9e6ede1d88db5c60afbf66566696065 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 9 Apr 2026 15:31:59 +0200 Subject: [PATCH 30/68] Disable NCCL CI pipelines since it's not yet supported --- .cscs-ci/default.yaml | 55 +++++++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/.cscs-ci/default.yaml b/.cscs-ci/default.yaml index c8dccdc7..321b1b7c 100644 --- a/.cscs-ci/default.yaml +++ b/.cscs-ci/default.yaml @@ -25,12 +25,13 @@ variables: reports: dotenv: base-${BACKEND}.env -build_deps_nccl: - variables: - BACKEND: nccl - extends: - - .container-builder-cscs-gh200 - - .build_deps_template +# TODO: NCCL will be be enabled in https://github.com/ghex-org/oomph/pull/55 +# build_deps_nccl: +# variables: +# BACKEND: nccl +# extends: +# - .container-builder-cscs-gh200 +# - .build_deps_template build_deps_mpi: variables: @@ -67,13 +68,14 @@ build_deps_libfabric: reports: dotenv: build-${BACKEND}.env -build_nccl: - variables: - BACKEND: nccl - extends: .build_template - needs: - - job: build_deps_nccl - artifacts: true +# TODO: NCCL will be be enabled in https://github.com/ghex-org/oomph/pull/55 +# build_nccl: +# variables: +# BACKEND: nccl +# extends: .build_template +# needs: +# - job: build_deps_nccl +# artifacts: true build_mpi: variables: @@ -132,19 +134,20 @@ build_libfabric: script: - ctest --test-dir /oomph/build -L "parallel-ranks-4" --verbose --timeout 60 -test_serial_nccl: - extends: .test_serial_template - needs: - - job: build_nccl - artifacts: true - image: $BUILD_IMAGE - -test_parallel_nccl: - extends: .test_parallel_template - needs: - - job: build_nccl - artifacts: true - image: $BUILD_IMAGE +# TODO: NCCL will be be enabled in https://github.com/ghex-org/oomph/pull/55 +# test_serial_nccl: +# extends: .test_serial_template +# needs: +# - job: build_nccl +# artifacts: 
true +# image: $BUILD_IMAGE + +# test_parallel_nccl: +# extends: .test_parallel_template +# needs: +# - job: build_nccl +# artifacts: true +# image: $BUILD_IMAGE test_serial_mpi: extends: .test_serial_template From 3fed3780f1d5d746f229a969299897ba421e503d Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 9 Apr 2026 16:49:07 +0200 Subject: [PATCH 31/68] Small cleanup and parallel non-distributed tests in CI --- .cscs-ci/default.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.cscs-ci/default.yaml b/.cscs-ci/default.yaml index 321b1b7c..1212d17e 100644 --- a/.cscs-ci/default.yaml +++ b/.cscs-ci/default.yaml @@ -123,7 +123,7 @@ build_libfabric: variables: SLURM_NTASKS: 1 script: - - ctest --test-dir /oomph/build -L "serial" --output-on-failure --timeout 60 + - ctest --test-dir /oomph/build -L "serial" --output-on-failure --timeout 60 --parallel 8 .test_parallel_template: extends: @@ -132,7 +132,7 @@ build_libfabric: variables: SLURM_NTASKS: 4 script: - - ctest --test-dir /oomph/build -L "parallel-ranks-4" --verbose --timeout 60 + - ctest --test-dir /oomph/build -L "parallel-ranks-4" --output-on-failure --timeout 60 # TODO: NCCL will be be enabled in https://github.com/ghex-org/oomph/pull/55 # test_serial_nccl: From 38112f2696fca1abe6673cb2004741e36a1f33ba Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Fri, 10 Apr 2026 08:02:57 +0200 Subject: [PATCH 32/68] strace ctest call --- .cscs-ci/default.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cscs-ci/default.yaml b/.cscs-ci/default.yaml index 1212d17e..5dc3eea4 100644 --- a/.cscs-ci/default.yaml +++ b/.cscs-ci/default.yaml @@ -132,7 +132,7 @@ build_libfabric: variables: SLURM_NTASKS: 4 script: - - ctest --test-dir /oomph/build -L "parallel-ranks-4" --output-on-failure --timeout 60 + - strace ctest --test-dir /oomph/build -L "parallel-ranks-4" --output-on-failure --timeout 60 # TODO: NCCL will be be enabled in https://github.com/ghex-org/oomph/pull/55 # 
test_serial_nccl: From f1e54aadb20ca737e5d070c72fc7ebc54717a2ef Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Fri, 10 Apr 2026 09:05:52 +0200 Subject: [PATCH 33/68] Verbose CI tests --- .cscs-ci/default.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cscs-ci/default.yaml b/.cscs-ci/default.yaml index 5dc3eea4..372ccb3c 100644 --- a/.cscs-ci/default.yaml +++ b/.cscs-ci/default.yaml @@ -132,7 +132,7 @@ build_libfabric: variables: SLURM_NTASKS: 4 script: - - strace ctest --test-dir /oomph/build -L "parallel-ranks-4" --output-on-failure --timeout 60 + - strace ctest --test-dir /oomph/build -L "parallel-ranks-4" --output-on-failure --timeout 60 --verbose # TODO: NCCL will be be enabled in https://github.com/ghex-org/oomph/pull/55 # test_serial_nccl: From faf58b8df99b53bffeeb089cb9be534c56dbc504 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Fri, 10 Apr 2026 16:26:39 +0200 Subject: [PATCH 34/68] Remove verbose parallel tests in CI --- .cscs-ci/default.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cscs-ci/default.yaml b/.cscs-ci/default.yaml index 372ccb3c..1212d17e 100644 --- a/.cscs-ci/default.yaml +++ b/.cscs-ci/default.yaml @@ -132,7 +132,7 @@ build_libfabric: variables: SLURM_NTASKS: 4 script: - - strace ctest --test-dir /oomph/build -L "parallel-ranks-4" --output-on-failure --timeout 60 --verbose + - ctest --test-dir /oomph/build -L "parallel-ranks-4" --output-on-failure --timeout 60 # TODO: NCCL will be be enabled in https://github.com/ghex-org/oomph/pull/55 # test_serial_nccl: From e2e7d9c4357ed44a2c8509391a845a96befede82 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Fri, 10 Apr 2026 16:45:04 +0200 Subject: [PATCH 35/68] Fix fortran parallel tests when MPIEXEC_EXECUTABLE is empty --- test/CMakeLists.txt | 1 - test/bindings/fortran/CMakeLists.txt | 12 ++++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index e645a636..cb4e6f0e 100644 
--- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -62,7 +62,6 @@ function(reg_parallel_test t_ lib n) oomph_target_compile_options(${t}) target_link_libraries(${t} PRIVATE gtest_main_mpi) target_link_libraries(${t} PRIVATE oomph_${lib}) - # If not empty if("${MPIEXEC_EXECUTABLE}" STREQUAL "") add_test(NAME ${t} COMMAND $) else() diff --git a/test/bindings/fortran/CMakeLists.txt b/test/bindings/fortran/CMakeLists.txt index 10e69e15..2a5980c5 100644 --- a/test/bindings/fortran/CMakeLists.txt +++ b/test/bindings/fortran/CMakeLists.txt @@ -25,10 +25,14 @@ function(reg_parallel_test_f t_ lib n nthr) $ $ $) - add_test( - NAME ${t} - COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} ${n} ${MPIEXEC_PREFLAGS} - $ ${MPIEXEC_POSTFLAGS}) + if("${MPIEXEC_EXECUTABLE}" STREQUAL "") + add_test(NAME ${t} COMMAND $) + else() + add_test( + NAME ${t} + COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} ${n} ${MPIEXEC_PREFLAGS} + $ ${MPIEXEC_POSTFLAGS}) + endif() set_tests_properties(${t} PROPERTIES ENVIRONMENT OMP_NUM_THREADS=${nthr} LABELS "parallel-ranks-${n}") From d83dbaaea959940a822c7cafcc9c61564174471d Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Fri, 10 Apr 2026 16:51:06 +0200 Subject: [PATCH 36/68] Add missing BACKEND build arg to build step in CI --- .cscs-ci/default.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cscs-ci/default.yaml b/.cscs-ci/default.yaml index 1212d17e..f4dc498f 100644 --- a/.cscs-ci/default.yaml +++ b/.cscs-ci/default.yaml @@ -63,7 +63,7 @@ build_deps_libfabric: - echo -e "BUILD_IMAGE=$PERSIST_IMAGE_NAME" >> build-${BACKEND}.env variables: DOCKERFILE: .cscs-ci/container/build.Containerfile - DOCKER_BUILD_ARGS: '["DEPS_IMAGE"]' + DOCKER_BUILD_ARGS: '["DEPS_IMAGE", "BACKEND"]' artifacts: reports: dotenv: build-${BACKEND}.env From a21ac784602e07b4736c95deb783120a24c11b3c Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Fri, 10 Apr 2026 20:01:16 +0200 Subject: [PATCH 37/68] Singular hour --- .cscs-ci/default.yaml 
| 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cscs-ci/default.yaml b/.cscs-ci/default.yaml index f4dc498f..f5fb0a64 100644 --- a/.cscs-ci/default.yaml +++ b/.cscs-ci/default.yaml @@ -8,7 +8,7 @@ variables: FF_TIMESTAMPS: true .build_deps_template: - timeout: 1 hours + timeout: 1 hour before_script: - echo $DOCKERHUB_TOKEN | podman login docker.io -u $DOCKERHUB_USERNAME --password-stdin || true - export DOCKERFILE_SHA=`sha256sum .cscs-ci/container/deps.Containerfile | head -c 16` From d2ae1f0db7419ddfcf010c9295f85e2cbd9d77bb Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Fri, 10 Apr 2026 20:01:57 +0200 Subject: [PATCH 38/68] Double word --- .cscs-ci/default.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.cscs-ci/default.yaml b/.cscs-ci/default.yaml index f5fb0a64..be8be743 100644 --- a/.cscs-ci/default.yaml +++ b/.cscs-ci/default.yaml @@ -25,7 +25,7 @@ variables: reports: dotenv: base-${BACKEND}.env -# TODO: NCCL will be be enabled in https://github.com/ghex-org/oomph/pull/55 +# TODO: NCCL will be enabled in https://github.com/ghex-org/oomph/pull/55 # build_deps_nccl: # variables: # BACKEND: nccl @@ -68,7 +68,7 @@ build_deps_libfabric: reports: dotenv: build-${BACKEND}.env -# TODO: NCCL will be be enabled in https://github.com/ghex-org/oomph/pull/55 +# TODO: NCCL will be enabled in https://github.com/ghex-org/oomph/pull/55 # build_nccl: # variables: # BACKEND: nccl @@ -134,7 +134,7 @@ build_libfabric: script: - ctest --test-dir /oomph/build -L "parallel-ranks-4" --output-on-failure --timeout 60 -# TODO: NCCL will be be enabled in https://github.com/ghex-org/oomph/pull/55 +# TODO: NCCL will be enabled in https://github.com/ghex-org/oomph/pull/55 # test_serial_nccl: # extends: .test_serial_template # needs: From bdd9374414373cc7b4a814c184315af113e3058a Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Fri, 10 Apr 2026 20:02:44 +0200 Subject: [PATCH 39/68] Remove unnecessary extends --- .cscs-ci/default.yaml | 8 
++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/.cscs-ci/default.yaml b/.cscs-ci/default.yaml index be8be743..6c7f56c7 100644 --- a/.cscs-ci/default.yaml +++ b/.cscs-ci/default.yaml @@ -117,18 +117,14 @@ build_libfabric: USE_MPI: NO .test_serial_template: - extends: - - .container-runner-clariden-gh200 - - .test_template_base + extends: .test_template_base variables: SLURM_NTASKS: 1 script: - ctest --test-dir /oomph/build -L "serial" --output-on-failure --timeout 60 --parallel 8 .test_parallel_template: - extends: - - .container-runner-clariden-gh200 - - .test_template_base + extends: .test_template_base variables: SLURM_NTASKS: 4 script: From d89bf20c0e5bfcbd71222046b0b03f5b212687f4 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Fri, 10 Apr 2026 20:38:07 +0200 Subject: [PATCH 40/68] More curl flags --- .cscs-ci/container/deps.Containerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.cscs-ci/container/deps.Containerfile b/.cscs-ci/container/deps.Containerfile index 5fc530bd..f5867ac5 100644 --- a/.cscs-ci/container/deps.Containerfile +++ b/.cscs-ci/container/deps.Containerfile @@ -3,13 +3,13 @@ FROM $BASE_IMAGE ARG SPACK_SHA RUN mkdir -p /opt/spack && \ - curl -Ls "https://api.github.com/repos/spack/spack/tarball/$SPACK_SHA" | tar --strip-components=1 -xz -C /opt/spack + curl -fLsS "https://api.github.com/repos/spack/spack/tarball/$SPACK_SHA" | tar --strip-components=1 -xz -C /opt/spack ENV PATH="/opt/spack/bin:$PATH" ARG SPACK_PACKAGES_SHA RUN mkdir -p /opt/spack-packages && \ - curl -Ls "https://api.github.com/repos/spack/spack-packages/tarball/$SPACK_PACKAGES_SHA" | tar --strip-components=1 -xz -C /opt/spack-packages + curl -fLsS "https://api.github.com/repos/spack/spack-packages/tarball/$SPACK_PACKAGES_SHA" | tar --strip-components=1 -xz -C /opt/spack-packages RUN spack repo remove --scope defaults:base builtin && \ spack repo add --scope site /opt/spack-packages/repos/spack_repo/builtin From 
45d69924fd643a4db017e9bb8ec2f44b748fb6f7 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Mon, 13 Apr 2026 09:33:12 +0200 Subject: [PATCH 41/68] Use prerelease base image in CI --- .cscs-ci/default.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cscs-ci/default.yaml b/.cscs-ci/default.yaml index 6c7f56c7..e14c5e47 100644 --- a/.cscs-ci/default.yaml +++ b/.cscs-ci/default.yaml @@ -2,7 +2,7 @@ include: - remote: 'https://gitlab.com/cscs-ci/recipes/-/raw/master/templates/v2/.ci-ext.yml' variables: - BASE_IMAGE: jfrog.svc.cscs.ch/docker-group-csstaff/alps-images/ngc-pytorch:26.01-py3-alps3 + BASE_IMAGE: jfrog.svc.cscs.ch/docker-group-csstaff/alps-images/ngc-pytorch:26.01-py3-alps4-dev SPACK_SHA: v1.1.1 SPACK_PACKAGES_SHA: bc93746ce936d6653271b6e98f6df6ee28f64e84 # develop on 2026-03-25 FF_TIMESTAMPS: true From 841b9bfc27a9daaf7b8f0a5e3bd809440f9f06c6 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Mon, 13 Apr 2026 16:06:28 +0200 Subject: [PATCH 42/68] Use separate Testing directory for ctest per process --- .cscs-ci/default.yaml | 2 +- .cscs-ci/scripts/ctest-bind-testing-dir.sh | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) create mode 100755 .cscs-ci/scripts/ctest-bind-testing-dir.sh diff --git a/.cscs-ci/default.yaml b/.cscs-ci/default.yaml index e14c5e47..9a9eeaa4 100644 --- a/.cscs-ci/default.yaml +++ b/.cscs-ci/default.yaml @@ -121,7 +121,7 @@ build_libfabric: variables: SLURM_NTASKS: 1 script: - - ctest --test-dir /oomph/build -L "serial" --output-on-failure --timeout 60 --parallel 8 + - /oomph/.cscs-ci/scripts/ctest-bind-testing-dir.sh ctest --test-dir /oomph/build -L "serial" --output-on-failure --timeout 60 --parallel 8 .test_parallel_template: extends: .test_template_base diff --git a/.cscs-ci/scripts/ctest-bind-testing-dir.sh b/.cscs-ci/scripts/ctest-bind-testing-dir.sh new file mode 100755 index 00000000..04462b75 --- /dev/null +++ b/.cscs-ci/scripts/ctest-bind-testing-dir.sh @@ -0,0 +1,9 @@ +#/usr/bin/env bash 
+# +# Helper script to mount a separate directory for the Testing/Temporary +# directory for each process when running ctest within slurm. + +set -x +unshare --mount --map-root-user \ + bash -c \ + "mount --bind /tmp/Testing/Temporary-${SLURM_PROCID} $PWD/Testing/Temporary && exec \"$@\"" From c80510f79ebe3e8dac58a1596e0f4ae3a2d23e76 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Mon, 13 Apr 2026 16:19:05 +0200 Subject: [PATCH 43/68] Try to fix ctest wrapper --- .cscs-ci/default.yaml | 4 ++-- .cscs-ci/scripts/ctest-bind-testing-dir.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.cscs-ci/default.yaml b/.cscs-ci/default.yaml index 9a9eeaa4..fdd27dcd 100644 --- a/.cscs-ci/default.yaml +++ b/.cscs-ci/default.yaml @@ -121,14 +121,14 @@ build_libfabric: variables: SLURM_NTASKS: 1 script: - - /oomph/.cscs-ci/scripts/ctest-bind-testing-dir.sh ctest --test-dir /oomph/build -L "serial" --output-on-failure --timeout 60 --parallel 8 + - ctest --test-dir /oomph/build -L "serial" --output-on-failure --timeout 60 --parallel 8 .test_parallel_template: extends: .test_template_base variables: SLURM_NTASKS: 4 script: - - ctest --test-dir /oomph/build -L "parallel-ranks-4" --output-on-failure --timeout 60 + - /oomph/.cscs-ci/scripts/ctest-bind-testing-dir.sh ctest --test-dir /oomph/build -L "parallel-ranks-4" --output-on-failure --timeout 60 # TODO: NCCL will be enabled in https://github.com/ghex-org/oomph/pull/55 # test_serial_nccl: diff --git a/.cscs-ci/scripts/ctest-bind-testing-dir.sh b/.cscs-ci/scripts/ctest-bind-testing-dir.sh index 04462b75..12ce5f7d 100755 --- a/.cscs-ci/scripts/ctest-bind-testing-dir.sh +++ b/.cscs-ci/scripts/ctest-bind-testing-dir.sh @@ -6,4 +6,4 @@ set -x unshare --mount --map-root-user \ bash -c \ - "mount --bind /tmp/Testing/Temporary-${SLURM_PROCID} $PWD/Testing/Temporary && exec \"$@\"" + "mount --bind /tmp/Testing/Temporary-${SLURM_PROCID} $PWD/Testing/Temporary && $@" From 5fe5a13b83b5d9ff1c1d2bc13d1f0c49f0c87895 Mon Sep 
17 00:00:00 2001 From: Mikael Simberg Date: Mon, 13 Apr 2026 16:42:48 +0200 Subject: [PATCH 44/68] cd into build directory --- .cscs-ci/default.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.cscs-ci/default.yaml b/.cscs-ci/default.yaml index fdd27dcd..f2a095b8 100644 --- a/.cscs-ci/default.yaml +++ b/.cscs-ci/default.yaml @@ -128,7 +128,8 @@ build_libfabric: variables: SLURM_NTASKS: 4 script: - - /oomph/.cscs-ci/scripts/ctest-bind-testing-dir.sh ctest --test-dir /oomph/build -L "parallel-ranks-4" --output-on-failure --timeout 60 + - cd /oomph/build + - /oomph/.cscs-ci/scripts/ctest-bind-testing-dir.sh ctest -L "parallel-ranks-4" --output-on-failure --timeout 60 # TODO: NCCL will be enabled in https://github.com/ghex-org/oomph/pull/55 # test_serial_nccl: From e55f1ad2244718cd44b3d9db98bc2020b3ad97c7 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Mon, 13 Apr 2026 17:07:18 +0200 Subject: [PATCH 45/68] Fix testing path --- .cscs-ci/scripts/ctest-bind-testing-dir.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/.cscs-ci/scripts/ctest-bind-testing-dir.sh b/.cscs-ci/scripts/ctest-bind-testing-dir.sh index 12ce5f7d..10b9712f 100755 --- a/.cscs-ci/scripts/ctest-bind-testing-dir.sh +++ b/.cscs-ci/scripts/ctest-bind-testing-dir.sh @@ -4,6 +4,7 @@ # directory for each process when running ctest within slurm. 
set -x +mkdir -p "/tmp/Testing/Temporary-${SLURM_PROCID}" unshare --mount --map-root-user \ bash -c \ "mount --bind /tmp/Testing/Temporary-${SLURM_PROCID} $PWD/Testing/Temporary && $@" From 2ce53ed4a58c70d87183857619dfe7386737bb55 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Mon, 13 Apr 2026 17:20:43 +0200 Subject: [PATCH 46/68] Try without --map-root-user --- .cscs-ci/scripts/ctest-bind-testing-dir.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cscs-ci/scripts/ctest-bind-testing-dir.sh b/.cscs-ci/scripts/ctest-bind-testing-dir.sh index 10b9712f..bd310aed 100755 --- a/.cscs-ci/scripts/ctest-bind-testing-dir.sh +++ b/.cscs-ci/scripts/ctest-bind-testing-dir.sh @@ -5,6 +5,6 @@ set -x mkdir -p "/tmp/Testing/Temporary-${SLURM_PROCID}" -unshare --mount --map-root-user \ +unshare --mount \ bash -c \ "mount --bind /tmp/Testing/Temporary-${SLURM_PROCID} $PWD/Testing/Temporary && $@" From 9c711025ed12d9614c8e0a16d1fef086b6dbf820 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Mon, 13 Apr 2026 19:14:58 +0200 Subject: [PATCH 47/68] Try something else for ctest deadlocks --- .cscs-ci/default.yaml | 6 ++++-- .cscs-ci/scripts/ctest-bind-testing-dir.sh | 10 ---------- 2 files changed, 4 insertions(+), 12 deletions(-) delete mode 100755 .cscs-ci/scripts/ctest-bind-testing-dir.sh diff --git a/.cscs-ci/default.yaml b/.cscs-ci/default.yaml index f2a095b8..c0a53a11 100644 --- a/.cscs-ci/default.yaml +++ b/.cscs-ci/default.yaml @@ -128,8 +128,10 @@ build_libfabric: variables: SLURM_NTASKS: 4 script: - - cd /oomph/build - - /oomph/.cscs-ci/scripts/ctest-bind-testing-dir.sh ctest -L "parallel-ranks-4" --output-on-failure --timeout 60 + # All ranks write to ctest files in Testing, but this can deadlock when + # writing inside the container. 
+ - if [[ "${SLURM_PROCID}" == 0 ]]; then rm -rf /oomph/build/Testing; mkdir /tmp/Testing; ln -s /tmp/Testing /oomph/build/Testing; done + - ctest --test-dir /oomph/build -L "parallel-ranks-4" --output-on-failure --timeout 60 # TODO: NCCL will be enabled in https://github.com/ghex-org/oomph/pull/55 # test_serial_nccl: diff --git a/.cscs-ci/scripts/ctest-bind-testing-dir.sh b/.cscs-ci/scripts/ctest-bind-testing-dir.sh deleted file mode 100755 index bd310aed..00000000 --- a/.cscs-ci/scripts/ctest-bind-testing-dir.sh +++ /dev/null @@ -1,10 +0,0 @@ -#/usr/bin/env bash -# -# Helper script to mount a separate directory for the Testing/Temporary -# directory for each process when running ctest within slurm. - -set -x -mkdir -p "/tmp/Testing/Temporary-${SLURM_PROCID}" -unshare --mount \ - bash -c \ - "mount --bind /tmp/Testing/Temporary-${SLURM_PROCID} $PWD/Testing/Temporary && $@" From 5f3f6bb1287a61e36f646e4ac7efe761522e5af8 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Mon, 13 Apr 2026 19:50:36 +0200 Subject: [PATCH 48/68] Fix syntax error --- .cscs-ci/default.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cscs-ci/default.yaml b/.cscs-ci/default.yaml index c0a53a11..05fd359b 100644 --- a/.cscs-ci/default.yaml +++ b/.cscs-ci/default.yaml @@ -130,7 +130,7 @@ build_libfabric: script: # All ranks write to ctest files in Testing, but this can deadlock when # writing inside the container. 
- - if [[ "${SLURM_PROCID}" == 0 ]]; then rm -rf /oomph/build/Testing; mkdir /tmp/Testing; ln -s /tmp/Testing /oomph/build/Testing; done + - if [[ "${SLURM_PROCID}" == 0 ]]; then rm -rf /oomph/build/Testing; mkdir /tmp/Testing; ln -s /tmp/Testing /oomph/build/Testing; fi - ctest --test-dir /oomph/build -L "parallel-ranks-4" --output-on-failure --timeout 60 # TODO: NCCL will be enabled in https://github.com/ghex-org/oomph/pull/55 From e33fd3d597294bf1176d975f1a0bd95a2df712b2 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Tue, 14 Apr 2026 11:08:46 +0200 Subject: [PATCH 49/68] Add sleep just to be safe when symlinking testing directory --- .cscs-ci/default.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.cscs-ci/default.yaml b/.cscs-ci/default.yaml index 05fd359b..7bdc0567 100644 --- a/.cscs-ci/default.yaml +++ b/.cscs-ci/default.yaml @@ -131,6 +131,7 @@ build_libfabric: # All ranks write to ctest files in Testing, but this can deadlock when # writing inside the container. - if [[ "${SLURM_PROCID}" == 0 ]]; then rm -rf /oomph/build/Testing; mkdir /tmp/Testing; ln -s /tmp/Testing /oomph/build/Testing; fi + - sleep 1 - ctest --test-dir /oomph/build -L "parallel-ranks-4" --output-on-failure --timeout 60 # TODO: NCCL will be enabled in https://github.com/ghex-org/oomph/pull/55 From a00c372a8e1d74b2ad197788ab47bfa506b86043 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Tue, 14 Apr 2026 13:44:19 +0200 Subject: [PATCH 50/68] Use public path for base images --- .cscs-ci/default.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.cscs-ci/default.yaml b/.cscs-ci/default.yaml index 7bdc0567..c88a4522 100644 --- a/.cscs-ci/default.yaml +++ b/.cscs-ci/default.yaml @@ -14,7 +14,7 @@ variables: - export DOCKERFILE_SHA=`sha256sum .cscs-ci/container/deps.Containerfile | head -c 16` - export ENV_FILE_SHA=`sha256sum ${SPACK_ENV_FILE} | head -c 16` - export CONFIG_TAG=`echo $DOCKERFILE_SHA-$BASE_IMAGE-$SPACK_SHA-$SPACK_PACKAGES_SHA-$ENV_FILE_SHA | 
sha256sum - | head -c 16` - - export PERSIST_IMAGE_NAME=$CSCS_REGISTRY_PATH/oomph-spack-deps-$BACKEND:$CONFIG_TAG + - export PERSIST_IMAGE_NAME=$CSCS_REGISTRY_PATH/public/oomph-spack-deps-$BACKEND:$CONFIG_TAG - echo -e "CONFIG_TAG=$CONFIG_TAG" >> base-${BACKEND}.env - echo -e "DEPS_IMAGE=$PERSIST_IMAGE_NAME" >> base-${BACKEND}.env variables: @@ -59,7 +59,7 @@ build_deps_libfabric: timeout: 15 minutes before_script: - echo $DOCKERHUB_TOKEN | podman login docker.io -u $DOCKERHUB_USERNAME --password-stdin || true - - export PERSIST_IMAGE_NAME=$CSCS_REGISTRY_PATH/oomph-build-$BACKEND:$CI_COMMIT_SHA + - export PERSIST_IMAGE_NAME=$CSCS_REGISTRY_PATH/public/oomph-build-$BACKEND:$CI_COMMIT_SHA - echo -e "BUILD_IMAGE=$PERSIST_IMAGE_NAME" >> build-${BACKEND}.env variables: DOCKERFILE: .cscs-ci/container/build.Containerfile From a4e0936389fdc7eba220a1ebef056119be78d847 Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Thu, 13 Nov 2025 11:45:13 +0000 Subject: [PATCH 51/68] Clang-format files affected by libfabric changes --- include/oomph/detail/communicator_helper.hpp | 200 +- src/libfabric/communicator.hpp | 509 ++--- src/libfabric/context.cpp | 133 +- src/libfabric/context.hpp | 220 +- src/libfabric/controller.hpp | 682 +++--- src/libfabric/controller_base.hpp | 2136 +++++++++--------- src/libfabric/fabric_error.hpp | 54 +- src/libfabric/libfabric_defines_template.hpp | 18 +- src/libfabric/locality.cpp | 37 +- src/libfabric/locality.hpp | 411 ++-- src/libfabric/memory_region.hpp | 581 ++--- src/libfabric/operation_context.cpp | 183 +- src/libfabric/operation_context.hpp | 67 +- src/libfabric/operation_context_base.hpp | 137 +- src/libfabric/print.hpp | 1167 +++++----- src/libfabric/request_state.hpp | 166 +- src/libfabric/simple_counter.hpp | 126 +- test/test_send_recv.cpp | 206 +- 18 files changed, 3498 insertions(+), 3535 deletions(-) diff --git a/include/oomph/detail/communicator_helper.hpp b/include/oomph/detail/communicator_helper.hpp index 6e0e97d5..44f6d828 
100644 --- a/include/oomph/detail/communicator_helper.hpp +++ b/include/oomph/detail/communicator_helper.hpp @@ -33,7 +33,7 @@ #define OOMPH_CHECK_CALLBACK_MSG_REF \ static_assert(std::is_same&>::value || \ - std::is_same const&>::value, \ + std::is_same const&>::value, \ "first callback argument type is not an l-value reference to a message_buffer"); #define OOMPH_CHECK_CALLBACK_MSG_CONST_REF \ @@ -41,129 +41,107 @@ "first callback argument type is not a const l-value reference to a message_buffer"); #define OOMPH_CHECK_CALLBACK(CALLBACK) \ - { \ - OOMPH_CHECK_CALLBACK_F(CALLBACK, rank_type, tag_type) \ - OOMPH_CHECK_CALLBACK_MSG \ - } + {OOMPH_CHECK_CALLBACK_F(CALLBACK, rank_type, tag_type) OOMPH_CHECK_CALLBACK_MSG} #define OOMPH_CHECK_CALLBACK_MULTI(CALLBACK) \ - { \ - OOMPH_CHECK_CALLBACK_F(CALLBACK, std::vector, tag_type) \ - OOMPH_CHECK_CALLBACK_MSG \ - } + {OOMPH_CHECK_CALLBACK_F(CALLBACK, std::vector, tag_type) OOMPH_CHECK_CALLBACK_MSG} #define OOMPH_CHECK_CALLBACK_MULTI_TAGS(CALLBACK) \ - { \ - OOMPH_CHECK_CALLBACK_F(CALLBACK, std::vector, std::vector) \ - OOMPH_CHECK_CALLBACK_MSG \ - } + {OOMPH_CHECK_CALLBACK_F(CALLBACK, std::vector, std::vector) \ + OOMPH_CHECK_CALLBACK_MSG} #define OOMPH_CHECK_CALLBACK_REF(CALLBACK) \ - { \ - OOMPH_CHECK_CALLBACK_F(CALLBACK, rank_type, tag_type) \ - OOMPH_CHECK_CALLBACK_MSG_REF \ - } + {OOMPH_CHECK_CALLBACK_F(CALLBACK, rank_type, tag_type) OOMPH_CHECK_CALLBACK_MSG_REF} #define OOMPH_CHECK_CALLBACK_MULTI_REF(CALLBACK) \ - { \ - OOMPH_CHECK_CALLBACK_F(CALLBACK, std::vector, tag_type) \ - OOMPH_CHECK_CALLBACK_MSG_REF \ - } + {OOMPH_CHECK_CALLBACK_F(CALLBACK, std::vector, tag_type) \ + OOMPH_CHECK_CALLBACK_MSG_REF} #define OOMPH_CHECK_CALLBACK_MULTI_REF_TAGS(CALLBACK) \ - { \ - OOMPH_CHECK_CALLBACK_F(CALLBACK, std::vector, std::vector) \ - OOMPH_CHECK_CALLBACK_MSG_REF \ - } + {OOMPH_CHECK_CALLBACK_F(CALLBACK, std::vector, std::vector) \ + OOMPH_CHECK_CALLBACK_MSG_REF} #define OOMPH_CHECK_CALLBACK_CONST_REF(CALLBACK) \ - { \ 
- OOMPH_CHECK_CALLBACK_F(CALLBACK, rank_type, tag_type) \ - OOMPH_CHECK_CALLBACK_MSG_CONST_REF \ - } + {OOMPH_CHECK_CALLBACK_F(CALLBACK, rank_type, tag_type) OOMPH_CHECK_CALLBACK_MSG_CONST_REF} #define OOMPH_CHECK_CALLBACK_MULTI_CONST_REF(CALLBACK) \ - { \ - OOMPH_CHECK_CALLBACK_F(CALLBACK, std::vector, tag_type) \ - OOMPH_CHECK_CALLBACK_MSG_CONST_REF \ - } + {OOMPH_CHECK_CALLBACK_F(CALLBACK, std::vector, tag_type) \ + OOMPH_CHECK_CALLBACK_MSG_CONST_REF} #define OOMPH_CHECK_CALLBACK_MULTI_CONST_REF_TAGS(CALLBACK) \ - { \ - OOMPH_CHECK_CALLBACK_F(CALLBACK, std::vector, std::vector) \ - OOMPH_CHECK_CALLBACK_MSG_CONST_REF \ - } - -namespace oomph -{ -class communicator_impl; - -namespace detail -{ -struct communicator_state -{ - using impl_type = communicator_impl; - impl_type* m_impl; - std::atomic* m_shared_scheduled_recvs; - util::pool_factory m_mrs_factory; - std::size_t scheduled_sends = 0; - std::size_t scheduled_recvs = 0; - - communicator_state(impl_type* impl_, std::atomic* shared_scheduled_recvs); - ~communicator_state(); - communicator_state(communicator_state const&) = delete; - communicator_state(communicator_state&&) = delete; - communicator_state& operator=(communicator_state const&) = delete; - communicator_state& operator=(communicator_state&&) = delete; - - auto make_multi_request_state(std::size_t ns) { return m_mrs_factory.make(m_impl, ns); } - - template - auto make_multi_request_state(std::vector&& neighs, - oomph::message_buffer const& msg) - { - return m_mrs_factory.make(m_impl, neighs.size(), std::move(neighs), std::vector{}, - msg.size(), &msg); - } - - template - auto make_multi_request_state(std::vector&& neighs, std::vector&& tags, - oomph::message_buffer const& msg) - { - return m_mrs_factory.make(m_impl, neighs.size(), std::move(neighs), std::move(tags), - msg.size(), &msg); - } - - template - auto make_multi_request_state(std::vector&& neighs, oomph::message_buffer& msg) - { - return m_mrs_factory.make(m_impl, neighs.size(), 
std::move(neighs), std::vector{}, - msg.size(), &msg); - } - - template - auto make_multi_request_state(std::vector&& neighs, std::vector&& tags, - oomph::message_buffer& msg) - { - return m_mrs_factory.make(m_impl, neighs.size(), std::move(neighs), std::move(tags), - msg.size(), &msg); - } - - template - auto make_multi_request_state(std::vector&& neighs, oomph::message_buffer&& msg) - { - return m_mrs_factory.make(m_impl, neighs.size(), std::move(neighs), std::vector{}, - msg.size(), nullptr, std::move(msg.m)); - } - - template - auto make_multi_request_state(std::vector&& neighs, std::vector&& tags, - oomph::message_buffer&& msg) - { - return m_mrs_factory.make(m_impl, neighs.size(), std::move(neighs), std::move(tags), - msg.size(), nullptr, std::move(msg.m)); - } -}; - -} // namespace detail -} // namespace oomph + {OOMPH_CHECK_CALLBACK_F(CALLBACK, std::vector, std::vector) \ + OOMPH_CHECK_CALLBACK_MSG_CONST_REF} + +namespace oomph { + class communicator_impl; + + namespace detail { + struct communicator_state + { + using impl_type = communicator_impl; + impl_type* m_impl; + std::atomic* m_shared_scheduled_recvs; + util::pool_factory m_mrs_factory; + std::size_t scheduled_sends = 0; + std::size_t scheduled_recvs = 0; + + communicator_state(impl_type* impl_, std::atomic* shared_scheduled_recvs); + ~communicator_state(); + communicator_state(communicator_state const&) = delete; + communicator_state(communicator_state&&) = delete; + communicator_state& operator=(communicator_state const&) = delete; + communicator_state& operator=(communicator_state&&) = delete; + + auto make_multi_request_state(std::size_t ns) { return m_mrs_factory.make(m_impl, ns); } + + template + auto make_multi_request_state( + std::vector&& neighs, oomph::message_buffer const& msg) + { + return m_mrs_factory.make(m_impl, neighs.size(), std::move(neighs), + std::vector{}, msg.size(), &msg); + } + + template + auto make_multi_request_state(std::vector&& neighs, + std::vector&& tags, 
oomph::message_buffer const& msg) + { + return m_mrs_factory.make( + m_impl, neighs.size(), std::move(neighs), std::move(tags), msg.size(), &msg); + } + + template + auto + make_multi_request_state(std::vector&& neighs, oomph::message_buffer& msg) + { + return m_mrs_factory.make(m_impl, neighs.size(), std::move(neighs), + std::vector{}, msg.size(), &msg); + } + + template + auto make_multi_request_state(std::vector&& neighs, + std::vector&& tags, oomph::message_buffer& msg) + { + return m_mrs_factory.make( + m_impl, neighs.size(), std::move(neighs), std::move(tags), msg.size(), &msg); + } + + template + auto make_multi_request_state( + std::vector&& neighs, oomph::message_buffer&& msg) + { + return m_mrs_factory.make(m_impl, neighs.size(), std::move(neighs), + std::vector{}, msg.size(), nullptr, std::move(msg.m)); + } + + template + auto make_multi_request_state(std::vector&& neighs, + std::vector&& tags, oomph::message_buffer&& msg) + { + return m_mrs_factory.make(m_impl, neighs.size(), std::move(neighs), std::move(tags), + msg.size(), nullptr, std::move(msg.m)); + } + }; + + } // namespace detail +} // namespace oomph diff --git a/src/libfabric/communicator.hpp b/src/libfabric/communicator.hpp index ff8fc945..a38419dc 100644 --- a/src/libfabric/communicator.hpp +++ b/src/libfabric/communicator.hpp @@ -14,108 +14,109 @@ #include -#include #include +#include // paths relative to backend #include <../communicator_base.hpp> #include <../device_guard.hpp> +#include +#include #include #include -#include -#include - -namespace oomph -{ -using operation_context = libfabric::operation_context; +namespace oomph { -using tag_disp = NS_DEBUG::detail::hex<12, uintptr_t>; + using operation_context = libfabric::operation_context; -template -inline /*constexpr*/ NS_DEBUG::print_threshold com_deb("COMMUNI"); + using tag_disp = NS_DEBUG::detail::hex<12, uintptr_t>; -static NS_DEBUG::enable_print com_err("COMMUNI"); + template + inline /*constexpr*/ NS_DEBUG::print_threshold 
com_deb("COMMUNI"); -class communicator_impl : public communicator_base -{ - using tag_type = std::uint64_t; - // - using segment_type = libfabric::memory_segment; - using region_type = segment_type::handle_type; + static NS_DEBUG::enable_print com_err("COMMUNI"); - using callback_queue = boost::lockfree::queue, boost::lockfree::allocator>>; - - public: - context_impl* m_context; - libfabric::endpoint_wrapper m_tx_endpoint; - libfabric::endpoint_wrapper m_rx_endpoint; - // - callback_queue m_send_cb_queue; - callback_queue m_recv_cb_queue; - callback_queue m_recv_cb_cancel; - - // -------------------------------------------------------------------- - communicator_impl(context_impl* ctxt) - : communicator_base(ctxt) - , m_context(ctxt) - , m_send_cb_queue(128) - , m_recv_cb_queue(128) - , m_recv_cb_cancel(8) + class communicator_impl : public communicator_base { - LF_DEB(com_deb<9>, debug(NS_DEBUG::str<>("MPI_comm"), NS_DEBUG::ptr(mpi_comm()))); - m_tx_endpoint = m_context->get_controller()->get_tx_endpoint(); - m_rx_endpoint = m_context->get_controller()->get_rx_endpoint(); - } + using tag_type = std::uint64_t; + // + using segment_type = libfabric::memory_segment; + using region_type = segment_type::handle_type; + + using callback_queue = boost::lockfree::queue, boost::lockfree::allocator>>; + + public: + context_impl* m_context; + libfabric::endpoint_wrapper m_tx_endpoint; + libfabric::endpoint_wrapper m_rx_endpoint; + // + callback_queue m_send_cb_queue; + callback_queue m_recv_cb_queue; + callback_queue m_recv_cb_cancel; + + // -------------------------------------------------------------------- + communicator_impl(context_impl* ctxt) + : communicator_base(ctxt) + , m_context(ctxt) + , m_send_cb_queue(128) + , m_recv_cb_queue(128) + , m_recv_cb_cancel(8) + { + LF_DEB(com_deb<9>, debug(NS_DEBUG::str<>("MPI_comm"), NS_DEBUG::ptr(mpi_comm()))); + m_tx_endpoint = m_context->get_controller()->get_tx_endpoint(); + m_rx_endpoint = 
m_context->get_controller()->get_rx_endpoint(); + } - // -------------------------------------------------------------------- - ~communicator_impl() { clear_callback_queues(); } + // -------------------------------------------------------------------- + ~communicator_impl() { clear_callback_queues(); } - // -------------------------------------------------------------------- - auto& get_heap() noexcept { return m_context->get_heap(); } + // -------------------------------------------------------------------- + auto& get_heap() noexcept { return m_context->get_heap(); } - // -------------------------------------------------------------------- - /// generate a tag with 0xRRRRRRRRtttttttt rank, tag. - /// original tag can be 32bits, then we add 32bits of rank info. - inline std::uint64_t make_tag64(std::uint32_t tag, /*std::uint32_t rank, */ std::uintptr_t ctxt) - { - return (((ctxt & 0x0000000000FFFFFF) << 24) | ((std::uint64_t(tag) & 0x0000000000FFFFFF))); - } + // -------------------------------------------------------------------- + /// generate a tag with 0xRRRRRRRRtttttttt rank, tag. + /// original tag can be 32bits, then we add 32bits of rank info. + inline std::uint64_t make_tag64( + std::uint32_t tag, /*std::uint32_t rank, */ std::uintptr_t ctxt) + { + return (((ctxt & 0x0000'0000'00FF'FFFF) << 24) | + ((std::uint64_t(tag) & 0x0000'0000'00FF'FFFF))); + } - // -------------------------------------------------------------------- - template - inline void execute_fi_function(Func F, const char* msg, Args&&... args) - { - bool ok = false; - while (!ok) + // -------------------------------------------------------------------- + template + inline void execute_fi_function(Func F, char const* msg, Args&&... 
args) { - ssize_t ret = F(std::forward(args)...); - if (ret == 0) { return; } - else if (ret == -FI_EAGAIN) - { - // com_deb<9>.error("Reposting", msg); - // no point stressing the system - m_context->get_controller()->poll_for_work_completions(this); - } - else if (ret == -FI_ENOENT) + bool ok = false; + while (!ok) { - // if a node has failed, we can recover - // @TODO : put something better here - com_err.error("No destination endpoint, terminating."); - std::terminate(); + ssize_t ret = F(std::forward(args)...); + if (ret == 0) { return; } + else if (ret == -FI_EAGAIN) + { + // com_deb<9>.error("Reposting", msg); + // no point stressing the system + m_context->get_controller()->poll_for_work_completions(this); + } + else if (ret == -FI_ENOENT) + { + // if a node has failed, we can recover + // @TODO : put something better here + com_err.error("No destination endpoint, terminating."); + std::terminate(); + } + else if (ret) { throw NS_LIBFABRIC::fabric_error(int(ret), msg); } } - else if (ret) { throw NS_LIBFABRIC::fabric_error(int(ret), msg); } } - } - // -------------------------------------------------------------------- - // this takes a pinned memory region and sends it - void send_tagged_region(region_type const& send_region, std::size_t size, fi_addr_t dst_addr_, - uint64_t tag_, operation_context* ctxt) - { - [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::ptr(this), __func__); - // clang-format off + // -------------------------------------------------------------------- + // this takes a pinned memory region and sends it + void send_tagged_region(region_type const& send_region, std::size_t size, + fi_addr_t dst_addr_, uint64_t tag_, operation_context* ctxt) + { + [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::ptr(this), __func__); + // clang-format off LF_DEB(com_deb<9>, debug(NS_DEBUG::str<>("send_tagged_region"), "->", NS_DEBUG::dec<2>(dst_addr_), @@ -123,22 +124,24 @@ class communicator_impl : public communicator_base "tag", 
tag_disp(tag_), "context", NS_DEBUG::ptr(ctxt), "tx endpoint", NS_DEBUG::ptr(m_tx_endpoint.get_ep()))); - // clang-format on - execute_fi_function(fi_tsend, "fi_tsend", m_tx_endpoint.get_ep(), send_region.get_address(), - size, send_region.get_local_key(), dst_addr_, tag_, ctxt); - } + // clang-format on + execute_fi_function(fi_tsend, "fi_tsend", m_tx_endpoint.get_ep(), + send_region.get_address(), size, send_region.get_local_key(), dst_addr_, tag_, + ctxt); + } - // -------------------------------------------------------------------- - // this takes a pinned memory region and sends it using inject instead of send - void inject_tagged_region(region_type const& send_region, std::size_t size, fi_addr_t dst_addr_, - uint64_t tag_) - { - [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::ptr(this), __func__); - // clang-format on - LF_DEB(com_deb<9>, - debug(NS_DEBUG::str<>("inject tagged"), "->", NS_DEBUG::dec<2>(dst_addr_), send_region, - "tag", tag_disp(tag_), "tx endpoint", NS_DEBUG::ptr(m_tx_endpoint.get_ep()))); - // clang-format off + // -------------------------------------------------------------------- + // this takes a pinned memory region and sends it using inject instead of send + void inject_tagged_region( + region_type const& send_region, std::size_t size, fi_addr_t dst_addr_, uint64_t tag_) + { + [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::ptr(this), __func__); + // clang-format on + LF_DEB(com_deb<9>, + debug(NS_DEBUG::str<>("inject tagged"), "->", NS_DEBUG::dec<2>(dst_addr_), + send_region, "tag", tag_disp(tag_), "tx endpoint", + NS_DEBUG::ptr(m_tx_endpoint.get_ep()))); + // clang-format off execute_fi_function(fi_tinject, "fi_tinject", m_tx_endpoint.get_ep(), send_region.get_address(), size, dst_addr_, tag_); } @@ -159,62 +162,65 @@ class communicator_impl : public communicator_base "tag", tag_disp(tag_), "context", NS_DEBUG::ptr(ctxt), "rx endpoint", NS_DEBUG::ptr(m_rx_endpoint.get_ep()))); - // clang-format on - constexpr uint64_t 
ignore = 0; - execute_fi_function(fi_trecv, "fi_trecv", m_rx_endpoint.get_ep(), recv_region.get_address(), - size, recv_region.get_local_key(), src_addr_, tag_, ignore, ctxt); - // if (l.owns_lock()) l.unlock(); - } + // clang-format on + constexpr uint64_t ignore = 0; + execute_fi_function(fi_trecv, "fi_trecv", m_rx_endpoint.get_ep(), + recv_region.get_address(), size, recv_region.get_local_key(), src_addr_, tag_, + ignore, ctxt); + // if (l.owns_lock()) l.unlock(); + } - // -------------------------------------------------------------------- - send_request send(context_impl::heap_type::pointer const& ptr, std::size_t size, rank_type dst, - oomph::tag_type tag, util::unique_function&& cb, - std::size_t* scheduled) - { - [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::ptr(this), __func__); - std::uint64_t stag = make_tag64(tag, /*this->rank(), */ this->m_context->get_context_tag()); + // -------------------------------------------------------------------- + send_request send(context_impl::heap_type::pointer const& ptr, std::size_t size, + rank_type dst, oomph::tag_type tag, + util::unique_function&& cb, std::size_t* scheduled) + { + [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::ptr(this), __func__); + std::uint64_t stag = + make_tag64(tag, /*this->rank(), */ this->m_context->get_context_tag()); #if OOMPH_ENABLE_DEVICE - auto const& reg = ptr.on_device() ? ptr.device_handle() : ptr.handle(); + auto const& reg = ptr.on_device() ? 
ptr.device_handle() : ptr.handle(); #else - auto const& reg = ptr.handle(); + auto const& reg = ptr.handle(); #endif #ifdef EXTRA_SIZE_CHECKS - if (size != reg.get_size()) - { - LF_DEB(com_err, error(NS_DEBUG::str<>("send mismatch"), "size", NS_DEBUG::hex<6>(size), - "reg size", NS_DEBUG::hex<6>(reg.get_size()))); - } -#endif - m_context->get_controller()->sends_posted_++; - - // use optimized inject if msg is very small - if (size <= m_context->get_controller()->get_tx_inject_size()) - { - inject_tagged_region(reg, size, fi_addr_t(dst), stag); - if (!has_reached_recursion_depth()) + if (size != reg.get_size()) { - auto inc = recursion(); - cb(dst, tag); - return {}; + LF_DEB(com_err, + error(NS_DEBUG::str<>("send mismatch"), "size", NS_DEBUG::hex<6>(size), + "reg size", NS_DEBUG::hex<6>(reg.get_size()))); } - else +#endif + m_context->get_controller()->sends_posted_++; + + // use optimized inject if msg is very small + if (size <= m_context->get_controller()->get_tx_inject_size()) { - // construct request which is also an operation context - auto s = - m_req_state_factory.make(m_context, this, scheduled, dst, tag, std::move(cb)); - s->create_self_ref(); - while (!m_send_cb_queue.push(s.get())) {} - return {std::move(s)}; + inject_tagged_region(reg, size, fi_addr_t(dst), stag); + if (!has_reached_recursion_depth()) + { + auto inc = recursion(); + cb(dst, tag); + return {}; + } + else + { + // construct request which is also an operation context + auto s = m_req_state_factory.make( + m_context, this, scheduled, dst, tag, std::move(cb)); + s->create_self_ref(); + while (!m_send_cb_queue.push(s.get())) {} + return {std::move(s)}; + } } - } - // construct request which is also an operation context - auto s = m_req_state_factory.make(m_context, this, scheduled, dst, tag, std::move(cb)); - s->create_self_ref(); + // construct request which is also an operation context + auto s = m_req_state_factory.make(m_context, this, scheduled, dst, tag, std::move(cb)); + 
s->create_self_ref(); - // clang-format off + // clang-format off LF_DEB(com_deb<9>, debug(NS_DEBUG::str<>("Send"), "thisrank", NS_DEBUG::dec<>(rank()), @@ -234,39 +240,40 @@ class communicator_impl : public communicator_base NS_DEBUG::mem_crc32(reg.get_address(), size, "CRC32"))); } #endif - // clang-format on + // clang-format on - send_tagged_region(reg, size, fi_addr_t(dst), stag, &(s->m_operation_context)); - return {std::move(s)}; - } + send_tagged_region(reg, size, fi_addr_t(dst), stag, &(s->m_operation_context)); + return {std::move(s)}; + } - recv_request recv(context_impl::heap_type::pointer& ptr, std::size_t size, rank_type src, - oomph::tag_type tag, util::unique_function&& cb, - std::size_t* scheduled) - { - [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::ptr(this), __func__); - std::uint64_t stag = make_tag64(tag, /*src, */ this->m_context->get_context_tag()); + recv_request recv(context_impl::heap_type::pointer& ptr, std::size_t size, rank_type src, + oomph::tag_type tag, util::unique_function&& cb, + std::size_t* scheduled) + { + [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::ptr(this), __func__); + std::uint64_t stag = make_tag64(tag, /*src, */ this->m_context->get_context_tag()); #if OOMPH_ENABLE_DEVICE - auto const& reg = ptr.on_device() ? ptr.device_handle() : ptr.handle(); + auto const& reg = ptr.on_device() ? 
ptr.device_handle() : ptr.handle(); #else - auto const& reg = ptr.handle(); + auto const& reg = ptr.handle(); #endif #ifdef EXTRA_SIZE_CHECKS - if (size != reg.get_size()) - { - LF_DEB(com_err, error(NS_DEBUG::str<>("recv mismatch"), "size", NS_DEBUG::hex<6>(size), - "reg size", NS_DEBUG::hex<6>(reg.get_size()))); - } + if (size != reg.get_size()) + { + LF_DEB(com_err, + error(NS_DEBUG::str<>("recv mismatch"), "size", NS_DEBUG::hex<6>(size), + "reg size", NS_DEBUG::hex<6>(reg.get_size()))); + } #endif - m_context->get_controller()->recvs_posted_++; + m_context->get_controller()->recvs_posted_++; - // construct request which is also an operation context - auto s = m_req_state_factory.make(m_context, this, scheduled, src, tag, std::move(cb)); - s->create_self_ref(); + // construct request which is also an operation context + auto s = m_req_state_factory.make(m_context, this, scheduled, src, tag, std::move(cb)); + s->create_self_ref(); - // clang-format off + // clang-format off LF_DEB(com_deb<9>, debug(NS_DEBUG::str<>("recv"), "thisrank", NS_DEBUG::dec<>(rank()), @@ -286,41 +293,42 @@ class communicator_impl : public communicator_base NS_DEBUG::mem_crc32(reg.get_address(), size, "CRC32"))); } #endif - // clang-format on + // clang-format on - recv_tagged_region(reg, size, fi_addr_t(src), stag, &(s->m_operation_context)); - return {std::move(s)}; - } + recv_tagged_region(reg, size, fi_addr_t(src), stag, &(s->m_operation_context)); + return {std::move(s)}; + } - shared_recv_request shared_recv(context_impl::heap_type::pointer& ptr, std::size_t size, - rank_type src, oomph::tag_type tag, - util::unique_function&& cb, - std::atomic* scheduled) - { - [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::ptr(this), __func__); - std::uint64_t stag = make_tag64(tag, /*src, */ this->m_context->get_context_tag()); + shared_recv_request shared_recv(context_impl::heap_type::pointer& ptr, std::size_t size, + rank_type src, oomph::tag_type tag, + util::unique_function&& cb, + 
std::atomic* scheduled) + { + [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::ptr(this), __func__); + std::uint64_t stag = make_tag64(tag, /*src, */ this->m_context->get_context_tag()); #if OOMPH_ENABLE_DEVICE - auto const& reg = ptr.on_device() ? ptr.device_handle() : ptr.handle(); + auto const& reg = ptr.on_device() ? ptr.device_handle() : ptr.handle(); #else - auto const& reg = ptr.handle(); + auto const& reg = ptr.handle(); #endif #ifdef EXTRA_SIZE_CHECKS - if (size != reg.get_size()) - { - LF_DEB(com_err, error(NS_DEBUG::str<>("recv mismatch"), "size", NS_DEBUG::hex<6>(size), - "reg size", NS_DEBUG::hex<6>(reg.get_size()))); - } + if (size != reg.get_size()) + { + LF_DEB(com_err, + error(NS_DEBUG::str<>("recv mismatch"), "size", NS_DEBUG::hex<6>(size), + "reg size", NS_DEBUG::hex<6>(reg.get_size()))); + } #endif - m_context->get_controller()->recvs_posted_++; + m_context->get_controller()->recvs_posted_++; - // construct request which is also an operation context - auto s = std::make_shared(m_context, this, scheduled, src, - tag, std::move(cb)); - s->create_self_ref(); + // construct request which is also an operation context + auto s = std::make_shared( + m_context, this, scheduled, src, tag, std::move(cb)); + s->create_self_ref(); - // clang-format off + // clang-format off LF_DEB(com_deb<9>, debug(NS_DEBUG::str<>("shared_recv"), "thisrank", NS_DEBUG::dec<>(rank()), @@ -333,102 +341,97 @@ class communicator_impl : public communicator_base "reg size", NS_DEBUG::hex<6>(reg.get_size()), "op_ctx", NS_DEBUG::ptr(&(s->m_operation_context)), "req", NS_DEBUG::ptr(s.get()))); - // clang-format on + // clang-format on - recv_tagged_region(reg, size, fi_addr_t(src), stag, &(s->m_operation_context)); - m_context->get_controller()->poll_recv_queue(m_rx_endpoint.get_rx_cq(), this); - return {std::move(s)}; - } + recv_tagged_region(reg, size, fi_addr_t(src), stag, &(s->m_operation_context)); + m_context->get_controller()->poll_recv_queue(m_rx_endpoint.get_rx_cq(), 
this); + return {std::move(s)}; + } - void progress() - { - m_context->get_controller()->poll_for_work_completions(this); - clear_callback_queues(); - } + void progress() + { + m_context->get_controller()->poll_for_work_completions(this); + clear_callback_queues(); + } - void clear_callback_queues() - { - // work through ready callbacks, which were pushed to the queue - // (by other threads) - m_send_cb_queue.consume_all( - [](oomph::detail::request_state* req) - { + void clear_callback_queues() + { + // work through ready callbacks, which were pushed to the queue + // (by other threads) + m_send_cb_queue.consume_all([](oomph::detail::request_state* req) { [[maybe_unused]] auto scp = com_deb<9>.scope("m_send_cb_queue.consume_all", NS_DEBUG::ptr(req)); auto ptr = req->release_self_ref(); req->invoke_cb(); }); - m_recv_cb_queue.consume_all( - [](oomph::detail::request_state* req) - { + m_recv_cb_queue.consume_all([](oomph::detail::request_state* req) { [[maybe_unused]] auto scp = com_deb<9>.scope("m_recv_cb_queue.consume_all", NS_DEBUG::ptr(req)); auto ptr = req->release_self_ref(); req->invoke_cb(); }); - m_context->m_recv_cb_queue.consume_all( - [](detail::shared_request_state* req) - { + m_context->m_recv_cb_queue.consume_all([](detail::shared_request_state* req) { auto ptr = req->release_self_ref(); req->invoke_cb(); }); - } + } - // Cancel is a problem with libfabric because fi_cancel is asynchronous. - // The item to be cancelled will either complete with CANCELLED status - // or will complete as usual (ie before the cancel could take effect) - // - // We can only be certain if we poll until the completion happens - // or attach a callback to the cancel notification which is not supported - // by oomph. - bool cancel_recv(detail::request_state* s) - { - // get the original message operation context - operation_context* op_ctx = &(s->m_operation_context); + // Cancel is a problem with libfabric because fi_cancel is asynchronous. 
+ // The item to be cancelled will either complete with CANCELLED status + // or will complete as usual (ie before the cancel could take effect) + // + // We can only be certain if we poll until the completion happens + // or attach a callback to the cancel notification which is not supported + // by oomph. + bool cancel_recv(detail::request_state* s) + { + // get the original message operation context + operation_context* op_ctx = &(s->m_operation_context); - // submit the cancellation request - bool ok = (fi_cancel(&m_rx_endpoint.get_ep()->fid, op_ctx) == 0); - LF_DEB(com_deb<9>, - debug(NS_DEBUG::str<>("Cancel"), "ok", ok, "op_ctx", NS_DEBUG::ptr(op_ctx))); + // submit the cancellation request + bool ok = (fi_cancel(&m_rx_endpoint.get_ep()->fid, op_ctx) == 0); + LF_DEB(com_deb<9>, + debug(NS_DEBUG::str<>("Cancel"), "ok", ok, "op_ctx", NS_DEBUG::ptr(op_ctx))); - // if the cancel operation failed completely, return - if (!ok) return false; + // if the cancel operation failed completely, return + if (!ok) return false; - bool found = false; - while (!found) - { - m_context->get_controller()->poll_recv_queue(m_rx_endpoint.get_rx_cq(), this); - // otherwise, poll until we know if it worked - std::stack temp_stack; - detail::request_state* temp; - while (!found && m_recv_cb_cancel.pop(temp)) + bool found = false; + while (!found) { - if (temp == s) + m_context->get_controller()->poll_recv_queue(m_rx_endpoint.get_rx_cq(), this); + // otherwise, poll until we know if it worked + std::stack temp_stack; + detail::request_state* temp; + while (!found && m_recv_cb_cancel.pop(temp)) { - // our recv was cancelled correctly - found = true; - LF_DEB(com_deb<9>, debug(NS_DEBUG::str<>("Cancel"), "succeeded", "op_ctx", - NS_DEBUG::ptr(op_ctx))); - auto ptr = s->release_self_ref(); - s->set_canceled(); + if (temp == s) + { + // our recv was cancelled correctly + found = true; + LF_DEB(com_deb<9>, + debug(NS_DEBUG::str<>("Cancel"), "succeeded", "op_ctx", + NS_DEBUG::ptr(op_ctx))); + 
auto ptr = s->release_self_ref(); + s->set_canceled(); + } + else + { + // a different cancel operation + temp_stack.push(temp); + } } - else + // return any weird unhandled cancels back to the queue + while (!temp_stack.empty()) { - // a different cancel operation - temp_stack.push(temp); + auto temp = temp_stack.top(); + temp_stack.pop(); + m_recv_cb_cancel.push(temp); } } - // return any weird unhandled cancels back to the queue - while (!temp_stack.empty()) - { - auto temp = temp_stack.top(); - temp_stack.pop(); - m_recv_cb_cancel.push(temp); - } + return found; } - return found; - } -}; + }; -} // namespace oomph +} // namespace oomph diff --git a/src/libfabric/context.cpp b/src/libfabric/context.cpp index 5621a83b..cb7757a2 100644 --- a/src/libfabric/context.cpp +++ b/src/libfabric/context.cpp @@ -10,88 +10,83 @@ #include // #include - -#include - // paths relative to backend -#include -#include #include #include +#include +#include -namespace oomph -{ -// cppcheck-suppress ConfigurationNotChecked -static NS_DEBUG::enable_print src_deb("__SRC__"); +namespace oomph { + // cppcheck-suppress ConfigurationNotChecked + static NS_DEBUG::enable_print src_deb("__SRC__"); -using controller_type = libfabric::controller; + using controller_type = libfabric::controller; -context_impl::context_impl(MPI_Comm comm, bool thread_safe, - hwmalloc::heap_config const& heap_config) -: context_base(comm, thread_safe) -, m_heap{this, heap_config} -, m_recv_cb_queue(128) -, m_recv_cb_cancel(8) -{ - int rank, size; - OOMPH_CHECK_MPI_RESULT(MPI_Comm_rank(comm, &rank)); - OOMPH_CHECK_MPI_RESULT(MPI_Comm_size(comm, &size)); + context_impl::context_impl(MPI_Comm comm, bool thread_safe, bool message_pool_never_free, + std::size_t message_pool_reserve) + : context_base(comm, thread_safe) + , m_heap{this, message_pool_never_free, message_pool_reserve} + , m_recv_cb_queue(128) + , m_recv_cb_cancel(8) + { + int rank, size; + OOMPH_CHECK_MPI_RESULT(MPI_Comm_rank(comm, &rank)); + 
OOMPH_CHECK_MPI_RESULT(MPI_Comm_size(comm, &size)); - m_ctxt_tag = reinterpret_cast(this); - OOMPH_CHECK_MPI_RESULT(MPI_Bcast(&m_ctxt_tag, 1, MPI_UINT64_T, 0, comm)); - LF_DEB(src_deb, debug(NS_DEBUG::str<>("Broadcast"), "rank", debug::dec<3>(rank), "context", - debug::ptr(m_ctxt_tag))); + m_ctxt_tag = reinterpret_cast(this); + OOMPH_CHECK_MPI_RESULT(MPI_Bcast(&m_ctxt_tag, 1, MPI_UINT64_T, 0, comm)); + LF_DEB(src_deb, + debug(NS_DEBUG::str<>("Broadcast"), "rank", debug::dec<3>(rank), "context", + debug::ptr(m_ctxt_tag))); - // TODO fix the thread safety - // problem: controller is a singleton and has problems when 2 contexts are created in the - // following order: single threaded first, then multi-threaded after - //int threads = thread_safe ? std::thread::hardware_concurrency() : 1; - //int threads = std::thread::hardware_concurrency(); - int threads = boost::thread::physical_concurrency(); - m_controller = init_libfabric_controller(this, comm, rank, size, threads); - m_domain = m_controller->get_domain(); -} + // TODO fix the thread safety + // problem: controller is a singleton and has problems when 2 contexts are created in the + // following order: single threaded first, then multi-threaded after + //int threads = thread_safe ? 
std::thread::hardware_concurrency() : 1; + //int threads = std::thread::hardware_concurrency(); + int threads = boost::thread::physical_concurrency(); + m_controller = init_libfabric_controller(this, comm, rank, size, threads); + m_domain = m_controller->get_domain(); + } -communicator_impl* -context_impl::get_communicator() -{ - auto comm = new communicator_impl{this}; - m_comms_set.insert(comm); - return comm; -} + communicator_impl* context_impl::get_communicator() + { + auto comm = new communicator_impl{this}; + m_comms_set.insert(comm); + return comm; + } -const char* -context_impl::get_transport_option(const std::string& opt) -{ - if (opt == "name") { return "libfabric"; } - else if (opt == "progress") { return libfabric_progress_string(); } - else if (opt == "endpoint") { return libfabric_endpoint_string(); } - else if (opt == "rendezvous_threshold") + char const* context_impl::get_transport_option(std::string const& opt) { - static char buffer[32]; - std::string temp = std::to_string(m_controller->rendezvous_threshold()); - strncpy(buffer, temp.c_str(), std::min(size_t(31), std::strlen(temp.c_str()))); - return buffer; + if (opt == "name") { return "libfabric"; } + else if (opt == "progress") { return libfabric_progress_string(); } + else if (opt == "endpoint") { return libfabric_endpoint_string(); } + else if (opt == "rendezvous_threshold") + { + static char buffer[32]; + std::string temp = std::to_string(m_controller->rendezvous_threshold()); + strncpy(buffer, temp.c_str(), std::min(size_t(31), std::strlen(temp.c_str()))); + return buffer; + } + else { return "unspecified"; } } - else { return "unspecified"; } -} -std::shared_ptr -context_impl::init_libfabric_controller(oomph::context_impl* /*ctx*/, MPI_Comm comm, int rank, - int size, int threads) -{ - // only allow one thread to pass, make other wait - static std::mutex m_init_mutex; - std::lock_guard lock(m_init_mutex); - static std::shared_ptr instance(nullptr); - if (!instance.get()) + 
std::shared_ptr context_impl::init_libfabric_controller( + oomph::context_impl* /*ctx*/, MPI_Comm comm, int rank, int size, int threads) { - LF_DEB(src_deb, debug(NS_DEBUG::str<>("New Controller"), "rank", debug::dec<3>(rank), - "size", debug::dec<3>(size), "threads", debug::dec<3>(threads))); - instance.reset(new controller_type()); - instance->initialize(HAVE_LIBFABRIC_PROVIDER, rank == 0, size, threads, comm); + // only allow one thread to pass, make other wait + static std::mutex m_init_mutex; + std::lock_guard lock(m_init_mutex); + static std::shared_ptr instance(nullptr); + if (!instance.get()) + { + LF_DEB(src_deb, + debug(NS_DEBUG::str<>("New Controller"), "rank", debug::dec<3>(rank), "size", + debug::dec<3>(size), "threads", debug::dec<3>(threads))); + instance.reset(new controller_type()); + instance->initialize(HAVE_LIBFABRIC_PROVIDER, rank == 0, size, threads, comm); + } + return instance; } - return instance; -} -} // namespace oomph +} // namespace oomph diff --git a/src/libfabric/context.hpp b/src/libfabric/context.hpp index a7c0c112..7a936223 100644 --- a/src/libfabric/context.hpp +++ b/src/libfabric/context.hpp @@ -9,148 +9,152 @@ */ #pragma once -#include #include +#include #include -#include #include #include // paths relative to backend #include <../context_base.hpp> -#include #include +#include #include -namespace oomph -{ - -static NS_DEBUG::enable_print ctx_deb("CONTEXT"); - -using controller_type = libfabric::controller; - -class context_impl : public context_base -{ - public: - using region_type = libfabric::memory_segment; - using domain_type = region_type::provider_domain; - using device_region_type = libfabric::memory_segment; - using heap_type = hwmalloc::heap; - using callback_queue = boost::lockfree::queue, boost::lockfree::allocator>>; - - private: - heap_type m_heap; - domain_type* m_domain; - std::shared_ptr m_controller; - std::uintptr_t m_ctxt_tag; - - public: - // -------------------------------------------------- - // create a 
singleton ptr to a libfabric controller that - // can be shared between oomph context objects - static std::shared_ptr init_libfabric_controller(oomph::context_impl* ctx, - MPI_Comm comm, int rank, int size, int threads); - - // queue for shared recv callbacks - callback_queue m_recv_cb_queue; - // queue for canceled shared recv requests - callback_queue m_recv_cb_cancel; - - public: - context_impl(MPI_Comm comm, bool thread_safe, hwmalloc::heap_config const& heap_config); - context_impl(context_impl const&) = delete; - context_impl(context_impl&&) = delete; - - region_type make_region(void* const ptr, std::size_t size, int device_id) +namespace oomph { + + static NS_DEBUG::enable_print ctx_deb("CONTEXT"); + + using controller_type = libfabric::controller; + + class context_impl : public context_base { - if (m_controller->get_mrbind()) + public: + using region_type = libfabric::memory_segment; + using domain_type = region_type::provider_domain; + using device_region_type = libfabric::memory_segment; + using heap_type = hwmalloc::heap; + using callback_queue = boost::lockfree::queue, boost::lockfree::allocator>>; + + private: + heap_type m_heap; + domain_type* m_domain; + std::shared_ptr m_controller; + std::uintptr_t m_ctxt_tag; + + public: + // -------------------------------------------------- + // create a singleton ptr to a libfabric controller that + // can be shared between oomph context objects + static std::shared_ptr init_libfabric_controller( + oomph::context_impl* ctx, MPI_Comm comm, int rank, int size, int threads); + + // queue for shared recv callbacks + callback_queue m_recv_cb_queue; + // queue for canceled shared recv requests + callback_queue m_recv_cb_cancel; + + public: + context_impl(MPI_Comm comm, bool thread_safe, bool message_pool_never_free, + std::size_t message_pool_reserve); + context_impl(context_impl const&) = delete; + context_impl(context_impl&&) = delete; + + region_type make_region(void* const ptr, std::size_t size, int device_id) 
{ - void* endpoint = m_controller->get_rx_endpoint().get_ep(); - return libfabric::memory_segment(m_domain, ptr, size, true, endpoint, device_id); + if (m_controller->get_mrbind()) + { + void* endpoint = m_controller->get_rx_endpoint().get_ep(); + return libfabric::memory_segment(m_domain, ptr, size, true, endpoint, device_id); + } + else + { + return libfabric::memory_segment(m_domain, ptr, size, false, nullptr, device_id); + } } - else { return libfabric::memory_segment(m_domain, ptr, size, false, nullptr, device_id); } - } - auto& get_heap() noexcept { return m_heap; } + auto& get_heap() noexcept { return m_heap; } - communicator_impl* get_communicator(); + communicator_impl* get_communicator(); - // we must modify all tags to use 32bits of context ptr for uniqueness - inline std::uintptr_t get_context_tag() { return m_ctxt_tag; } + // we must modify all tags to use 32bits of context ptr for uniqueness + inline std::uintptr_t get_context_tag() { return m_ctxt_tag; } - inline controller_type* get_controller() /*const */ { return m_controller.get(); } - const char* get_transport_option(const std::string& opt); + inline controller_type* get_controller() /*const */ { return m_controller.get(); } + char const* get_transport_option(std::string const& opt); - void progress() { get_controller()->poll_for_work_completions(nullptr); } + void progress() { get_controller()->poll_for_work_completions(nullptr); } - bool cancel_recv(detail::shared_request_state* s) - { - // get the original message operation context - auto op_ctx = &(s->m_operation_context); + bool cancel_recv(detail::shared_request_state* s) + { + // get the original message operation context + auto op_ctx = &(s->m_operation_context); - // submit the cancellation request - bool ok = (fi_cancel(&(get_controller()->get_rx_endpoint().get_ep()->fid), op_ctx) == 0); + // submit the cancellation request + bool ok = + (fi_cancel(&(get_controller()->get_rx_endpoint().get_ep()->fid), op_ctx) == 0); - // if the cancel 
operation failed completely, return - if (!ok) return false; + // if the cancel operation failed completely, return + if (!ok) return false; - bool found = false; - while (!found) - { - get_controller()->poll_recv_queue(get_controller()->get_rx_endpoint().get_rx_cq(), - nullptr); - // otherwise, poll until we know if it worked - std::stack temp_stack; - detail::shared_request_state* temp; - while (!found && m_recv_cb_cancel.pop(temp)) + bool found = false; + while (!found) { - if (temp == s) + get_controller()->poll_recv_queue( + get_controller()->get_rx_endpoint().get_rx_cq(), nullptr); + // otherwise, poll until we know if it worked + std::stack temp_stack; + detail::shared_request_state* temp; + while (!found && m_recv_cb_cancel.pop(temp)) { - // our recv was cancelled correctly - found = true; - LF_DEB(oomph::ctx_deb, debug(NS_DEBUG::str<>("Cancel shared"), "succeeded", - "op_ctx", NS_DEBUG::ptr(op_ctx))); - auto ptr = s->release_self_ref(); - s->set_canceled(); + if (temp == s) + { + // our recv was cancelled correctly + found = true; + LF_DEB(oomph::ctx_deb, + debug(NS_DEBUG::str<>("Cancel shared"), "succeeded", "op_ctx", + NS_DEBUG::ptr(op_ctx))); + auto ptr = s->release_self_ref(); + s->set_canceled(); + } + else + { + // a different cancel operation + temp_stack.push(temp); + } } - else + // return any weird unhandled cancels back to the queue + while (!temp_stack.empty()) { - // a different cancel operation - temp_stack.push(temp); + auto temp = temp_stack.top(); + temp_stack.pop(); + m_recv_cb_cancel.push(temp); } } - // return any weird unhandled cancels back to the queue - while (!temp_stack.empty()) - { - auto temp = temp_stack.top(); - temp_stack.pop(); - m_recv_cb_cancel.push(temp); - } + return found; } - return found; - } - unsigned int num_tag_bits() const noexcept { return 32; } -}; + unsigned int num_tag_bits() const noexcept { return 32; } + }; -// -------------------------------------------------------------------- -template<> -inline 
oomph::libfabric::memory_segment -register_memory(oomph::context_impl& c, void* const ptr, std::size_t size) -{ - return c.make_region(ptr, size, -2); -} + // -------------------------------------------------------------------- + template <> + inline oomph::libfabric::memory_segment + register_memory(oomph::context_impl& c, void* const ptr, std::size_t size) + { + return c.make_region(ptr, size, -2); + } #if OOMPH_ENABLE_DEVICE -template<> -inline oomph::libfabric::memory_segment -register_device_memory(context_impl& c, int device_id, void* ptr, std::size_t size) -{ - return c.make_region(ptr, size, device_id); -} + template <> + inline oomph::libfabric::memory_segment register_device_memory( + context_impl& c, int device_id, void* ptr, std::size_t size) + { + return c.make_region(ptr, size, device_id); + } #endif -} // namespace oomph +} // namespace oomph diff --git a/src/libfabric/controller.hpp b/src/libfabric/controller.hpp index 5becc148..95e3ad17 100644 --- a/src/libfabric/controller.hpp +++ b/src/libfabric/controller.hpp @@ -35,428 +35,436 @@ #include #include // -#include "oomph_libfabric_defines.hpp" +#include "controller_base.hpp" #include "fabric_error.hpp" #include "locality.hpp" #include "memory_region.hpp" +#include "oomph_libfabric_defines.hpp" #include "operation_context.hpp" -#include "controller_base.hpp" // #include // #include -namespace NS_DEBUG -{ -// cppcheck-suppress ConfigurationNotChecked +namespace NS_DEBUG { + // cppcheck-suppress ConfigurationNotChecked -using namespace oomph::debug; -template -inline /*constexpr*/ NS_DEBUG::print_threshold cnt_deb("CONTROL"); -// -static NS_DEBUG::enable_print cnt_err("CONTROL"); -} // namespace NS_DEBUG - -namespace oomph::libfabric -{ - -class controller : public controller_base -{ - public: - // -------------------------------------------------------------------- - controller() - : controller_base() - { - } + using namespace oomph::debug; + template + inline /*constexpr*/ NS_DEBUG::print_threshold 
cnt_deb("CONTROL"); + // + static NS_DEBUG::enable_print cnt_err("CONTROL"); +} // namespace NS_DEBUG - // -------------------------------------------------------------------- - void initialize_derived(std::string const&, bool, int, size_t, MPI_Comm mpi_comm) - { - // Broadcast address of all endpoints to all ranks - // and fill address vector with info - exchange_addresses(av_, mpi_comm); - } +namespace oomph::libfabric { - // -------------------------------------------------------------------- - constexpr fi_threading threadlevel_flags() + class controller : public controller_base { + public: + // -------------------------------------------------------------------- + controller() + : controller_base() + { + } + + // -------------------------------------------------------------------- + void initialize_derived(std::string const&, bool, int, size_t, MPI_Comm mpi_comm) + { + // Broadcast address of all endpoints to all ranks + // and fill address vector with info + exchange_addresses(av_, mpi_comm); + } + + // -------------------------------------------------------------------- + constexpr fi_threading threadlevel_flags() + { #if defined(HAVE_LIBFABRIC_GNI) /*|| defined(HAVE_LIBFABRIC_CXI)*/ - return FI_THREAD_ENDPOINT; + return FI_THREAD_ENDPOINT; #else - return FI_THREAD_SAFE; + return FI_THREAD_SAFE; #endif - } + } - // -------------------------------------------------------------------- - constexpr uint64_t caps_flags() - { + // -------------------------------------------------------------------- + constexpr uint64_t caps_flags() + { #if OOMPH_ENABLE_DEVICE && !defined(HAVE_LIBFABRIC_TCP) - std::int64_t hmem_flags = FI_HMEM; + std::int64_t hmem_flags = FI_HMEM; #else - std::int64_t hmem_flags = 0; + std::int64_t hmem_flags = 0; #endif - return hmem_flags | FI_MSG | FI_TAGGED | FI_RMA | FI_READ | FI_WRITE | FI_RECV | FI_SEND | - FI_TRANSMIT | FI_REMOTE_READ | FI_REMOTE_WRITE; - } - - // -------------------------------------------------------------------- - // we 
do not need to perform any special actions on init (to contact root node) - void setup_root_node_address(struct fi_info* /*info*/) {} + return hmem_flags | FI_MSG | FI_TAGGED | FI_RMA | FI_READ | FI_WRITE | FI_RECV | + FI_SEND | FI_TRANSMIT | FI_REMOTE_READ | FI_REMOTE_WRITE; + } - // -------------------------------------------------------------------- - // send address to rank 0 and receive array of all localities - void MPI_exchange_localities(fid_av* av, MPI_Comm comm, int rank, int size) - { - [[maybe_unused]] auto scp = NS_DEBUG::cnt_deb<9>.scope(NS_DEBUG::ptr(this), __func__); - std::vector localities(size * locality_defs::array_size, 0); - // - if (rank > 0) - { - LF_DEB(NS_DEBUG::cnt_deb<9>, debug(debug::str<>("sending here"), iplocality(here_), - "size", locality_defs::array_size)); - /*int err = */ MPI_Send(here_.fabric_data(), locality_defs::array_size, MPI_CHAR, - 0, // dst rank - 0, // tag - comm); + // -------------------------------------------------------------------- + // we do not need to perform any special actions on init (to contact root node) + void setup_root_node_address(struct fi_info* /*info*/) {} - LF_DEB(NS_DEBUG::cnt_deb<9>, - debug(debug::str<>("receiving all"), "size", locality_defs::array_size)); - - MPI_Status status; - /*err = */ MPI_Recv(localities.data(), size * locality_defs::array_size, MPI_CHAR, - 0, // src rank - 0, // tag - comm, &status); - LF_DEB(NS_DEBUG::cnt_deb<9>, debug(debug::str<>("received addresses"))); - } - else + // -------------------------------------------------------------------- + // send address to rank 0 and receive array of all localities + void MPI_exchange_localities(fid_av* av, MPI_Comm comm, int rank, int size) { - LF_DEB(NS_DEBUG::cnt_deb<9>, debug(debug::str<>("receiving addresses"))); - memcpy(&localities[0], here_.fabric_data(), locality_defs::array_size); - for (int i = 1; i < size; ++i) + [[maybe_unused]] auto scp = NS_DEBUG::cnt_deb<9>.scope(NS_DEBUG::ptr(this), __func__); + std::vector 
localities(size * locality_defs::array_size, 0); + // + if (rank > 0) { LF_DEB(NS_DEBUG::cnt_deb<9>, - debug(debug::str<>("receiving address"), debug::dec<>(i))); + debug(debug::str<>("sending here"), iplocality(here_), "size", + locality_defs::array_size)); + /*int err = */ MPI_Send(here_.fabric_data(), locality_defs::array_size, MPI_CHAR, + 0, // dst rank + 0, // tag + comm); + + LF_DEB(NS_DEBUG::cnt_deb<9>, + debug(debug::str<>("receiving all"), "size", locality_defs::array_size)); + MPI_Status status; - /*int err = */ MPI_Recv(&localities[i * locality_defs::array_size], - size * locality_defs::array_size, MPI_CHAR, - i, // src rank - 0, // tag + /*err = */ MPI_Recv(localities.data(), size * locality_defs::array_size, MPI_CHAR, + 0, // src rank + 0, // tag comm, &status); - LF_DEB(NS_DEBUG::cnt_deb<9>, - debug(debug::str<>("received address"), debug::dec<>(i))); + LF_DEB(NS_DEBUG::cnt_deb<9>, debug(debug::str<>("received addresses"))); + } + else + { + LF_DEB(NS_DEBUG::cnt_deb<9>, debug(debug::str<>("receiving addresses"))); + memcpy(&localities[0], here_.fabric_data(), locality_defs::array_size); + for (int i = 1; i < size; ++i) + { + LF_DEB(NS_DEBUG::cnt_deb<9>, + debug(debug::str<>("receiving address"), debug::dec<>(i))); + MPI_Status status; + /*int err = */ MPI_Recv(&localities[i * locality_defs::array_size], + size * locality_defs::array_size, MPI_CHAR, + i, // src rank + 0, // tag + comm, &status); + LF_DEB(NS_DEBUG::cnt_deb<9>, + debug(debug::str<>("received address"), debug::dec<>(i))); + } + + LF_DEB(NS_DEBUG::cnt_deb<9>, debug(debug::str<>("sending all"))); + for (int i = 1; i < size; ++i) + { + LF_DEB( + NS_DEBUG::cnt_deb<9>, debug(debug::str<>("sending to"), debug::dec<>(i))); + /*int err = */ MPI_Send(&localities[0], size * locality_defs::array_size, + MPI_CHAR, + i, // dst rank + 0, // tag + comm); + } } - LF_DEB(NS_DEBUG::cnt_deb<9>, debug(debug::str<>("sending all"))); - for (int i = 1; i < size; ++i) + // all ranks should now have a full 
localities vector + LF_DEB(NS_DEBUG::cnt_deb<9>, debug(debug::str<>("populating vector"))); + for (int i = 0; i < size; ++i) { - LF_DEB(NS_DEBUG::cnt_deb<9>, debug(debug::str<>("sending to"), debug::dec<>(i))); - /*int err = */ MPI_Send(&localities[0], size * locality_defs::array_size, MPI_CHAR, - i, // dst rank - 0, // tag - comm); + locality temp; + int offset = i * locality_defs::array_size; + memcpy(temp.fabric_data_writable(), &localities[offset], locality_defs::array_size); + insert_address(av, temp); } } - // all ranks should now have a full localities vector - LF_DEB(NS_DEBUG::cnt_deb<9>, debug(debug::str<>("populating vector"))); - for (int i = 0; i < size; ++i) + // -------------------------------------------------------------------- + // if we did not bootstrap, then fetch the list of all localities + // and insert each one into the address vector + void exchange_addresses(fid_av* av, MPI_Comm mpi_comm) { - locality temp; - int offset = i * locality_defs::array_size; - memcpy(temp.fabric_data_writable(), &localities[offset], locality_defs::array_size); - insert_address(av, temp); - } - } + [[maybe_unused]] auto scp = NS_DEBUG::cnt_deb<9>.scope(NS_DEBUG::ptr(this), __func__); - // -------------------------------------------------------------------- - // if we did not bootstrap, then fetch the list of all localities - // and insert each one into the address vector - void exchange_addresses(fid_av* av, MPI_Comm mpi_comm) - { - [[maybe_unused]] auto scp = NS_DEBUG::cnt_deb<9>.scope(NS_DEBUG::ptr(this), __func__); - - int rank, size; - MPI_Comm_rank(mpi_comm, &rank); - MPI_Comm_size(mpi_comm, &size); + int rank, size; + MPI_Comm_rank(mpi_comm, &rank); + MPI_Comm_size(mpi_comm, &size); - LF_DEB(NS_DEBUG::cnt_deb<9>, - debug(debug::str<>("initialize_localities"), size, "localities")); + LF_DEB(NS_DEBUG::cnt_deb<9>, + debug(debug::str<>("initialize_localities"), size, "localities")); - MPI_exchange_localities(av, mpi_comm, rank, size); - 
debug_print_av_vector(size); - LF_DEB(NS_DEBUG::cnt_deb<9>, debug(debug::str<>("Done localities"))); - } + MPI_exchange_localities(av, mpi_comm, rank, size); + debug_print_av_vector(size); + LF_DEB(NS_DEBUG::cnt_deb<9>, debug(debug::str<>("Done localities"))); + } - // -------------------------------------------------------------------- - inline constexpr bool bypass_tx_lock() - { + // -------------------------------------------------------------------- + inline constexpr bool bypass_tx_lock() + { #if defined(HAVE_LIBFABRIC_GNI) - return true; + return true; #elif defined(HAVE_LIBFABRIC_CXI) - // @todo : cxi provider is not yet thread safe using scalable endpoints - return false; + // @todo : cxi provider is not yet thread safe using scalable endpoints + return false; #else - return (threadlevel_flags() == FI_THREAD_SAFE || + return (threadlevel_flags() == FI_THREAD_SAFE || endpoint_type_ == endpoint_type::threadlocalTx); #endif - } + } - // -------------------------------------------------------------------- - inline controller_base::unique_lock get_tx_lock() - { - if (bypass_tx_lock()) return unique_lock(); - return unique_lock(send_mutex_); - } + // -------------------------------------------------------------------- + inline controller_base::unique_lock get_tx_lock() + { + if (bypass_tx_lock()) return unique_lock(); + return unique_lock(send_mutex_); + } - // -------------------------------------------------------------------- - inline controller_base::unique_lock try_tx_lock() - { - if (bypass_tx_lock()) return unique_lock(); - return unique_lock(send_mutex_, std::try_to_lock_t{}); - } + // -------------------------------------------------------------------- + inline controller_base::unique_lock try_tx_lock() + { + if (bypass_tx_lock()) return unique_lock(); + return unique_lock(send_mutex_, std::try_to_lock_t{}); + } - // -------------------------------------------------------------------- - inline constexpr bool bypass_rx_lock() - { + // 
-------------------------------------------------------------------- + inline constexpr bool bypass_rx_lock() + { #ifdef HAVE_LIBFABRIC_GNI - return true; + return true; #else - return ( - threadlevel_flags() == FI_THREAD_SAFE || endpoint_type_ == endpoint_type::scalableTxRx); + return (threadlevel_flags() == FI_THREAD_SAFE || + endpoint_type_ == endpoint_type::scalableTxRx); #endif - } + } - // -------------------------------------------------------------------- - inline controller_base::unique_lock get_rx_lock() - { - if (bypass_rx_lock()) return unique_lock(); - return unique_lock(recv_mutex_); - } + // -------------------------------------------------------------------- + inline controller_base::unique_lock get_rx_lock() + { + if (bypass_rx_lock()) return unique_lock(); + return unique_lock(recv_mutex_); + } - // -------------------------------------------------------------------- - inline controller_base::unique_lock try_rx_lock() - { - if (bypass_rx_lock()) return unique_lock(); - return unique_lock(recv_mutex_, std::try_to_lock_t{}); - } + // -------------------------------------------------------------------- + inline controller_base::unique_lock try_rx_lock() + { + if (bypass_rx_lock()) return unique_lock(); + return unique_lock(recv_mutex_, std::try_to_lock_t{}); + } - // -------------------------------------------------------------------- - int poll_send_queue(fid_cq* send_cq, void* user_data) - { + // -------------------------------------------------------------------- + int poll_send_queue(fid_cq* send_cq, void* user_data) + { #ifdef EXCESSIVE_POLLING_BACKOFF_MICRO_S - std::chrono::steady_clock::time_point now = std::chrono::steady_clock::now(); - if (std::chrono::duration_cast(now - send_poll_stamp).count() < - EXCESSIVE_POLLING_BACKOFF_MICRO_S) - return 0; - send_poll_stamp = now; + std::chrono::steady_clock::time_point now = std::chrono::steady_clock::now(); + if (std::chrono::duration_cast(now - send_poll_stamp) + .count() < 
EXCESSIVE_POLLING_BACKOFF_MICRO_S) + return 0; + send_poll_stamp = now; #endif - int ret; - fi_cq_msg_entry entry[max_completions_array_limit_]; - assert(max_completions_per_poll_ <= max_completions_array_limit_); - { - auto lock = try_tx_lock(); + int ret; + fi_cq_msg_entry entry[max_completions_array_limit_]; + assert(max_completions_per_poll_ <= max_completions_array_limit_); + { + auto lock = try_tx_lock(); - // if we're not threadlocal and didn't get the lock, - // then another thread is polling now, just exit - if (!bypass_tx_lock() && !lock.owns_lock()) { return -1; } + // if we're not threadlocal and didn't get the lock, + // then another thread is polling now, just exit + if (!bypass_tx_lock() && !lock.owns_lock()) { return -1; } - static auto polling = - NS_DEBUG::cnt_deb<9>.make_timer(1, debug::str<>("poll send queue")); - LF_DEB(NS_DEBUG::cnt_deb<9>, timed(polling, NS_DEBUG::ptr(send_cq))); + static auto polling = + NS_DEBUG::cnt_deb<9>.make_timer(1, debug::str<>("poll send queue")); + LF_DEB(NS_DEBUG::cnt_deb<9>, timed(polling, NS_DEBUG::ptr(send_cq))); - // poll for completions - { - ret = fi_cq_read(send_cq, &entry[0], max_completions_per_poll_); - } - // if there is an error, retrieve it - if (ret == -FI_EAVAIL) - { - struct fi_cq_err_entry e = {}; - int err_sz = fi_cq_readerr(send_cq, &e, 0); - (void)err_sz; - - // flags might not be set correctly - if ((e.flags & (FI_MSG | FI_SEND | FI_TAGGED)) != 0) + // poll for completions { - NS_DEBUG::cnt_err.error("txcq Error FI_EAVAIL for " - "FI_SEND with len", - debug::hex<6>(e.len), "context", NS_DEBUG::ptr(e.op_context), "code", - NS_DEBUG::dec<3>(e.err), "flags", debug::bin<16>(e.flags), "error", - fi_cq_strerror(send_cq, e.prov_errno, e.err_data, (char*)e.buf, e.len)); + ret = fi_cq_read(send_cq, &entry[0], max_completions_per_poll_); } - else if ((e.flags & FI_RMA) != 0) + // if there is an error, retrieve it + if (ret == -FI_EAVAIL) { - NS_DEBUG::cnt_err.error("txcq Error FI_EAVAIL for " - "FI_RMA 
with len", - debug::hex<6>(e.len), "context", NS_DEBUG::ptr(e.op_context), "code", - NS_DEBUG::dec<3>(e.err), "flags", debug::bin<16>(e.flags), "error", - fi_cq_strerror(send_cq, e.prov_errno, e.err_data, (char*)e.buf, e.len)); + struct fi_cq_err_entry e = {}; + int err_sz = fi_cq_readerr(send_cq, &e, 0); + (void) err_sz; + + // flags might not be set correctly + if ((e.flags & (FI_MSG | FI_SEND | FI_TAGGED)) != 0) + { + NS_DEBUG::cnt_err.error("txcq Error FI_EAVAIL for " + "FI_SEND with len", + debug::hex<6>(e.len), "context", NS_DEBUG::ptr(e.op_context), "code", + NS_DEBUG::dec<3>(e.err), "flags", debug::bin<16>(e.flags), "error", + fi_cq_strerror( + send_cq, e.prov_errno, e.err_data, (char*) e.buf, e.len)); + } + else if ((e.flags & FI_RMA) != 0) + { + NS_DEBUG::cnt_err.error("txcq Error FI_EAVAIL for " + "FI_RMA with len", + debug::hex<6>(e.len), "context", NS_DEBUG::ptr(e.op_context), "code", + NS_DEBUG::dec<3>(e.err), "flags", debug::bin<16>(e.flags), "error", + fi_cq_strerror( + send_cq, e.prov_errno, e.err_data, (char*) e.buf, e.len)); + } + operation_context* handler = reinterpret_cast(e.op_context); + handler->handle_error(e); + return 0; } - operation_context* handler = reinterpret_cast(e.op_context); - handler->handle_error(e); - return 0; } - } - // - // exit possibly locked region and process each completion - // - if (ret > 0) - { - int processed = 0; - for (int i = 0; i < ret; ++i) + // + // exit possibly locked region and process each completion + // + if (ret > 0) { - ++sends_complete; - LF_DEB(NS_DEBUG::cnt_deb<9>, - debug(debug::str<>("Completion"), i, debug::dec<2>(i), "txcq flags", - fi_tostr(&entry[i].flags, FI_TYPE_CQ_EVENT_FLAGS), "(", - debug::dec<>(entry[i].flags), ")", "context", - NS_DEBUG::ptr(entry[i].op_context), "length", debug::hex<6>(entry[i].len))); - if ((entry[i].flags & (FI_TAGGED | FI_SEND | FI_MSG)) != 0) + int processed = 0; + for (int i = 0; i < ret; ++i) { + ++sends_complete; LF_DEB(NS_DEBUG::cnt_deb<9>, - 
debug(debug::str<>("Completion"), "txcq tagged send completion", - NS_DEBUG::ptr(entry[i].op_context))); - - operation_context* handler = - reinterpret_cast(entry[i].op_context); - processed += handler->handle_tagged_send_completion(user_data); - } - else - { - NS_DEBUG::cnt_err.error("Received an unknown txcq completion", - debug::dec<>(entry[i].flags), debug::bin<64>(entry[i].flags)); - std::terminate(); + debug(debug::str<>("Completion"), i, debug::dec<2>(i), "txcq flags", + fi_tostr(&entry[i].flags, FI_TYPE_CQ_EVENT_FLAGS), "(", + debug::dec<>(entry[i].flags), ")", "context", + NS_DEBUG::ptr(entry[i].op_context), "length", + debug::hex<6>(entry[i].len))); + if ((entry[i].flags & (FI_TAGGED | FI_SEND | FI_MSG)) != 0) + { + LF_DEB(NS_DEBUG::cnt_deb<9>, + debug(debug::str<>("Completion"), "txcq tagged send completion", + NS_DEBUG::ptr(entry[i].op_context))); + + operation_context* handler = + reinterpret_cast(entry[i].op_context); + processed += handler->handle_tagged_send_completion(user_data); + } + else + { + NS_DEBUG::cnt_err.error("Received an unknown txcq completion", + debug::dec<>(entry[i].flags), debug::bin<64>(entry[i].flags)); + std::terminate(); + } } + return processed; } - return processed; - } - else if (ret == 0 || ret == -FI_EAGAIN) - { - // do nothing, we will try again on the next check + else if (ret == 0 || ret == -FI_EAGAIN) + { + // do nothing, we will try again on the next check + } + else { NS_DEBUG::cnt_err.error("unknown error in completion txcq read"); } + return 0; } - else { NS_DEBUG::cnt_err.error("unknown error in completion txcq read"); } - return 0; - } - // -------------------------------------------------------------------- - int poll_recv_queue(fid_cq* rx_cq, void* user_data) - { + // -------------------------------------------------------------------- + int poll_recv_queue(fid_cq* rx_cq, void* user_data) + { #ifdef EXCESSIVE_POLLING_BACKOFF_MICRO_S - std::chrono::steady_clock::time_point now = std::chrono::steady_clock::now(); 
- if (std::chrono::duration_cast(now - recv_poll_stamp).count() < - EXCESSIVE_POLLING_BACKOFF_MICRO_S) - return 0; - recv_poll_stamp = now; + std::chrono::steady_clock::time_point now = std::chrono::steady_clock::now(); + if (std::chrono::duration_cast(now - recv_poll_stamp) + .count() < EXCESSIVE_POLLING_BACKOFF_MICRO_S) + return 0; + recv_poll_stamp = now; #endif - int ret; - fi_cq_msg_entry entry[max_completions_array_limit_]; - assert(max_completions_per_poll_ <= max_completions_array_limit_); - { - auto lock = get_rx_lock(); + int ret; + fi_cq_msg_entry entry[max_completions_array_limit_]; + assert(max_completions_per_poll_ <= max_completions_array_limit_); + { + auto lock = get_rx_lock(); - // if we're not threadlocal and didn't get the lock, - // then another thread is polling now, just exit - if (!bypass_rx_lock() && !lock.owns_lock()) { return -1; } + // if we're not threadlocal and didn't get the lock, + // then another thread is polling now, just exit + if (!bypass_rx_lock() && !lock.owns_lock()) { return -1; } - static auto polling = - NS_DEBUG::cnt_deb<2>.make_timer(1, debug::str<>("poll recv queue")); - LF_DEB(NS_DEBUG::cnt_deb<2>, timed(polling, NS_DEBUG::ptr(rx_cq))); + static auto polling = + NS_DEBUG::cnt_deb<2>.make_timer(1, debug::str<>("poll recv queue")); + LF_DEB(NS_DEBUG::cnt_deb<2>, timed(polling, NS_DEBUG::ptr(rx_cq))); - // poll for completions - { - ret = fi_cq_read(rx_cq, &entry[0], max_completions_per_poll_); - } - // if there is an error, retrieve it - if (ret == -FI_EAVAIL) - { - // read the full error status - struct fi_cq_err_entry e = {}; - int err_sz = fi_cq_readerr(rx_cq, &e, 0); - (void)err_sz; - // from the manpage 'man 3 fi_cq_readerr' - if (e.err == FI_ECANCELED) + // poll for completions { - LF_DEB(NS_DEBUG::cnt_deb<1>, - debug(debug::str<>("rxcq Cancelled"), "flags", debug::hex<6>(e.flags), - "len", debug::hex<6>(e.len), "context", NS_DEBUG::ptr(e.op_context))); - // the request was cancelled, we can simply exit - // as 
the canceller will have doone any cleanup needed - operation_context* handler = reinterpret_cast(e.op_context); - handler->handle_cancelled(); - return 0; + ret = fi_cq_read(rx_cq, &entry[0], max_completions_per_poll_); } - else if (e.err != FI_SUCCESS) + // if there is an error, retrieve it + if (ret == -FI_EAVAIL) { - NS_DEBUG::cnt_err.error(debug::str<>("poll_recv_queue"), "error code", - debug::dec<>(-e.err), "flags", debug::hex<6>(e.flags), "len", - debug::hex<6>(e.len), "context", NS_DEBUG::ptr(e.op_context), "error msg", - fi_cq_strerror(rx_cq, e.prov_errno, e.err_data, (char*)e.buf, e.len)); + // read the full error status + struct fi_cq_err_entry e = {}; + int err_sz = fi_cq_readerr(rx_cq, &e, 0); + (void) err_sz; + // from the manpage 'man 3 fi_cq_readerr' + if (e.err == FI_ECANCELED) + { + LF_DEB(NS_DEBUG::cnt_deb<1>, + debug(debug::str<>("rxcq Cancelled"), "flags", debug::hex<6>(e.flags), + "len", debug::hex<6>(e.len), "context", + NS_DEBUG::ptr(e.op_context))); + // the request was cancelled, we can simply exit + // as the canceller will have doone any cleanup needed + operation_context* handler = + reinterpret_cast(e.op_context); + handler->handle_cancelled(); + return 0; + } + else if (e.err != FI_SUCCESS) + { + NS_DEBUG::cnt_err.error(debug::str<>("poll_recv_queue"), "error code", + debug::dec<>(-e.err), "flags", debug::hex<6>(e.flags), "len", + debug::hex<6>(e.len), "context", NS_DEBUG::ptr(e.op_context), + "error msg", + fi_cq_strerror(rx_cq, e.prov_errno, e.err_data, (char*) e.buf, e.len)); + } + operation_context* handler = reinterpret_cast(e.op_context); + if (handler) handler->handle_error(e); + return 0; } - operation_context* handler = reinterpret_cast(e.op_context); - if (handler) handler->handle_error(e); - return 0; } - } - // - // release the lock and process each completion - // - if (ret > 0) - { - int processed = 0; - for (int i = 0; i < ret; ++i) + // + // release the lock and process each completion + // + if (ret > 0) { - 
++recvs_complete; - LF_DEB(NS_DEBUG::cnt_deb<2>, - debug(debug::str<>("Completion"), i, "rxcq flags", - fi_tostr(&entry[i].flags, FI_TYPE_CQ_EVENT_FLAGS), "(", - debug::dec<>(entry[i].flags), ")", "context", - NS_DEBUG::ptr(entry[i].op_context), "length", debug::hex<6>(entry[i].len))); - if ((entry[i].flags & (FI_TAGGED | FI_RECV)) != 0) + int processed = 0; + for (int i = 0; i < ret; ++i) { + ++recvs_complete; LF_DEB(NS_DEBUG::cnt_deb<2>, - debug(debug::str<>("Completion"), "rxcq tagged recv completion", - NS_DEBUG::ptr(entry[i].op_context))); - - operation_context* handler = - reinterpret_cast(entry[i].op_context); - processed += handler->handle_tagged_recv_completion(user_data); - } - else - { - NS_DEBUG::cnt_err.error("Received an unknown rxcq completion", - debug::dec<>(entry[i].flags), debug::bin<64>(entry[i].flags)); - std::terminate(); + debug(debug::str<>("Completion"), i, "rxcq flags", + fi_tostr(&entry[i].flags, FI_TYPE_CQ_EVENT_FLAGS), "(", + debug::dec<>(entry[i].flags), ")", "context", + NS_DEBUG::ptr(entry[i].op_context), "length", + debug::hex<6>(entry[i].len))); + if ((entry[i].flags & (FI_TAGGED | FI_RECV)) != 0) + { + LF_DEB(NS_DEBUG::cnt_deb<2>, + debug(debug::str<>("Completion"), "rxcq tagged recv completion", + NS_DEBUG::ptr(entry[i].op_context))); + + operation_context* handler = + reinterpret_cast(entry[i].op_context); + processed += handler->handle_tagged_recv_completion(user_data); + } + else + { + NS_DEBUG::cnt_err.error("Received an unknown rxcq completion", + debug::dec<>(entry[i].flags), debug::bin<64>(entry[i].flags)); + std::terminate(); + } } + return processed; + } + else if (ret == 0 || ret == -FI_EAGAIN) + { + // do nothing, we will try again on the next check } - return processed; + else { NS_DEBUG::cnt_err.error("unknown error in completion rxcq read"); } + return 0; } - else if (ret == 0 || ret == -FI_EAGAIN) + + // Jobs started using mpi don't have this info + struct fi_info* set_src_dst_addresses(struct fi_info* info, bool 
tx) { - // do nothing, we will try again on the next check + (void) info; // unused variable warning + (void) tx; // unused variable warning + + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("fi_dupinfo"))); + struct fi_info* hints = fi_dupinfo(info); + if (!hints) throw NS_LIBFABRIC::fabric_error(0, "fi_dupinfo"); + // clear any Rx address data that might be set + // free(hints->src_addr); + // hints->src_addr = nullptr; + // hints->src_addrlen = 0; + free(hints->dest_addr); + hints->dest_addr = nullptr; + hints->dest_addrlen = 0; + return hints; } - else { NS_DEBUG::cnt_err.error("unknown error in completion rxcq read"); } - return 0; - } + }; - // Jobs started using mpi don't have this info - struct fi_info* set_src_dst_addresses(struct fi_info* info, bool tx) - { - (void)info; // unused variable warning - (void)tx; // unused variable warning - - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("fi_dupinfo"))); - struct fi_info* hints = fi_dupinfo(info); - if (!hints) throw NS_LIBFABRIC::fabric_error(0, "fi_dupinfo"); - // clear any Rx address data that might be set - // free(hints->src_addr); - // hints->src_addr = nullptr; - // hints->src_addrlen = 0; - free(hints->dest_addr); - hints->dest_addr = nullptr; - hints->dest_addrlen = 0; - return hints; - } -}; - -} // namespace oomph::libfabric +} // namespace oomph::libfabric diff --git a/src/libfabric/controller_base.hpp b/src/libfabric/controller_base.hpp index e1ce377e..a5eb1705 100644 --- a/src/libfabric/controller_base.hpp +++ b/src/libfabric/controller_base.hpp @@ -53,15 +53,13 @@ // ---------------------------------------- // auto progress (libfabric thread) or manual // ---------------------------------------- -static fi_progress -libfabric_progress_type() +static fi_progress libfabric_progress_type() { if (std::getenv("LIBFABRIC_AUTO_PROGRESS") == nullptr) return FI_PROGRESS_MANUAL; return FI_PROGRESS_AUTO; } -static const char* -libfabric_progress_string() +static char const* libfabric_progress_string() { if 
(libfabric_progress_type() == FI_PROGRESS_AUTO) return "auto"; return "manual"; @@ -93,8 +91,7 @@ enum class endpoint_type : int // ---------------------------------------- // single endpoint or separate for send/recv // ---------------------------------------- -static endpoint_type -libfabric_endpoint_type() +static endpoint_type libfabric_endpoint_type() { auto env_str = std::getenv("LIBFABRIC_ENDPOINT_TYPE"); if (env_str == nullptr) return endpoint_type::single; @@ -114,8 +111,7 @@ libfabric_endpoint_type() return endpoint_type::single; } -static const char* -libfabric_endpoint_string() +static char const* libfabric_endpoint_string() { auto lf_ep_type = libfabric_endpoint_type(); if (lf_ep_type == endpoint_type::multiple) return "multiple"; @@ -128,8 +124,7 @@ libfabric_endpoint_string() // ---------------------------------------- // number of completions to handle per poll // ---------------------------------------- -static int -libfabric_completions_per_poll() +static int libfabric_completions_per_poll() { auto env_str = std::getenv("LIBFABRIC_POLL_SIZE"); if (env_str != nullptr) @@ -148,8 +143,7 @@ libfabric_completions_per_poll() // ---------------------------------------- // Eager/Rendezvous threshold // ---------------------------------------- -static int -libfabric_rendezvous_threshold(int def_val) +static int libfabric_rendezvous_threshold(int def_val) { auto env_str = std::getenv("LIBFABRIC_RENDEZVOUS_THRESHOLD"); if (env_str != nullptr) @@ -170,9 +164,9 @@ libfabric_rendezvous_threshold(int def_val) // Needed on Cray for GNI extensions // ------------------------------------------------ #ifdef HAVE_LIBFABRIC_GNI -#include "rdma/fi_ext_gni.h" +# include "rdma/fi_ext_gni.h" //#define OOMPH_GNI_REG "none" -#define OOMPH_GNI_REG "internal" +# define OOMPH_GNI_REG "internal" //#define OOMPH_GNI_REG "udreg" static std::vector> gni_strs = { @@ -213,19 +207,18 @@ static std::vector> gni_ints = { // api 2.0, then we ask for that, but the cxi legacy library on 
daint only supports 1.15, // so drop back to that version if needed #if defined(OOMPH_LIBFABRIC_V1_API) -#define LIBFABRIC_FI_VERSION_MAJOR 1 -#define LIBFABRIC_FI_VERSION_MINOR 15 +# define LIBFABRIC_FI_VERSION_MAJOR 1 +# define LIBFABRIC_FI_VERSION_MINOR 15 #else -#define LIBFABRIC_FI_VERSION_MAJOR 2 -#define LIBFABRIC_FI_VERSION_MINOR 0 +# define LIBFABRIC_FI_VERSION_MAJOR 2 +# define LIBFABRIC_FI_VERSION_MINOR 0 #endif -namespace NS_DEBUG -{ -// cppcheck-suppress ConfigurationNotChecked -static NS_DEBUG::enable_print cnb_deb("CONBASE"); -static NS_DEBUG::enable_print cnb_err("CONBASE"); -} // namespace NS_DEBUG +namespace NS_DEBUG { + // cppcheck-suppress ConfigurationNotChecked + static NS_DEBUG::enable_print cnb_deb("CONBASE"); + static NS_DEBUG::enable_print cnb_err("CONBASE"); +} // namespace NS_DEBUG /** @brief a class to return the number of progressed callbacks */ struct progress_status @@ -237,7 +230,7 @@ struct progress_status int num_sends() const noexcept { return m_num_sends; } int num_recvs() const noexcept { return m_num_recvs; } - progress_status& operator+=(const progress_status& other) noexcept + progress_status& operator+=(progress_status const& other) noexcept { m_num_sends += other.m_num_sends; m_num_recvs += other.m_num_recvs; @@ -245,814 +238,822 @@ struct progress_status } }; -namespace NS_LIBFABRIC -{ -/// A wrapper around fi_close that reports any error -/// Because we use so many handles, we must be careful to -/// delete them all before closing resources that use them -template -void -fidclose(Handle fid, const char* msg) -{ - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("closing"), msg)); - int ret = fi_close(fid); - if (ret == -FI_EBUSY) { throw NS_LIBFABRIC::fabric_error(ret, "fi_close EBUSY"); } - else if (ret == FI_SUCCESS) { return; } - throw NS_LIBFABRIC::fabric_error(ret, "fi_close error"); -} - -/// when using thread local endpoints, we encapsulate things that -/// are needed to manage an endpoint -struct endpoint_wrapper -{ 
- private: - friend class controller; - - fid_ep* ep_ = nullptr; - fid_cq* rq_ = nullptr; - fid_cq* tq_ = nullptr; - const char* name_ = nullptr; - - public: - endpoint_wrapper() {} - endpoint_wrapper(fid_ep* ep, fid_cq* rq, fid_cq* tq, const char* name) - : ep_(ep) - , rq_(rq) - , tq_(tq) - , name_(name) +namespace NS_LIBFABRIC { + /// A wrapper around fi_close that reports any error + /// Because we use so many handles, we must be careful to + /// delete them all before closing resources that use them + template + void fidclose(Handle fid, char const* msg) { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__, name_); + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("closing"), msg)); + int ret = fi_close(fid); + if (ret == -FI_EBUSY) { throw NS_LIBFABRIC::fabric_error(ret, "fi_close EBUSY"); } + else if (ret == FI_SUCCESS) { return; } + throw NS_LIBFABRIC::fabric_error(ret, "fi_close error"); } - // to keep boost::lockfree happy, we need these copy operators - endpoint_wrapper(const endpoint_wrapper& ep) = default; - endpoint_wrapper& operator=(const endpoint_wrapper& ep) = default; - - void cleanup() + /// when using thread local endpoints, we encapsulate things that + /// are needed to manage an endpoint + struct endpoint_wrapper { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__, name_); - if (ep_) - { - fidclose(&ep_->fid, "endpoint"); - ep_ = nullptr; - } - if (rq_) + private: + friend class controller; + + fid_ep* ep_ = nullptr; + fid_cq* rq_ = nullptr; + fid_cq* tq_ = nullptr; + char const* name_ = nullptr; + + public: + endpoint_wrapper() {} + endpoint_wrapper(fid_ep* ep, fid_cq* rq, fid_cq* tq, char const* name) + : ep_(ep) + , rq_(rq) + , tq_(tq) + , name_(name) { - fidclose(&rq_->fid, "rq"); - rq_ = nullptr; - } - if (tq_) - { - fidclose(&tq_->fid, "tq"); - tq_ = nullptr; + [[maybe_unused]] auto scp = + NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__, name_); } - } - - inline 
fid_ep* get_ep() { return ep_; } - inline fid_cq* get_rx_cq() { return rq_; } - inline fid_cq* get_tx_cq() { return tq_; } - inline void set_tx_cq(fid_cq* cq) { tq_ = cq; } - inline const char* get_name() { return name_; } -}; - -using region_type = NS_MEMORY::memory_handle; -using endpoint_context_pool = - boost::lockfree::queue>; - -struct stack_endpoint -{ - endpoint_wrapper endpoint_; - endpoint_context_pool* pool_; - // - stack_endpoint() - : endpoint_() - , pool_(nullptr) - { - } - // - stack_endpoint(fid_ep* ep, fid_cq* rq, fid_cq* tq, const char* name, - endpoint_context_pool* pool) - : endpoint_(ep, rq, tq, name) - , pool_(pool) - { - } - // - stack_endpoint& operator=(stack_endpoint&& other) - { - endpoint_ = std::move(other.endpoint_); - pool_ = std::exchange(other.pool_, nullptr); - return *this; - } - - ~stack_endpoint() - { - if (!pool_) return; - LF_DEB(NS_DEBUG::cnb_deb, - trace(debug::str<>("Scalable Ep"), "used push", "ep", NS_DEBUG::ptr(get_ep()), "tx cq", - NS_DEBUG::ptr(get_tx_cq()), "rx cq", NS_DEBUG::ptr(get_rx_cq()))); - pool_->push(endpoint_); - } - - inline fid_ep* get_ep() { return endpoint_.get_ep(); } - inline fid_cq* get_rx_cq() { return endpoint_.get_rx_cq(); } + // to keep boost::lockfree happy, we need these copy operators + endpoint_wrapper(endpoint_wrapper const& ep) = default; + endpoint_wrapper& operator=(endpoint_wrapper const& ep) = default; - inline fid_cq* get_tx_cq() { return endpoint_.get_tx_cq(); } -}; - -struct endpoints_lifetime_manager -{ - // threadlocal endpoints - static inline thread_local stack_endpoint tl_tx_; - static inline thread_local stack_endpoint tl_stx_; - static inline thread_local stack_endpoint tl_srx_; - // non threadlocal endpoints, tx/rx - endpoint_wrapper ep_tx_; - endpoint_wrapper ep_rx_; -}; - -template -class controller_base -{ - public: - typedef std::mutex mutex_type; - typedef std::lock_guard scoped_lock; - typedef std::unique_lock unique_lock; + void cleanup() + { + [[maybe_unused]] auto scp 
= + NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__, name_); + if (ep_) + { + fidclose(&ep_->fid, "endpoint"); + ep_ = nullptr; + } + if (rq_) + { + fidclose(&rq_->fid, "rq"); + rq_ = nullptr; + } + if (tq_) + { + fidclose(&tq_->fid, "tq"); + tq_ = nullptr; + } + } - protected: - // For threadlocal/scalable endpoints, - // we use a dedicated threadlocal endpoint wrapper - std::unique_ptr eps_; + inline fid_ep* get_ep() { return ep_; } + inline fid_cq* get_rx_cq() { return rq_; } + inline fid_cq* get_tx_cq() { return tq_; } + inline void set_tx_cq(fid_cq* cq) { tq_ = cq; } + inline char const* get_name() { return name_; } + }; + using region_type = NS_MEMORY::memory_handle; using endpoint_context_pool = boost::lockfree::queue>; - endpoint_context_pool tx_endpoints_; - endpoint_context_pool rx_endpoints_; - - struct fi_info* fabric_info_; - struct fid_fabric* fabric_; - struct fid_domain* fabric_domain_; - struct fid_pep* ep_passive_; - - struct fid_av* av_; - endpoint_type endpoint_type_; - - locality here_; - locality root_; - // used during queue creation setup and during polling - mutex_type controller_mutex_; - - // used to protect send/recv resources - alignas(64) mutex_type send_mutex_; - alignas(64) mutex_type recv_mutex_; - - std::size_t tx_inject_size_; - std::size_t tx_attr_size_; - std::size_t rx_attr_size_; - - uint32_t max_completions_per_poll_; - uint32_t msg_rendezvous_threshold_; - inline static constexpr uint32_t max_completions_array_limit_ = 256; + struct stack_endpoint + { + endpoint_wrapper endpoint_; + endpoint_context_pool* pool_; + // + stack_endpoint() + : endpoint_() + , pool_(nullptr) + { + } + // + stack_endpoint( + fid_ep* ep, fid_cq* rq, fid_cq* tq, char const* name, endpoint_context_pool* pool) + : endpoint_(ep, rq, tq, name) + , pool_(pool) + { + } + // + stack_endpoint& operator=(stack_endpoint&& other) + { + endpoint_ = std::move(other.endpoint_); + pool_ = std::exchange(other.pool_, nullptr); + return *this; + } - static 
inline thread_local std::chrono::steady_clock::time_point send_poll_stamp; - static inline thread_local std::chrono::steady_clock::time_point recv_poll_stamp; + ~stack_endpoint() + { + if (!pool_) return; + LF_DEB(NS_DEBUG::cnb_deb, + trace(debug::str<>("Scalable Ep"), "used push", "ep", NS_DEBUG::ptr(get_ep()), + "tx cq", NS_DEBUG::ptr(get_tx_cq()), "rx cq", NS_DEBUG::ptr(get_rx_cq()))); + pool_->push(endpoint_); + } - // set if FI_MR_LOCAL is required (local access requires binding) - bool mrlocal = false; - // set if FI_MR_ENDPOINT is required (per endpoint memory binding) - bool mrbind = false; - // set if FI_MR_HRMEM provider requires heterogeneous memory registration - bool mrhmem = false; + inline fid_ep* get_ep() { return endpoint_.get_ep(); } - public: - bool get_mrbind() { return mrbind; } + inline fid_cq* get_rx_cq() { return endpoint_.get_rx_cq(); } - public: - NS_LIBFABRIC::simple_counter sends_posted_; - NS_LIBFABRIC::simple_counter recvs_posted_; - NS_LIBFABRIC::simple_counter sends_readied_; - NS_LIBFABRIC::simple_counter recvs_readied_; - NS_LIBFABRIC::simple_counter sends_complete; - NS_LIBFABRIC::simple_counter recvs_complete; + inline fid_cq* get_tx_cq() { return endpoint_.get_tx_cq(); } + }; - void finvoke(const char* msg, const char* err, int ret) + struct endpoints_lifetime_manager { - LF_DEB(NS_DEBUG::cnb_deb, trace(debug::str<>(msg))); - if (ret) throw NS_LIBFABRIC::fabric_error(ret, err); - } + // threadlocal endpoints + static inline thread_local stack_endpoint tl_tx_; + static inline thread_local stack_endpoint tl_stx_; + static inline thread_local stack_endpoint tl_srx_; + // non threadlocal endpoints, tx/rx + endpoint_wrapper ep_tx_; + endpoint_wrapper ep_rx_; + }; - public: - // -------------------------------------------------------------------- - controller_base() - : eps_(nullptr) - , tx_endpoints_(1) - , rx_endpoints_(1) - , fabric_info_(nullptr) - , fabric_(nullptr) - , fabric_domain_(nullptr) - , ep_passive_(nullptr) - , 
av_(nullptr) - , tx_inject_size_(0) - , tx_attr_size_(0) - , rx_attr_size_(0) - , max_completions_per_poll_(1) - , msg_rendezvous_threshold_(0x4000) - , sends_posted_(0) - , recvs_posted_(0) - , sends_readied_(0) - , recvs_readied_(0) - , sends_complete(0) - , recvs_complete(0) + template + class controller_base { - } + public: + typedef std::mutex mutex_type; + typedef std::lock_guard scoped_lock; + typedef std::unique_lock unique_lock; + + protected: + // For threadlocal/scalable endpoints, + // we use a dedicated threadlocal endpoint wrapper + std::unique_ptr eps_; + + using endpoint_context_pool = + boost::lockfree::queue>; + endpoint_context_pool tx_endpoints_; + endpoint_context_pool rx_endpoints_; + + struct fi_info* fabric_info_; + struct fid_fabric* fabric_; + struct fid_domain* fabric_domain_; + struct fid_pep* ep_passive_; + + struct fid_av* av_; + endpoint_type endpoint_type_; + + locality here_; + locality root_; + + // used during queue creation setup and during polling + mutex_type controller_mutex_; + + // used to protect send/recv resources + alignas(64) mutex_type send_mutex_; + alignas(64) mutex_type recv_mutex_; + + std::size_t tx_inject_size_; + std::size_t tx_attr_size_; + std::size_t rx_attr_size_; + + uint32_t max_completions_per_poll_; + uint32_t msg_rendezvous_threshold_; + inline static constexpr uint32_t max_completions_array_limit_ = 256; + + static inline thread_local std::chrono::steady_clock::time_point send_poll_stamp; + static inline thread_local std::chrono::steady_clock::time_point recv_poll_stamp; + + // set if FI_MR_LOCAL is required (local access requires binding) + bool mrlocal = false; + // set if FI_MR_ENDPOINT is required (per endpoint memory binding) + bool mrbind = false; + // set if FI_MR_HRMEM provider requires heterogeneous memory registration + bool mrhmem = false; + + public: + bool get_mrbind() { return mrbind; } + + public: + NS_LIBFABRIC::simple_counter sends_posted_; + NS_LIBFABRIC::simple_counter recvs_posted_; 
+ NS_LIBFABRIC::simple_counter sends_readied_; + NS_LIBFABRIC::simple_counter recvs_readied_; + NS_LIBFABRIC::simple_counter sends_complete; + NS_LIBFABRIC::simple_counter recvs_complete; + + void finvoke(char const* msg, char const* err, int ret) + { + LF_DEB(NS_DEBUG::cnb_deb, trace(debug::str<>(msg))); + if (ret) throw NS_LIBFABRIC::fabric_error(ret, err); + } - // -------------------------------------------------------------------- - // clean up all resources - ~controller_base() - { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); - unsigned int messages_handled_ = 0; - unsigned int rma_reads_ = 0; - unsigned int recv_deletes_ = 0; + public: + // -------------------------------------------------------------------- + controller_base() + : eps_(nullptr) + , tx_endpoints_(1) + , rx_endpoints_(1) + , fabric_info_(nullptr) + , fabric_(nullptr) + , fabric_domain_(nullptr) + , ep_passive_(nullptr) + , av_(nullptr) + , tx_inject_size_(0) + , tx_attr_size_(0) + , rx_attr_size_(0) + , max_completions_per_poll_(1) + , msg_rendezvous_threshold_(0x4000) + , sends_posted_(0) + , recvs_posted_(0) + , sends_readied_(0) + , recvs_readied_(0) + , sends_complete(0) + , recvs_complete(0) + { + } - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("counters"), "Received messages", debug::dec<>(messages_handled_), - "Total reads", debug::dec<>(rma_reads_), "Total deletes", - debug::dec<>(recv_deletes_), "deletes error", - debug::dec<>(messages_handled_ - recv_deletes_))); + // -------------------------------------------------------------------- + // clean up all resources + ~controller_base() + { + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + unsigned int messages_handled_ = 0; + unsigned int rma_reads_ = 0; + unsigned int recv_deletes_ = 0; - tx_endpoints_.consume_all([](auto&& ep) { ep.cleanup(); }); - rx_endpoints_.consume_all([](auto&& ep) { ep.cleanup(); }); + LF_DEB(NS_DEBUG::cnb_deb, + 
debug(debug::str<>("counters"), "Received messages", + debug::dec<>(messages_handled_), "Total reads", debug::dec<>(rma_reads_), + "Total deletes", debug::dec<>(recv_deletes_), "deletes error", + debug::dec<>(messages_handled_ - recv_deletes_))); - // No cleanup threadlocals : done by consume_all cleanup above - // eps_->tl_tx_.endpoint_.cleanup(); - // eps_->tl_stx_.endpoint_.cleanup(); - // eps_->tl_srx_.endpoint_.cleanup(); + tx_endpoints_.consume_all([](auto&& ep) { ep.cleanup(); }); + rx_endpoints_.consume_all([](auto&& ep) { ep.cleanup(); }); - // non threadlocal endpoints, tx/rx - eps_->ep_tx_.cleanup(); - eps_->ep_rx_.cleanup(); + // No cleanup threadlocals : done by consume_all cleanup above + // eps_->tl_tx_.endpoint_.cleanup(); + // eps_->tl_stx_.endpoint_.cleanup(); + // eps_->tl_srx_.endpoint_.cleanup(); - // Cleanup endpoints - eps_.reset(nullptr); + // non threadlocal endpoints, tx/rx + eps_->ep_tx_.cleanup(); + eps_->ep_rx_.cleanup(); - // delete adddress vector - fidclose(&av_->fid, "Address Vector"); + // Cleanup endpoints + eps_.reset(nullptr); - try - { - fidclose(&fabric_domain_->fid, "Domain"); - } - catch (fabric_error& e) - { - std::cout << "fabric domain close failed : Ensure all RMA " - "objects are freed before program termination" - << std::endl; - } - fidclose(&fabric_->fid, "Fabric"); + // delete adddress vector + fidclose(&av_->fid, "Address Vector"); - // clean up - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("freeing fabric_info"))); + try + { + fidclose(&fabric_domain_->fid, "Domain"); + } + catch (fabric_error& e) + { + std::cout << "fabric domain close failed : Ensure all RMA " + "objects are freed before program termination" + << std::endl; + } + fidclose(&fabric_->fid, "Fabric"); - fi_freeinfo(fabric_info_); - } + // clean up + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("freeing fabric_info"))); - // -------------------------------------------------------------------- - // setup an endpoint for receiving messages, - // 
usually an rx endpoint is shared by all threads - endpoint_wrapper create_rx_endpoint(struct fid_domain* domain, struct fi_info* info, - struct fid_av* av) - { - auto ep_rx = new_endpoint_active(domain, info, false); + fi_freeinfo(fabric_info_); + } - // bind address vector - bind_address_vector_to_endpoint(ep_rx, av); + // -------------------------------------------------------------------- + // setup an endpoint for receiving messages, + // usually an rx endpoint is shared by all threads + endpoint_wrapper create_rx_endpoint( + struct fid_domain* domain, struct fi_info* info, struct fid_av* av) + { + auto ep_rx = new_endpoint_active(domain, info, false); - // create a completion queue for the rx endpoint - info->rx_attr->op_flags |= FI_COMPLETION; - auto rx_cq = create_completion_queue(domain, info->rx_attr->size, "rx"); + // bind address vector + bind_address_vector_to_endpoint(ep_rx, av); - // bind CQ to endpoint - bind_queue_to_endpoint(ep_rx, rx_cq, FI_RECV, "rx"); - return endpoint_wrapper(ep_rx, rx_cq, nullptr, "rx"); - } + // create a completion queue for the rx endpoint + info->rx_attr->op_flags |= FI_COMPLETION; + auto rx_cq = create_completion_queue(domain, info->rx_attr->size, "rx"); - // -------------------------------------------------------------------- - // initialize the basic fabric/domain/name - template - void initialize(std::string const& provider, bool rootnode, int size, size_t threads, - Args&&... 
args) - { - LF_DEB(NS_DEBUG::cnb_deb, eval([]() { std::cout.setf(std::ios::unitbuf); })); - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + // bind CQ to endpoint + bind_queue_to_endpoint(ep_rx, rx_cq, FI_RECV, "rx"); + return endpoint_wrapper(ep_rx, rx_cq, nullptr, "rx"); + } - max_completions_per_poll_ = libfabric_completions_per_poll(); - LF_DEB(NS_DEBUG::cnb_err, - debug(debug::str<>("Poll completions"), debug::dec<3>(max_completions_per_poll_))); + // -------------------------------------------------------------------- + // initialize the basic fabric/domain/name + template + void initialize( + std::string const& provider, bool rootnode, int size, size_t threads, Args&&... args) + { + LF_DEB(NS_DEBUG::cnb_deb, eval([]() { std::cout.setf(std::ios::unitbuf); })); + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); - uint32_t default_val = (threads == 1) ? 0x400 : 0x4000; - msg_rendezvous_threshold_ = libfabric_rendezvous_threshold(default_val); - LF_DEB(NS_DEBUG::cnb_err, - debug(debug::str<>("Rendezvous threshold"), debug::hex<4>(msg_rendezvous_threshold_))); + max_completions_per_poll_ = libfabric_completions_per_poll(); + LF_DEB(NS_DEBUG::cnb_err, + debug(debug::str<>("Poll completions"), debug::dec<3>(max_completions_per_poll_))); - endpoint_type_ = static_cast(libfabric_endpoint_type()); - LF_DEB(NS_DEBUG::cnb_err, debug(debug::str<>("Endpoints"), libfabric_endpoint_string())); + uint32_t default_val = (threads == 1) ? 
0x400 : 0x4000; + msg_rendezvous_threshold_ = libfabric_rendezvous_threshold(default_val); + LF_DEB(NS_DEBUG::cnb_err, + debug(debug::str<>("Rendezvous threshold"), + debug::hex<4>(msg_rendezvous_threshold_))); - eps_ = std::make_unique(); + endpoint_type_ = static_cast(libfabric_endpoint_type()); + LF_DEB( + NS_DEBUG::cnb_err, debug(debug::str<>("Endpoints"), libfabric_endpoint_string())); - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Threads"), debug::dec<3>(threads))); + eps_ = std::make_unique(); - open_fabric(provider, threads, rootnode); + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Threads"), debug::dec<3>(threads))); - // create an address vector that will be bound to (all) endpoints - av_ = create_address_vector(fabric_info_, size, threads); + open_fabric(provider, threads, rootnode); - // we need an rx endpoint in all cases except scalable rx - if (endpoint_type_ != endpoint_type::scalableTxRx) - { - // setup an endpoint for receiving messages - // rx endpoint is typically shared by all threads - eps_->ep_rx_ = create_rx_endpoint(fabric_domain_, fabric_info_, av_); - } + // create an address vector that will be bound to (all) endpoints + av_ = create_address_vector(fabric_info_, size, threads); - if (endpoint_type_ == endpoint_type::single) - { - // always bind a tx cq to the rx endpoint for single endpoint type - auto tx_cq = bind_tx_queue_to_rx_endpoint(fabric_info_, eps_->ep_rx_.get_ep()); - eps_->ep_rx_.set_tx_cq(tx_cq); - } - else if (endpoint_type_ != endpoint_type::scalableTxRx) - { + // we need an rx endpoint in all cases except scalable rx + if (endpoint_type_ != endpoint_type::scalableTxRx) + { + // setup an endpoint for receiving messages + // rx endpoint is typically shared by all threads + eps_->ep_rx_ = create_rx_endpoint(fabric_domain_, fabric_info_, av_); + } + + if (endpoint_type_ == endpoint_type::single) + { + // always bind a tx cq to the rx endpoint for single endpoint type + auto tx_cq = bind_tx_queue_to_rx_endpoint(fabric_info_, 
eps_->ep_rx_.get_ep()); + eps_->ep_rx_.set_tx_cq(tx_cq); + } + else if (endpoint_type_ != endpoint_type::scalableTxRx) + { #if defined(HAVE_LIBFABRIC_SOCKETS) || defined(HAVE_LIBFABRIC_TCP) || \ defined(HAVE_LIBFABRIC_VERBS) || defined(HAVE_LIBFABRIC_CXI) || defined(HAVE_LIBFABRIC_EFA) - // it appears that the rx endpoint cannot be enabled if it does not - // have a Tx CQ (at least when using sockets), so we create a dummy - // Tx CQ and bind it just to stop libfabric from triggering an error. - // The tx_cq won't actually be used because the user will get the real - // tx endpoint which will have the correct cq bound to it - auto dummy_cq = bind_tx_queue_to_rx_endpoint(fabric_info_, eps_->ep_rx_.get_ep()); - eps_->ep_rx_.set_tx_cq(dummy_cq); + // it appears that the rx endpoint cannot be enabled if it does not + // have a Tx CQ (at least when using sockets), so we create a dummy + // Tx CQ and bind it just to stop libfabric from triggering an error. + // The tx_cq won't actually be used because the user will get the real + // tx endpoint which will have the correct cq bound to it + auto dummy_cq = bind_tx_queue_to_rx_endpoint(fabric_info_, eps_->ep_rx_.get_ep()); + eps_->ep_rx_.set_tx_cq(dummy_cq); #endif - } + } - if (endpoint_type_ == endpoint_type::multiple) - { - // create a separate Tx endpoint for sending messages - // note that the CQ needs FI_RECV even though its a Tx cq to keep - // some providers happy as they trigger an error if an endpoint - // has no Rx cq attached (appears to be a progress related bug) - auto ep_tx = new_endpoint_active(fabric_domain_, fabric_info_, true); - - // create a completion queue for tx endpoint - fabric_info_->tx_attr->op_flags |= (FI_INJECT_COMPLETE | FI_COMPLETION); - auto tx_cq = - create_completion_queue(fabric_domain_, fabric_info_->tx_attr->size, "tx multiple"); - - bind_queue_to_endpoint(ep_tx, tx_cq, FI_TRANSMIT | FI_RECV, "tx multiple"); - bind_address_vector_to_endpoint(ep_tx, av_); - enable_endpoint(ep_tx, "tx 
multiple"); - - // combine endpoints and CQ into wrapper for convenience - eps_->ep_tx_ = endpoint_wrapper(ep_tx, nullptr, tx_cq, "tx multiple"); - } - else if (endpoint_type_ == endpoint_type::threadlocalTx) - { - // each thread creates a Tx endpoint on first call to get_tx_endpoint() - } - else if (endpoint_type_ == endpoint_type::scalableTx || - endpoint_type_ == endpoint_type::scalableTxRx) - { - // setup tx contexts for each possible thread - size_t threads_allocated = 0; - auto ep_sx = new_endpoint_scalable(fabric_domain_, fabric_info_, true /*Tx*/, threads, - threads_allocated); + if (endpoint_type_ == endpoint_type::multiple) + { + // create a separate Tx endpoint for sending messages + // note that the CQ needs FI_RECV even though its a Tx cq to keep + // some providers happy as they trigger an error if an endpoint + // has no Rx cq attached (appears to be a progress related bug) + auto ep_tx = new_endpoint_active(fabric_domain_, fabric_info_, true); - LF_DEB(NS_DEBUG::cnb_deb, trace(debug::str<>("scalable endpoint ok"), - "Contexts allocated", debug::dec<4>(threads_allocated))); + // create a completion queue for tx endpoint + fabric_info_->tx_attr->op_flags |= (FI_INJECT_COMPLETE | FI_COMPLETION); + auto tx_cq = create_completion_queue( + fabric_domain_, fabric_info_->tx_attr->size, "tx multiple"); - finvoke("fi_scalable_ep_bind AV", "fi_scalable_ep_bind", - fi_scalable_ep_bind(ep_sx, &av_->fid, 0)); + bind_queue_to_endpoint(ep_tx, tx_cq, FI_TRANSMIT | FI_RECV, "tx multiple"); + bind_address_vector_to_endpoint(ep_tx, av_); + enable_endpoint(ep_tx, "tx multiple"); - // prepare the stack for insertions - tx_endpoints_.reserve(threads_allocated); - // - for (unsigned int i = 0; i < threads_allocated; i++) + // combine endpoints and CQ into wrapper for convenience + eps_->ep_tx_ = endpoint_wrapper(ep_tx, nullptr, tx_cq, "tx multiple"); + } + else if (endpoint_type_ == endpoint_type::threadlocalTx) { - [[maybe_unused]] auto scp = - 
NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), "scalable", debug::dec<4>(i)); + // each thread creates a Tx endpoint on first call to get_tx_endpoint() + } + else if (endpoint_type_ == endpoint_type::scalableTx || + endpoint_type_ == endpoint_type::scalableTxRx) + { + // setup tx contexts for each possible thread + size_t threads_allocated = 0; + auto ep_sx = new_endpoint_scalable( + fabric_domain_, fabric_info_, true /*Tx*/, threads, threads_allocated); + + LF_DEB(NS_DEBUG::cnb_deb, + trace(debug::str<>("scalable endpoint ok"), "Contexts allocated", + debug::dec<4>(threads_allocated))); - // For threadlocal/scalable endpoints, tx/rx resources - fid_ep* scalable_ep_tx; - fid_cq* scalable_cq_tx; + finvoke("fi_scalable_ep_bind AV", "fi_scalable_ep_bind", + fi_scalable_ep_bind(ep_sx, &av_->fid, 0)); - // Create a Tx context, cq, bind and enable - finvoke("create tx context", "fi_tx_context", - fi_tx_context(ep_sx, i, NULL, &scalable_ep_tx, NULL)); - scalable_cq_tx = create_completion_queue(fabric_domain_, - fabric_info_->tx_attr->size, "tx scalable"); - bind_queue_to_endpoint(scalable_ep_tx, scalable_cq_tx, FI_TRANSMIT, "tx scalable"); - enable_endpoint(scalable_ep_tx, "tx scalable"); + // prepare the stack for insertions + tx_endpoints_.reserve(threads_allocated); + // + for (unsigned int i = 0; i < threads_allocated; i++) + { + [[maybe_unused]] auto scp = + NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), "scalable", debug::dec<4>(i)); + + // For threadlocal/scalable endpoints, tx/rx resources + fid_ep* scalable_ep_tx; + fid_cq* scalable_cq_tx; + + // Create a Tx context, cq, bind and enable + finvoke("create tx context", "fi_tx_context", + fi_tx_context(ep_sx, i, NULL, &scalable_ep_tx, NULL)); + scalable_cq_tx = create_completion_queue( + fabric_domain_, fabric_info_->tx_attr->size, "tx scalable"); + bind_queue_to_endpoint( + scalable_ep_tx, scalable_cq_tx, FI_TRANSMIT, "tx scalable"); + enable_endpoint(scalable_ep_tx, "tx scalable"); + + endpoint_wrapper 
tx(scalable_ep_tx, nullptr, scalable_cq_tx, "tx scalable"); + LF_DEB(NS_DEBUG::cnb_deb, + trace(debug::str<>("Scalable Ep"), "initial tx push", "ep", + NS_DEBUG::ptr(tx.get_ep()), "tx cq", NS_DEBUG::ptr(tx.get_tx_cq()), + "rx cq", NS_DEBUG::ptr(tx.get_rx_cq()))); + tx_endpoints_.push(tx); + } - endpoint_wrapper tx(scalable_ep_tx, nullptr, scalable_cq_tx, "tx scalable"); - LF_DEB(NS_DEBUG::cnb_deb, - trace(debug::str<>("Scalable Ep"), "initial tx push", "ep", - NS_DEBUG::ptr(tx.get_ep()), "tx cq", NS_DEBUG::ptr(tx.get_tx_cq()), "rx cq", - NS_DEBUG::ptr(tx.get_rx_cq()))); - tx_endpoints_.push(tx); + eps_->ep_tx_ = endpoint_wrapper(ep_sx, nullptr, nullptr, "rx scalable"); } - eps_->ep_tx_ = endpoint_wrapper(ep_sx, nullptr, nullptr, "rx scalable"); + // once enabled we can get the address + enable_endpoint(eps_->ep_rx_.get_ep(), "rx here"); + here_ = get_endpoint_address(&eps_->ep_rx_.get_ep()->fid); + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("setting 'here'"), iplocality(here_))); + + // // if we are using scalable endpoints, then setup tx/rx contexts + // // we will us a single endpoint for all Tx/Rx contexts + // if (endpoint_type_ == endpoint_type::scalableTx || + // endpoint_type_ == endpoint_type::scalableTxRx) + // { + + // // thread slots might not be same as what we asked for + // size_t threads_allocated = 0; + // auto ep_sx = new_endpoint_scalable(fabric_domain_, fabric_info_, true /*Tx*/, threads, + // threads_allocated); + // if (!ep_sx) + // throw NS_LIBFABRIC::fabric_error(FI_EOTHER, "fi_scalable endpoint creation failed"); + + // LF_DEB(NS_DEBUG::cnb_deb, trace(debug::str<>("scalable endpoint ok"), + // "Contexts allocated", debug::dec<4>(threads_allocated))); + + // // prepare the stack for insertions + // tx_endpoints_.reserve(threads_allocated); + // rx_endpoints_.reserve(threads_allocated); + // // + // for (unsigned int i = 0; i < threads_allocated; i++) + // { + // [[maybe_unused]] auto scp = + // NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), 
"scalable", debug::dec<4>(i)); + + // // For threadlocal/scalable endpoints, tx/rx resources + // fid_ep* scalable_ep_tx; + // fid_cq* scalable_cq_tx; + //// fid_ep* scalable_ep_rx; + //// fid_cq* scalable_cq_rx; + + // // Tx context setup + // finvoke("create tx context", "fi_tx_context", + // fi_tx_context(ep_sx, i, NULL, &scalable_ep_tx, NULL)); + + // scalable_cq_tx = create_completion_queue(fabric_domain_, + // fabric_info_->tx_attr->size, "tx scalable"); + + // bind_queue_to_endpoint(scalable_ep_tx, scalable_cq_tx, FI_TRANSMIT, "tx scalable"); + + // enable_endpoint(scalable_ep_tx, "tx scalable"); + + // endpoint_wrapper tx(scalable_ep_tx, nullptr, scalable_cq_tx, "tx scalable"); + // LF_DEB(NS_DEBUG::cnb_deb, + // trace(debug::str<>("Scalable Ep"), "initial tx push", "ep", + // NS_DEBUG::ptr(tx.get_ep()), "tx cq", NS_DEBUG::ptr(tx.get_tx_cq()), "rx cq", + // NS_DEBUG::ptr(tx.get_rx_cq()))); + // tx_endpoints_.push(tx); + + // // Rx contexts + //// finvoke("create rx context", "fi_rx_context", + //// fi_rx_context(ep_sx, i, NULL, &scalable_ep_rx, NULL)); + + //// scalable_cq_rx = + //// create_completion_queue(fabric_domain_, fabric_info_->rx_attr->size, "rx"); + + //// bind_queue_to_endpoint(scalable_ep_rx, scalable_cq_rx, FI_RECV, "rx scalable"); + + //// enable_endpoint(scalable_ep_rx, "rx scalable"); + + //// endpoint_wrapper rx(scalable_ep_rx, scalable_cq_rx, nullptr, "rx scalable"); + //// LF_DEB(NS_DEBUG::cnb_deb, + //// trace(debug::str<>("Scalable Ep"), "initial rx push", "ep", + //// NS_DEBUG::ptr(rx.get_ep()), "tx cq", NS_DEBUG::ptr(rx.get_tx_cq()), "rx cq", + //// NS_DEBUG::ptr(rx.get_rx_cq()))); + //// rx_endpoints_.push(rx); + // } + + // finvoke("fi_scalable_ep_bind AV", "fi_scalable_ep_bind", + // fi_scalable_ep_bind(ep_sx, &av_->fid, 0)); + + // eps_->ep_tx_ = endpoint_wrapper(ep_sx, nullptr, nullptr, "rx scalable"); + + return static_cast(this)->initialize_derived( + provider, rootnode, size, threads, std::forward(args)...); } - // once 
enabled we can get the address - enable_endpoint(eps_->ep_rx_.get_ep(), "rx here"); - here_ = get_endpoint_address(&eps_->ep_rx_.get_ep()->fid); - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("setting 'here'"), iplocality(here_))); - - // // if we are using scalable endpoints, then setup tx/rx contexts - // // we will us a single endpoint for all Tx/Rx contexts - // if (endpoint_type_ == endpoint_type::scalableTx || - // endpoint_type_ == endpoint_type::scalableTxRx) - // { - - // // thread slots might not be same as what we asked for - // size_t threads_allocated = 0; - // auto ep_sx = new_endpoint_scalable(fabric_domain_, fabric_info_, true /*Tx*/, threads, - // threads_allocated); - // if (!ep_sx) - // throw NS_LIBFABRIC::fabric_error(FI_EOTHER, "fi_scalable endpoint creation failed"); - - // LF_DEB(NS_DEBUG::cnb_deb, trace(debug::str<>("scalable endpoint ok"), - // "Contexts allocated", debug::dec<4>(threads_allocated))); - - // // prepare the stack for insertions - // tx_endpoints_.reserve(threads_allocated); - // rx_endpoints_.reserve(threads_allocated); - // // - // for (unsigned int i = 0; i < threads_allocated; i++) - // { - // [[maybe_unused]] auto scp = - // NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), "scalable", debug::dec<4>(i)); - - // // For threadlocal/scalable endpoints, tx/rx resources - // fid_ep* scalable_ep_tx; - // fid_cq* scalable_cq_tx; - //// fid_ep* scalable_ep_rx; - //// fid_cq* scalable_cq_rx; - - // // Tx context setup - // finvoke("create tx context", "fi_tx_context", - // fi_tx_context(ep_sx, i, NULL, &scalable_ep_tx, NULL)); - - // scalable_cq_tx = create_completion_queue(fabric_domain_, - // fabric_info_->tx_attr->size, "tx scalable"); - - // bind_queue_to_endpoint(scalable_ep_tx, scalable_cq_tx, FI_TRANSMIT, "tx scalable"); - - // enable_endpoint(scalable_ep_tx, "tx scalable"); - - // endpoint_wrapper tx(scalable_ep_tx, nullptr, scalable_cq_tx, "tx scalable"); - // LF_DEB(NS_DEBUG::cnb_deb, - // trace(debug::str<>("Scalable Ep"), 
"initial tx push", "ep", - // NS_DEBUG::ptr(tx.get_ep()), "tx cq", NS_DEBUG::ptr(tx.get_tx_cq()), "rx cq", - // NS_DEBUG::ptr(tx.get_rx_cq()))); - // tx_endpoints_.push(tx); - - // // Rx contexts - //// finvoke("create rx context", "fi_rx_context", - //// fi_rx_context(ep_sx, i, NULL, &scalable_ep_rx, NULL)); - - //// scalable_cq_rx = - //// create_completion_queue(fabric_domain_, fabric_info_->rx_attr->size, "rx"); - - //// bind_queue_to_endpoint(scalable_ep_rx, scalable_cq_rx, FI_RECV, "rx scalable"); - - //// enable_endpoint(scalable_ep_rx, "rx scalable"); - - //// endpoint_wrapper rx(scalable_ep_rx, scalable_cq_rx, nullptr, "rx scalable"); - //// LF_DEB(NS_DEBUG::cnb_deb, - //// trace(debug::str<>("Scalable Ep"), "initial rx push", "ep", - //// NS_DEBUG::ptr(rx.get_ep()), "tx cq", NS_DEBUG::ptr(rx.get_tx_cq()), "rx cq", - //// NS_DEBUG::ptr(rx.get_rx_cq()))); - //// rx_endpoints_.push(rx); - // } - - // finvoke("fi_scalable_ep_bind AV", "fi_scalable_ep_bind", - // fi_scalable_ep_bind(ep_sx, &av_->fid, 0)); - - // eps_->ep_tx_ = endpoint_wrapper(ep_sx, nullptr, nullptr, "rx scalable"); - - return static_cast(this)->initialize_derived(provider, rootnode, size, threads, - std::forward(args)...); - } - - // -------------------------------------------------------------------- - constexpr uint64_t caps_flags() { return static_cast(this)->caps_flags(); } + // -------------------------------------------------------------------- + constexpr uint64_t caps_flags() { return static_cast(this)->caps_flags(); } - // -------------------------------------------------------------------- - constexpr fi_threading threadlevel_flags() - { - return static_cast(this)->threadlevel_flags(); - } + // -------------------------------------------------------------------- + constexpr fi_threading threadlevel_flags() + { + return static_cast(this)->threadlevel_flags(); + } - // -------------------------------------------------------------------- - constexpr std::int64_t 
memory_registration_mode_flags() - { - std::int64_t base_flags = FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY; + // -------------------------------------------------------------------- + constexpr std::int64_t memory_registration_mode_flags() + { + std::int64_t base_flags = FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY; #if OOMPH_ENABLE_DEVICE - base_flags = base_flags | FI_MR_HMEM; + base_flags = base_flags | FI_MR_HMEM; #endif - base_flags = base_flags | FI_MR_LOCAL; + base_flags = base_flags | FI_MR_LOCAL; #if defined(HAVE_LIBFABRIC_CXI) - return base_flags | FI_MR_MMU_NOTIFY | FI_MR_ENDPOINT; + return base_flags | FI_MR_MMU_NOTIFY | FI_MR_ENDPOINT; #elif defined(HAVE_LIBFABRIC_EFA) - return base_flags | FI_MR_MMU_NOTIFY | FI_MR_ENDPOINT; + return base_flags | FI_MR_MMU_NOTIFY | FI_MR_ENDPOINT; #else - return base_flags; + return base_flags; #endif - } - - // -------------------------------------------------------------------- - uint32_t rendezvous_threshold() { return msg_rendezvous_threshold_; } - // -------------------------------------------------------------------- - // initialize the basic fabric/domain/name - void open_fabric(std::string const& provider, int threads, bool rootnode) - { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + } - struct fi_info* fabric_hints_ = fi_allocinfo(); - if (!fabric_hints_) + // -------------------------------------------------------------------- + uint32_t rendezvous_threshold() { return msg_rendezvous_threshold_; } + // -------------------------------------------------------------------- + // initialize the basic fabric/domain/name + void open_fabric(std::string const& provider, int threads, bool rootnode) { - throw NS_LIBFABRIC::fabric_error(-1, "Failed to allocate fabric hints"); - } + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Here locality"), iplocality(here_))); + struct fi_info* 
fabric_hints_ = fi_allocinfo(); + if (!fabric_hints_) + { + throw NS_LIBFABRIC::fabric_error(-1, "Failed to allocate fabric hints"); + } + + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Here locality"), iplocality(here_))); #if defined(HAVE_LIBFABRIC_SOCKETS) || defined(HAVE_LIBFABRIC_TCP) || defined(HAVE_LIBFABRIC_VERBS) - fabric_hints_->addr_format = FI_SOCKADDR_IN; + fabric_hints_->addr_format = FI_SOCKADDR_IN; #elif defined(HAVE_LIBFABRIC_EFA) - fabric_hints_->addr_format = FI_ADDR_EFA; + fabric_hints_->addr_format = FI_ADDR_EFA; #endif - fabric_hints_->caps = caps_flags(); + fabric_hints_->caps = caps_flags(); - fabric_hints_->mode = FI_CONTEXT /*| FI_MR_LOCAL*/; - if (provider.c_str() == std::string("tcp")) - { - fabric_hints_->fabric_attr->prov_name = - strdup(std::string(provider + ";ofi_rxm").c_str()); - } - else if (provider.c_str() == std::string("verbs")) - { - fabric_hints_->fabric_attr->prov_name = - strdup(std::string(provider + ";ofi_rxm").c_str()); - } - else { fabric_hints_->fabric_attr->prov_name = strdup(provider.c_str()); } - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("fabric provider"), fabric_hints_->fabric_attr->prov_name)); + fabric_hints_->mode = FI_CONTEXT /*| FI_MR_LOCAL*/; + if (provider.c_str() == std::string("tcp")) + { + fabric_hints_->fabric_attr->prov_name = + strdup(std::string(provider + ";ofi_rxm").c_str()); + } + else if (provider.c_str() == std::string("verbs")) + { + fabric_hints_->fabric_attr->prov_name = + strdup(std::string(provider + ";ofi_rxm").c_str()); + } + else { fabric_hints_->fabric_attr->prov_name = strdup(provider.c_str()); } + LF_DEB(NS_DEBUG::cnb_deb, + debug(debug::str<>("fabric provider"), fabric_hints_->fabric_attr->prov_name)); - fabric_hints_->domain_attr->mr_mode = memory_registration_mode_flags(); + fabric_hints_->domain_attr->mr_mode = memory_registration_mode_flags(); - // Enable/Disable the use of progress threads - auto progress = libfabric_progress_type(); - 
fabric_hints_->domain_attr->control_progress = progress; - fabric_hints_->domain_attr->data_progress = progress; - LF_DEB(NS_DEBUG::cnb_err, debug(debug::str<>("progress"), libfabric_progress_string())); + // Enable/Disable the use of progress threads + auto progress = libfabric_progress_type(); + fabric_hints_->domain_attr->control_progress = progress; + fabric_hints_->domain_attr->data_progress = progress; + LF_DEB(NS_DEBUG::cnb_err, debug(debug::str<>("progress"), libfabric_progress_string())); - if (threads > 1) - { - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("FI_THREAD_FID"))); - // Enable thread safe mode (Does not work with psm2 provider) - // fabric_hints_->domain_attr->threading = FI_THREAD_SAFE; - //fabric_hints_->domain_attr->threading = FI_THREAD_FID; - fabric_hints_->domain_attr->threading = threadlevel_flags(); - } - else - { - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("FI_THREAD_DOMAIN"))); - // we serialize everything - fabric_hints_->domain_attr->threading = FI_THREAD_DOMAIN; - } + if (threads > 1) + { + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("FI_THREAD_FID"))); + // Enable thread safe mode (Does not work with psm2 provider) + // fabric_hints_->domain_attr->threading = FI_THREAD_SAFE; + //fabric_hints_->domain_attr->threading = FI_THREAD_FID; + fabric_hints_->domain_attr->threading = threadlevel_flags(); + } + else + { + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("FI_THREAD_DOMAIN"))); + // we serialize everything + fabric_hints_->domain_attr->threading = FI_THREAD_DOMAIN; + } - // Enable resource management - fabric_hints_->domain_attr->resource_mgmt = FI_RM_ENABLED; + // Enable resource management + fabric_hints_->domain_attr->resource_mgmt = FI_RM_ENABLED; - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("fabric endpoint"), "RDM")); - fabric_hints_->ep_attr->type = FI_EP_RDM; + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("fabric endpoint"), "RDM")); + fabric_hints_->ep_attr->type = FI_EP_RDM; - uint64_t flags = 0; - 
LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("get fabric info"), "FI_VERSION", - debug::dec(LIBFABRIC_FI_VERSION_MAJOR), debug::dec(LIBFABRIC_FI_VERSION_MINOR))); + uint64_t flags = 0; + LF_DEB(NS_DEBUG::cnb_deb, + debug(debug::str<>("get fabric info"), "FI_VERSION", + debug::dec(LIBFABRIC_FI_VERSION_MAJOR), + debug::dec(LIBFABRIC_FI_VERSION_MINOR))); - int ret = fi_getinfo(FI_VERSION(LIBFABRIC_FI_VERSION_MAJOR, LIBFABRIC_FI_VERSION_MINOR), - nullptr, nullptr, flags, fabric_hints_, &fabric_info_); - if (ret) throw NS_LIBFABRIC::fabric_error(ret, "Failed to get fabric info"); + int ret = fi_getinfo(FI_VERSION(LIBFABRIC_FI_VERSION_MAJOR, LIBFABRIC_FI_VERSION_MINOR), + nullptr, nullptr, flags, fabric_hints_, &fabric_info_); + if (ret) throw NS_LIBFABRIC::fabric_error(ret, "Failed to get fabric info"); - if (rootnode) - { - LF_DEB(NS_DEBUG::cnb_err, - trace(debug::str<>("Fabric info"), "\n", fi_tostr(fabric_info_, FI_TYPE_INFO))); - } + if (rootnode) + { + LF_DEB(NS_DEBUG::cnb_err, + trace(debug::str<>("Fabric info"), "\n", fi_tostr(fabric_info_, FI_TYPE_INFO))); + } - bool context = (fabric_hints_->mode & FI_CONTEXT) != 0; - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Requires FI_CONTEXT"), context)); + bool context = (fabric_hints_->mode & FI_CONTEXT) != 0; + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Requires FI_CONTEXT"), context)); - mrlocal = (fabric_hints_->domain_attr->mr_mode & FI_MR_LOCAL) != 0; - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Requires FI_MR_LOCAL"), mrlocal)); + mrlocal = (fabric_hints_->domain_attr->mr_mode & FI_MR_LOCAL) != 0; + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Requires FI_MR_LOCAL"), mrlocal)); - mrbind = (fabric_hints_->domain_attr->mr_mode & FI_MR_ENDPOINT) != 0; - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Requires FI_MR_ENDPOINT"), mrbind)); + mrbind = (fabric_hints_->domain_attr->mr_mode & FI_MR_ENDPOINT) != 0; + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Requires FI_MR_ENDPOINT"), mrbind)); - /* Check if 
provider requires heterogeneous memory registration */ - mrhmem = (fabric_hints_->domain_attr->mr_mode & FI_MR_HMEM) != 0; - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Requires FI_MR_HMEM"), mrhmem)); + /* Check if provider requires heterogeneous memory registration */ + mrhmem = (fabric_hints_->domain_attr->mr_mode & FI_MR_HMEM) != 0; + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Requires FI_MR_HMEM"), mrhmem)); - bool mrhalloc = (fabric_hints_->domain_attr->mr_mode & FI_MR_ALLOCATED) != 0; - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Requires FI_MR_ALLOCATED"), mrhalloc)); + bool mrhalloc = (fabric_hints_->domain_attr->mr_mode & FI_MR_ALLOCATED) != 0; + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Requires FI_MR_ALLOCATED"), mrhalloc)); - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Creating fi_fabric"))); - ret = fi_fabric(fabric_info_->fabric_attr, &fabric_, nullptr); - if (ret) throw NS_LIBFABRIC::fabric_error(ret, "Failed to get fi_fabric"); + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Creating fi_fabric"))); + ret = fi_fabric(fabric_info_->fabric_attr, &fabric_, nullptr); + if (ret) throw NS_LIBFABRIC::fabric_error(ret, "Failed to get fi_fabric"); - // Allocate a domain. - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Allocating domain"))); - ret = fi_domain(fabric_, fabric_info_, &fabric_domain_, nullptr); - if (ret) throw NS_LIBFABRIC::fabric_error(ret, "fi_domain"); + // Allocate a domain. 
+ LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Allocating domain"))); + ret = fi_domain(fabric_, fabric_info_, &fabric_domain_, nullptr); + if (ret) throw NS_LIBFABRIC::fabric_error(ret, "fi_domain"); #if defined(HAVE_LIBFABRIC_GNI) - { - [[maybe_unused]] auto scp = - NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), "GNI memory registration block"); - - LF_DEB(NS_DEBUG::cnb_err, debug(debug::str<>("-------"), "GNI String values")); - // Dump out all vars for debug purposes - for (auto& gni_data : gni_strs) { - _set_check_domain_op_value(gni_data.first, 0, gni_data.second.c_str(), - false); - } - LF_DEB(NS_DEBUG::cnb_err, debug(debug::str<>("-------"), "GNI Int values")); - for (auto& gni_data : gni_ints) - { - _set_check_domain_op_value(gni_data.first, 0, gni_data.second.c_str(), - false); - } - LF_DEB(NS_DEBUG::cnb_err, debug(debug::str<>("-------"))); - - // -------------------------- - // GNI_MR_CACHE - // set GNI mem reg to be either none, internal or udreg - // - _set_check_domain_op_value(GNI_MR_CACHE, const_cast(OOMPH_GNI_REG), - "GNI_MR_CACHE"); - - // -------------------------- - // GNI_MR_UDREG_REG_LIMIT - // Experiments showed default value of 2048 too high if - // launching multiple clients on one node - // - int32_t udreg_limit = 0x0800; // 0x0400 = 1024, 0x0800 = 2048 - _set_check_domain_op_value(GNI_MR_UDREG_REG_LIMIT, udreg_limit, - "GNI_MR_UDREG_REG_LIMIT"); - - // -------------------------- - // GNI_MR_CACHE_LAZY_DEREG - // Enable lazy deregistration in MR cache - // - int32_t enable = 1; - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("setting GNI_MR_CACHE_LAZY_DEREG"))); - _set_check_domain_op_value(GNI_MR_CACHE_LAZY_DEREG, enable, - "GNI_MR_CACHE_LAZY_DEREG"); + [[maybe_unused]] auto scp = + NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), "GNI memory registration block"); - // -------------------------- - // GNI_MSG_RENDEZVOUS_THRESHOLD (c.f. 
GNI_RMA_RDMA_THRESHOLD) - // - int32_t thresh = msg_rendezvous_threshold_; - _set_check_domain_op_value(GNI_MSG_RENDEZVOUS_THRESHOLD, thresh, - "GNI_MSG_RENDEZVOUS_THRESHOLD"); - } + LF_DEB(NS_DEBUG::cnb_err, debug(debug::str<>("-------"), "GNI String values")); + // Dump out all vars for debug purposes + for (auto& gni_data : gni_strs) + { + _set_check_domain_op_value( + gni_data.first, 0, gni_data.second.c_str(), false); + } + LF_DEB(NS_DEBUG::cnb_err, debug(debug::str<>("-------"), "GNI Int values")); + for (auto& gni_data : gni_ints) + { + _set_check_domain_op_value( + gni_data.first, 0, gni_data.second.c_str(), false); + } + LF_DEB(NS_DEBUG::cnb_err, debug(debug::str<>("-------"))); + + // -------------------------- + // GNI_MR_CACHE + // set GNI mem reg to be either none, internal or udreg + // + _set_check_domain_op_value( + GNI_MR_CACHE, const_cast(OOMPH_GNI_REG), "GNI_MR_CACHE"); + + // -------------------------- + // GNI_MR_UDREG_REG_LIMIT + // Experiments showed default value of 2048 too high if + // launching multiple clients on one node + // + int32_t udreg_limit = 0x0800; // 0x0400 = 1024, 0x0800 = 2048 + _set_check_domain_op_value( + GNI_MR_UDREG_REG_LIMIT, udreg_limit, "GNI_MR_UDREG_REG_LIMIT"); + + // -------------------------- + // GNI_MR_CACHE_LAZY_DEREG + // Enable lazy deregistration in MR cache + // + int32_t enable = 1; + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("setting GNI_MR_CACHE_LAZY_DEREG"))); + _set_check_domain_op_value( + GNI_MR_CACHE_LAZY_DEREG, enable, "GNI_MR_CACHE_LAZY_DEREG"); + + // -------------------------- + // GNI_MSG_RENDEZVOUS_THRESHOLD (c.f. 
GNI_RMA_RDMA_THRESHOLD) + // + int32_t thresh = msg_rendezvous_threshold_; + _set_check_domain_op_value( + GNI_MSG_RENDEZVOUS_THRESHOLD, thresh, "GNI_MSG_RENDEZVOUS_THRESHOLD"); + } #endif - tx_inject_size_ = fabric_info_->tx_attr->inject_size; + tx_inject_size_ = fabric_info_->tx_attr->inject_size; - // the number of preposted receives, and sender queue depth - // is set by querying the tx/tx attr sizes - tx_attr_size_ = std::min(size_t(512), fabric_info_->tx_attr->size / 2); - rx_attr_size_ = std::min(size_t(512), fabric_info_->rx_attr->size / 2); - fi_freeinfo(fabric_hints_); - } + // the number of preposted receives, and sender queue depth + // is set by querying the tx/tx attr sizes + tx_attr_size_ = std::min(size_t(512), fabric_info_->tx_attr->size / 2); + rx_attr_size_ = std::min(size_t(512), fabric_info_->rx_attr->size / 2); + fi_freeinfo(fabric_hints_); + } - // -------------------------------------------------------------------- - struct fi_info* set_src_dst_addresses(struct fi_info* info, bool tx) - { - return static_cast(this)->set_src_dst_addresses(info, tx); - } + // -------------------------------------------------------------------- + struct fi_info* set_src_dst_addresses(struct fi_info* info, bool tx) + { + return static_cast(this)->set_src_dst_addresses(info, tx); + } #ifdef HAVE_LIBFABRIC_GNI - // -------------------------------------------------------------------- - // Special GNI extensions to disable memory registration cache - - // if set is false, the old value is returned and nothing is set - template - int _set_check_domain_op_value(int op, T value, const char* info, bool set = true) - { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); - static struct fi_gni_ops_domain* gni_domain_ops = nullptr; - int ret = 0; + // -------------------------------------------------------------------- + // Special GNI extensions to disable memory registration cache - if (gni_domain_ops == nullptr) + // if set is false, 
the old value is returned and nothing is set + template + int _set_check_domain_op_value(int op, T value, char const* info, bool set = true) { - ret = fi_open_ops(&fabric_domain_->fid, FI_GNI_DOMAIN_OPS_1, 0, (void**)&gni_domain_ops, - nullptr); - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("gni open ops"), (ret == 0 ? "OK" : "FAIL"), - NS_DEBUG::ptr(gni_domain_ops))); - } + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + static struct fi_gni_ops_domain* gni_domain_ops = nullptr; + int ret = 0; - // if open was ok and set flag is present, then set value - if (ret == 0 && set) - { - ret = gni_domain_ops->set_val(&fabric_domain_->fid, (dom_ops_val_t)(op), - reinterpret_cast(&value)); + if (gni_domain_ops == nullptr) + { + ret = fi_open_ops(&fabric_domain_->fid, FI_GNI_DOMAIN_OPS_1, 0, + (void**) &gni_domain_ops, nullptr); + LF_DEB(NS_DEBUG::cnb_deb, + debug(debug::str<>("gni open ops"), (ret == 0 ? "OK" : "FAIL"), + NS_DEBUG::ptr(gni_domain_ops))); + } - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("gni set ops val"), value, (ret == 0 ? "OK" : "FAIL"))); - } + // if open was ok and set flag is present, then set value + if (ret == 0 && set) + { + ret = gni_domain_ops->set_val( + &fabric_domain_->fid, (dom_ops_val_t) (op), reinterpret_cast(&value)); - // Get the value (so we can check that the value we set is now returned) - T new_value; - ret = gni_domain_ops->get_val(&fabric_domain_->fid, (dom_ops_val_t)(op), &new_value); - if constexpr (std::is_integral::value) - { - LF_DEB(NS_DEBUG::cnb_err, debug(debug::str<>("gni op val"), (ret == 0 ? "OK" : "FAIL"), - info, debug::hex<8>(new_value))); - } - else - { - LF_DEB(NS_DEBUG::cnb_err, - debug(debug::str<>("gni op val"), (ret == 0 ? "OK" : "FAIL"), info, new_value)); - } - // - if (ret) throw NS_LIBFABRIC::fabric_error(ret, std::string("setting ") + info); + LF_DEB(NS_DEBUG::cnb_deb, + debug(debug::str<>("gni set ops val"), value, (ret == 0 ? 
"OK" : "FAIL"))); + } - return ret; - } + // Get the value (so we can check that the value we set is now returned) + T new_value; + ret = gni_domain_ops->get_val(&fabric_domain_->fid, (dom_ops_val_t) (op), &new_value); + if constexpr (std::is_integral::value) + { + LF_DEB(NS_DEBUG::cnb_err, + debug(debug::str<>("gni op val"), (ret == 0 ? "OK" : "FAIL"), info, + debug::hex<8>(new_value))); + } + else + { + LF_DEB(NS_DEBUG::cnb_err, + debug(debug::str<>("gni op val"), (ret == 0 ? "OK" : "FAIL"), info, new_value)); + } + // + if (ret) throw NS_LIBFABRIC::fabric_error(ret, std::string("setting ") + info); + + return ret; + } #endif - // -------------------------------------------------------------------- - struct fid_ep* new_endpoint_active(struct fid_domain* domain, struct fi_info* info, bool tx) - { - // don't allow multiple threads to call endpoint create at the same time - scoped_lock lock(controller_mutex_); + // -------------------------------------------------------------------- + struct fid_ep* new_endpoint_active(struct fid_domain* domain, struct fi_info* info, bool tx) + { + // don't allow multiple threads to call endpoint create at the same time + scoped_lock lock(controller_mutex_); - // make sure src_addr/dst_addr are set accordingly - // and we do not create two endpoint with the same src address - struct fi_info* hints = set_src_dst_addresses(info, tx); + // make sure src_addr/dst_addr are set accordingly + // and we do not create two endpoint with the same src address + struct fi_info* hints = set_src_dst_addresses(info, tx); - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("Got info mode"), (info->mode & FI_NOTIFY_FLAGS_ONLY))); + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + LF_DEB(NS_DEBUG::cnb_deb, + debug(debug::str<>("Got info mode"), (info->mode & FI_NOTIFY_FLAGS_ONLY))); - struct fid_ep* ep; - int ret = 
fi_endpoint(domain, hints, &ep, nullptr); - if (ret) - { - throw NS_LIBFABRIC::fabric_error(ret, "fi_endpoint (too many threadlocal " - "endpoints?)"); + struct fid_ep* ep; + int ret = fi_endpoint(domain, hints, &ep, nullptr); + if (ret) + { + throw NS_LIBFABRIC::fabric_error(ret, + "fi_endpoint (too many threadlocal " + "endpoints?)"); + } + fi_freeinfo(hints); + LF_DEB( + NS_DEBUG::cnb_deb, debug(debug::str<>("new_endpoint_active"), NS_DEBUG::ptr(ep))); + return ep; } - fi_freeinfo(hints); - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("new_endpoint_active"), NS_DEBUG::ptr(ep))); - return ep; - } - // -------------------------------------------------------------------- - struct fid_ep* new_endpoint_scalable(struct fid_domain* domain, struct fi_info* info, bool tx, - size_t threads, size_t& threads_allocated) - { - // don't allow multiple threads to call endpoint create at the same time - scoped_lock lock(controller_mutex_); + // -------------------------------------------------------------------- + struct fid_ep* new_endpoint_scalable(struct fid_domain* domain, struct fi_info* info, + bool tx, size_t threads, size_t& threads_allocated) + { + // don't allow multiple threads to call endpoint create at the same time + scoped_lock lock(controller_mutex_); - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("fi_dupinfo"))); - struct fi_info* hints = fi_dupinfo(info); - if (!hints) throw NS_LIBFABRIC::fabric_error(0, "fi_dupinfo"); + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("fi_dupinfo"))); + struct fi_info* hints = fi_dupinfo(info); + if (!hints) throw NS_LIBFABRIC::fabric_error(0, "fi_dupinfo"); - int flags = 0; - struct fi_info* new_hints = nullptr; - int ret = fi_getinfo(FI_VERSION(LIBFABRIC_FI_VERSION_MAJOR, LIBFABRIC_FI_VERSION_MINOR), - nullptr, nullptr, flags, hints, &new_hints); - if (ret) 
throw NS_LIBFABRIC::fabric_error(ret, "fi_getinfo"); + int flags = 0; + struct fi_info* new_hints = nullptr; + int ret = fi_getinfo(FI_VERSION(LIBFABRIC_FI_VERSION_MAJOR, LIBFABRIC_FI_VERSION_MINOR), + nullptr, nullptr, flags, hints, &new_hints); + if (ret) throw NS_LIBFABRIC::fabric_error(ret, "fi_getinfo"); - // Check the optimal number of TX/RX contexts supported by the provider - size_t context_count = 0; - if (tx) { context_count = std::min(new_hints->domain_attr->tx_ctx_cnt, threads); } - else { context_count = std::min(new_hints->domain_attr->rx_ctx_cnt, threads); } + // Check the optimal number of TX/RX contexts supported by the provider + size_t context_count = 0; + if (tx) { context_count = std::min(new_hints->domain_attr->tx_ctx_cnt, threads); } + else { context_count = std::min(new_hints->domain_attr->rx_ctx_cnt, threads); } - // clang-format off + // clang-format off LF_DEB(NS_DEBUG::cnb_deb, trace(debug::str<>("scalable endpoint"), "Tx", tx, @@ -1060,440 +1061,451 @@ class controller_base "tx_ctx_cnt", debug::dec<3>(new_hints->domain_attr->tx_ctx_cnt), "rx_ctx_cnt", debug::dec<3>(new_hints->domain_attr->rx_ctx_cnt), "context_count", debug::dec<3>(context_count))); - // clang-format on - - threads_allocated = context_count; - new_hints->ep_attr->tx_ctx_cnt = context_count; - new_hints->ep_attr->rx_ctx_cnt = context_count; - - struct fid_ep* ep; - ret = fi_scalable_ep(domain, new_hints, &ep, nullptr); - if (ret) throw NS_LIBFABRIC::fabric_error(ret, "fi_scalable_ep"); - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("new_endpoint_scalable"), NS_DEBUG::ptr(ep))); - fi_freeinfo(hints); - return ep; - } - - // -------------------------------------------------------------------- - endpoint_wrapper& get_rx_endpoint() - { - static auto rx = NS_DEBUG::cnb_deb.make_timer(1, debug::str<>("get_rx_endpoint")); - LF_DEB(NS_DEBUG::cnb_deb, timed(rx)); + // clang-format on + + threads_allocated = context_count; + new_hints->ep_attr->tx_ctx_cnt = context_count; + 
new_hints->ep_attr->rx_ctx_cnt = context_count; + + struct fid_ep* ep; + ret = fi_scalable_ep(domain, new_hints, &ep, nullptr); + if (ret) throw NS_LIBFABRIC::fabric_error(ret, "fi_scalable_ep"); + LF_DEB( + NS_DEBUG::cnb_deb, debug(debug::str<>("new_endpoint_scalable"), NS_DEBUG::ptr(ep))); + fi_freeinfo(hints); + return ep; + } - if (endpoint_type_ == endpoint_type::scalableTxRx) + // -------------------------------------------------------------------- + endpoint_wrapper& get_rx_endpoint() { - if (eps_->tl_srx_.get_ep() == nullptr) + static auto rx = NS_DEBUG::cnb_deb.make_timer(1, debug::str<>("get_rx_endpoint")); + LF_DEB(NS_DEBUG::cnb_deb, timed(rx)); + + if (endpoint_type_ == endpoint_type::scalableTxRx) { - endpoint_wrapper ep; - bool ok = rx_endpoints_.pop(ep); - if (!ok) + if (eps_->tl_srx_.get_ep() == nullptr) { - // clang-format off + endpoint_wrapper ep; + bool ok = rx_endpoints_.pop(ep); + if (!ok) + { + // clang-format off LF_DEB(NS_DEBUG::cnb_deb, error(debug::str<>("Scalable Ep"), "pop rx", "ep", NS_DEBUG::ptr(ep.get_ep()), "tx cq", NS_DEBUG::ptr(ep.get_tx_cq()), "rx cq", NS_DEBUG::ptr(ep.get_rx_cq()))); - // clang-format on - throw std::runtime_error("rx endpoint wrapper pop fail"); + // clang-format on + throw std::runtime_error("rx endpoint wrapper pop fail"); + } + eps_->tl_srx_ = stack_endpoint( + ep.get_ep(), ep.get_rx_cq(), ep.get_tx_cq(), ep.get_name(), &rx_endpoints_); + LF_DEB(NS_DEBUG::cnb_deb, + trace(debug::str<>("Scalable Ep"), "pop rx", "ep", + NS_DEBUG::ptr(eps_->tl_srx_.get_ep()), "tx cq", + NS_DEBUG::ptr(eps_->tl_srx_.get_tx_cq()), "rx cq", + NS_DEBUG::ptr(eps_->tl_srx_.get_rx_cq()))); } - eps_->tl_srx_ = stack_endpoint(ep.get_ep(), ep.get_rx_cq(), ep.get_tx_cq(), - ep.get_name(), &rx_endpoints_); - LF_DEB(NS_DEBUG::cnb_deb, trace(debug::str<>("Scalable Ep"), "pop rx", "ep", - NS_DEBUG::ptr(eps_->tl_srx_.get_ep()), "tx cq", - NS_DEBUG::ptr(eps_->tl_srx_.get_tx_cq()), "rx cq", - NS_DEBUG::ptr(eps_->tl_srx_.get_rx_cq()))); + return 
eps_->tl_srx_.endpoint_; } - return eps_->tl_srx_.endpoint_; + // otherwise just return the normal Rx endpoint + return eps_->ep_rx_; } - // otherwise just return the normal Rx endpoint - return eps_->ep_rx_; - } - // -------------------------------------------------------------------- - endpoint_wrapper& get_tx_endpoint() - { - if (endpoint_type_ == endpoint_type::threadlocalTx) + // -------------------------------------------------------------------- + endpoint_wrapper& get_tx_endpoint() { - if (eps_->tl_tx_.get_ep() == nullptr) + if (endpoint_type_ == endpoint_type::threadlocalTx) { - [[maybe_unused]] auto scp = - NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__, "threadlocal"); - - // create a completion queue for tx endpoint - fabric_info_->tx_attr->op_flags |= (FI_INJECT_COMPLETE | FI_COMPLETION); - auto tx_cq = create_completion_queue(fabric_domain_, fabric_info_->tx_attr->size, - "tx threadlocal"); - - // setup an endpoint for sending messages - // note that the CQ needs FI_RECV even though its a Tx cq to keep - // some providers happy as they trigger an error if an endpoint - // has no Rx cq attached (progress bug) - auto ep_tx = new_endpoint_active(fabric_domain_, fabric_info_, true); - bind_queue_to_endpoint(ep_tx, tx_cq, FI_TRANSMIT | FI_RECV, "tx threadlocal"); - bind_address_vector_to_endpoint(ep_tx, av_); - enable_endpoint(ep_tx, "tx threadlocal"); - - // set threadlocal endpoint wrapper - LF_DEB(NS_DEBUG::cnb_deb, - trace(debug::str<>("Threadlocal Ep"), "create Tx", "ep", NS_DEBUG::ptr(ep_tx), - "tx cq", NS_DEBUG::ptr(tx_cq), "rx cq", NS_DEBUG::ptr(nullptr))); - // for cleaning up at termination - endpoint_wrapper ep(ep_tx, nullptr, tx_cq, "tx threadlocal"); - tx_endpoints_.push(ep); - eps_->tl_tx_ = stack_endpoint(ep_tx, nullptr, tx_cq, "threadlocal", nullptr); + if (eps_->tl_tx_.get_ep() == nullptr) + { + [[maybe_unused]] auto scp = + NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__, "threadlocal"); + + // create a completion queue for 
tx endpoint + fabric_info_->tx_attr->op_flags |= (FI_INJECT_COMPLETE | FI_COMPLETION); + auto tx_cq = create_completion_queue( + fabric_domain_, fabric_info_->tx_attr->size, "tx threadlocal"); + + // setup an endpoint for sending messages + // note that the CQ needs FI_RECV even though its a Tx cq to keep + // some providers happy as they trigger an error if an endpoint + // has no Rx cq attached (progress bug) + auto ep_tx = new_endpoint_active(fabric_domain_, fabric_info_, true); + bind_queue_to_endpoint(ep_tx, tx_cq, FI_TRANSMIT | FI_RECV, "tx threadlocal"); + bind_address_vector_to_endpoint(ep_tx, av_); + enable_endpoint(ep_tx, "tx threadlocal"); + + // set threadlocal endpoint wrapper + LF_DEB(NS_DEBUG::cnb_deb, + trace(debug::str<>("Threadlocal Ep"), "create Tx", "ep", + NS_DEBUG::ptr(ep_tx), "tx cq", NS_DEBUG::ptr(tx_cq), "rx cq", + NS_DEBUG::ptr(nullptr))); + // for cleaning up at termination + endpoint_wrapper ep(ep_tx, nullptr, tx_cq, "tx threadlocal"); + tx_endpoints_.push(ep); + eps_->tl_tx_ = stack_endpoint(ep_tx, nullptr, tx_cq, "threadlocal", nullptr); + } + return eps_->tl_tx_.endpoint_; } - return eps_->tl_tx_.endpoint_; - } - else if (endpoint_type_ == endpoint_type::scalableTx || - endpoint_type_ == endpoint_type::scalableTxRx) - { - if (eps_->tl_stx_.get_ep() == nullptr) + else if (endpoint_type_ == endpoint_type::scalableTx || + endpoint_type_ == endpoint_type::scalableTxRx) { - endpoint_wrapper ep; - bool ok = tx_endpoints_.pop(ep); - if (!ok) + if (eps_->tl_stx_.get_ep() == nullptr) { + endpoint_wrapper ep; + bool ok = tx_endpoints_.pop(ep); + if (!ok) + { + LF_DEB(NS_DEBUG::cnb_deb, + error(debug::str<>("Scalable Ep"), "pop tx", "ep", + NS_DEBUG::ptr(ep.get_ep()), "tx cq", NS_DEBUG::ptr(ep.get_tx_cq()), + "rx cq", NS_DEBUG::ptr(ep.get_rx_cq()))); + throw std::runtime_error("tx endpoint wrapper pop fail"); + } + eps_->tl_stx_ = stack_endpoint( + ep.get_ep(), ep.get_rx_cq(), ep.get_tx_cq(), ep.get_name(), &tx_endpoints_); 
LF_DEB(NS_DEBUG::cnb_deb, - error(debug::str<>("Scalable Ep"), "pop tx", "ep", - NS_DEBUG::ptr(ep.get_ep()), "tx cq", NS_DEBUG::ptr(ep.get_tx_cq()), - "rx cq", NS_DEBUG::ptr(ep.get_rx_cq()))); - throw std::runtime_error("tx endpoint wrapper pop fail"); + trace(debug::str<>("Scalable Ep"), "pop tx", "ep", + NS_DEBUG::ptr(eps_->tl_stx_.get_ep()), "tx cq", + NS_DEBUG::ptr(eps_->tl_stx_.get_tx_cq()), "rx cq", + NS_DEBUG::ptr(eps_->tl_stx_.get_rx_cq()))); } - eps_->tl_stx_ = stack_endpoint(ep.get_ep(), ep.get_rx_cq(), ep.get_tx_cq(), - ep.get_name(), &tx_endpoints_); - LF_DEB(NS_DEBUG::cnb_deb, trace(debug::str<>("Scalable Ep"), "pop tx", "ep", - NS_DEBUG::ptr(eps_->tl_stx_.get_ep()), "tx cq", - NS_DEBUG::ptr(eps_->tl_stx_.get_tx_cq()), "rx cq", - NS_DEBUG::ptr(eps_->tl_stx_.get_rx_cq()))); + return eps_->tl_stx_.endpoint_; } - return eps_->tl_stx_.endpoint_; + else if (endpoint_type_ == endpoint_type::multiple) { return eps_->ep_tx_; } + // single : shared tx/rx endpoint + return eps_->ep_rx_; } - else if (endpoint_type_ == endpoint_type::multiple) { return eps_->ep_tx_; } - // single : shared tx/rx endpoint - return eps_->ep_rx_; - } - - // -------------------------------------------------------------------- - void bind_address_vector_to_endpoint(struct fid_ep* endpoint, struct fid_av* av) - { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); - - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Binding AV"), "to", NS_DEBUG::ptr(endpoint))); - int ret = fi_ep_bind(endpoint, &av->fid, 0); - if (ret) throw NS_LIBFABRIC::fabric_error(ret, "bind address_vector"); - } - - // -------------------------------------------------------------------- - void bind_queue_to_endpoint(struct fid_ep* endpoint, struct fid_cq*& cq, uint32_t cqtype, - const char* type) - { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__, type); - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("Binding CQ"), "to", NS_DEBUG::ptr(endpoint), 
type)); - int ret = fi_ep_bind(endpoint, &cq->fid, cqtype); - if (ret) throw NS_LIBFABRIC::fabric_error(ret, "bind cq"); - } + // -------------------------------------------------------------------- + void bind_address_vector_to_endpoint(struct fid_ep* endpoint, struct fid_av* av) + { + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); - // -------------------------------------------------------------------- - fid_cq* bind_tx_queue_to_rx_endpoint(struct fi_info* info, struct fid_ep* ep) - { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); - info->tx_attr->op_flags |= (FI_INJECT_COMPLETE | FI_COMPLETION); - fid_cq* tx_cq = create_completion_queue(fabric_domain_, info->tx_attr->size, "tx->rx"); - // shared send/recv endpoint - bind send cq to the recv endpoint - bind_queue_to_endpoint(ep, tx_cq, FI_TRANSMIT, "tx->rx bug fix"); - return tx_cq; - } + LF_DEB(NS_DEBUG::cnb_deb, + debug(debug::str<>("Binding AV"), "to", NS_DEBUG::ptr(endpoint))); + int ret = fi_ep_bind(endpoint, &av->fid, 0); + if (ret) throw NS_LIBFABRIC::fabric_error(ret, "bind address_vector"); + } - // -------------------------------------------------------------------- - void enable_endpoint(struct fid_ep* endpoint, const char* type) - { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__, type); + // -------------------------------------------------------------------- + void bind_queue_to_endpoint( + struct fid_ep* endpoint, struct fid_cq*& cq, uint32_t cqtype, char const* type) + { + [[maybe_unused]] auto scp = + NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__, type); - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("Enabling endpoint"), NS_DEBUG::ptr(endpoint))); - int ret = fi_enable(endpoint); - if (ret) throw NS_LIBFABRIC::fabric_error(ret, "fi_enable"); - } + LF_DEB(NS_DEBUG::cnb_deb, + debug(debug::str<>("Binding CQ"), "to", NS_DEBUG::ptr(endpoint), type)); + int ret = 
fi_ep_bind(endpoint, &cq->fid, cqtype); + if (ret) throw NS_LIBFABRIC::fabric_error(ret, "bind cq"); + } - // -------------------------------------------------------------------- - locality get_endpoint_address(struct fid* id) - { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + // -------------------------------------------------------------------- + fid_cq* bind_tx_queue_to_rx_endpoint(struct fi_info* info, struct fid_ep* ep) + { + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + info->tx_attr->op_flags |= (FI_INJECT_COMPLETE | FI_COMPLETION); + fid_cq* tx_cq = create_completion_queue(fabric_domain_, info->tx_attr->size, "tx->rx"); + // shared send/recv endpoint - bind send cq to the recv endpoint + bind_queue_to_endpoint(ep, tx_cq, FI_TRANSMIT, "tx->rx bug fix"); + return tx_cq; + } - locality::locality_data local_addr; - std::size_t addrlen = locality_defs::array_size; - int ret = fi_getname(id, local_addr.data(), &addrlen); - if (ret || (addrlen > locality_defs::array_size)) + // -------------------------------------------------------------------- + void enable_endpoint(struct fid_ep* endpoint, char const* type) { - std::string err = - std::to_string(addrlen) + "=" + std::to_string(locality_defs::array_size); - NS_LIBFABRIC::fabric_error(ret, "fi_getname - size error or other problem " + err); + [[maybe_unused]] auto scp = + NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__, type); + + LF_DEB(NS_DEBUG::cnb_deb, + debug(debug::str<>("Enabling endpoint"), NS_DEBUG::ptr(endpoint))); + int ret = fi_enable(endpoint); + if (ret) throw NS_LIBFABRIC::fabric_error(ret, "fi_enable"); } - // optimized out when debug logging is false - if constexpr (NS_DEBUG::cnb_deb.is_enabled()) + // -------------------------------------------------------------------- + locality get_endpoint_address(struct fid* id) { - std::stringstream temp1; - for (std::size_t i = 0; i < locality_defs::array_length; ++i) + 
[[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + + locality::locality_data local_addr; + std::size_t addrlen = locality_defs::array_size; + int ret = fi_getname(id, local_addr.data(), &addrlen); + if (ret || (addrlen > locality_defs::array_size)) { - temp1 << debug::ipaddr(&local_addr[i]) << " - "; + std::string err = + std::to_string(addrlen) + "=" + std::to_string(locality_defs::array_size); + NS_LIBFABRIC::fabric_error(ret, "fi_getname - size error or other problem " + err); } - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("raw address data"), "size", - debug::dec<>(addrlen), " : ", temp1.str().c_str())); - std::stringstream temp2; - for (std::size_t i = 0; i < locality_defs::array_length; ++i) + // optimized out when debug logging is false + if constexpr (NS_DEBUG::cnb_deb.is_enabled()) { - temp2 << debug::hex<8>(local_addr[i]) << " - "; + std::stringstream temp1; + for (std::size_t i = 0; i < locality_defs::array_length; ++i) + { + temp1 << debug::ipaddr(&local_addr[i]) << " - "; + } + + LF_DEB(NS_DEBUG::cnb_deb, + debug(debug::str<>("raw address data"), "size", debug::dec<>(addrlen), " : ", + temp1.str().c_str())); + std::stringstream temp2; + for (std::size_t i = 0; i < locality_defs::array_length; ++i) + { + temp2 << debug::hex<8>(local_addr[i]) << " - "; + } + LF_DEB(NS_DEBUG::cnb_deb, + debug(debug::str<>("raw address data"), temp2.str().c_str())); } - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("raw address data"), temp2.str().c_str())); + return locality(local_addr); } - return locality(local_addr); - } - // -------------------------------------------------------------------- - fid_pep* create_passive_endpoint(struct fid_fabric* fabric, struct fi_info* info) - { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + // -------------------------------------------------------------------- + fid_pep* create_passive_endpoint(struct fid_fabric* fabric, struct fi_info* info) + { + 
[[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); - struct fid_pep* ep; - int ret = fi_passive_ep(fabric, info, &ep, nullptr); - if (ret) { throw NS_LIBFABRIC::fabric_error(ret, "Failed to create fi_passive_ep"); } - return ep; - } + struct fid_pep* ep; + int ret = fi_passive_ep(fabric, info, &ep, nullptr); + if (ret) { throw NS_LIBFABRIC::fabric_error(ret, "Failed to create fi_passive_ep"); } + return ep; + } - // -------------------------------------------------------------------- - inline const locality& here() const { return here_; } + // -------------------------------------------------------------------- + inline locality const& here() const { return here_; } - // -------------------------------------------------------------------- - inline const fi_addr_t& fi_address() const { return here_.fi_address(); } + // -------------------------------------------------------------------- + inline fi_addr_t const& fi_address() const { return here_.fi_address(); } - // -------------------------------------------------------------------- - inline void setHere(const locality& val) { here_ = val; } + // -------------------------------------------------------------------- + inline void setHere(locality const& val) { here_ = val; } - // -------------------------------------------------------------------- - inline const locality& root() const { return root_; } + // -------------------------------------------------------------------- + inline locality const& root() const { return root_; } - // -------------------------------------------------------------------- - inline struct fid_domain* get_domain() const { return fabric_domain_; } + // -------------------------------------------------------------------- + inline struct fid_domain* get_domain() const { return fabric_domain_; } - // -------------------------------------------------------------------- - inline std::size_t get_rma_protocol_size() { return 65536; } + // 
-------------------------------------------------------------------- + inline std::size_t get_rma_protocol_size() { return 65536; } #ifdef DISABLE_FI_INJECT - // -------------------------------------------------------------------- - inline std::size_t get_tx_inject_size() { return 0; } + // -------------------------------------------------------------------- + inline std::size_t get_tx_inject_size() { return 0; } #else - // -------------------------------------------------------------------- - inline std::size_t get_tx_inject_size() { return tx_inject_size_; } + // -------------------------------------------------------------------- + inline std::size_t get_tx_inject_size() { return tx_inject_size_; } #endif - // -------------------------------------------------------------------- - inline std::size_t get_tx_size() { return tx_attr_size_; } + // -------------------------------------------------------------------- + inline std::size_t get_tx_size() { return tx_attr_size_; } - // -------------------------------------------------------------------- - inline std::size_t get_rx_size() { return rx_attr_size_; } + // -------------------------------------------------------------------- + inline std::size_t get_rx_size() { return rx_attr_size_; } - // -------------------------------------------------------------------- - // returns true when all connections have been disconnected and none are active - inline bool isTerminated() - { - return false; - //return (qp_endpoint_map_.size() == 0); - } + // -------------------------------------------------------------------- + // returns true when all connections have been disconnected and none are active + inline bool isTerminated() + { + return false; + //return (qp_endpoint_map_.size() == 0); + } - // -------------------------------------------------------------------- - void debug_print_av_vector(std::size_t N) - { - locality addr; - std::size_t addrlen = locality_defs::array_size; - for (std::size_t i = 0; i < N; ++i) + // 
-------------------------------------------------------------------- + void debug_print_av_vector(std::size_t N) { - int ret = fi_av_lookup(av_, fi_addr_t(i), addr.fabric_data_writable(), &addrlen); - addr.set_fi_address(fi_addr_t(i)); - if ((ret == 0) && (addrlen == locality_defs::array_size)) - { - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("address vector"), debug::dec<3>(i), iplocality(addr))); - } - else + locality addr; + std::size_t addrlen = locality_defs::array_size; + for (std::size_t i = 0; i < N; ++i) { - LF_DEB(NS_DEBUG::cnb_err, - error(debug::str<>("address length"), debug::dec<3>(addrlen), - debug::dec<3>(locality_defs::array_size))); - throw std::runtime_error("debug_print_av_vector : address vector " - "traversal failure"); + int ret = fi_av_lookup(av_, fi_addr_t(i), addr.fabric_data_writable(), &addrlen); + addr.set_fi_address(fi_addr_t(i)); + if ((ret == 0) && (addrlen == locality_defs::array_size)) + { + LF_DEB(NS_DEBUG::cnb_deb, + debug(debug::str<>("address vector"), debug::dec<3>(i), iplocality(addr))); + } + else + { + LF_DEB(NS_DEBUG::cnb_err, + error(debug::str<>("address length"), debug::dec<3>(addrlen), + debug::dec<3>(locality_defs::array_size))); + throw std::runtime_error("debug_print_av_vector : address vector " + "traversal failure"); + } } } - } - // -------------------------------------------------------------------- - inline constexpr bool bypass_tx_lock() - { + // -------------------------------------------------------------------- + inline constexpr bool bypass_tx_lock() + { #if defined(HAVE_LIBFABRIC_GNI) - return true; + return true; #elif defined(HAVE_LIBFABRIC_CXI) - // @todo : cxi provider is not yet thread safe using scalable endpoints - return false; + // @todo : cxi provider is not yet thread safe using scalable endpoints + return false; #else - return (threadlevel_flags() == FI_THREAD_SAFE || + return (threadlevel_flags() == FI_THREAD_SAFE || endpoint_type_ == endpoint_type::threadlocalTx); #endif - } + } - // 
-------------------------------------------------------------------- - inline controller_base::unique_lock get_tx_lock() - { - if (bypass_tx_lock()) return unique_lock(); - return unique_lock(send_mutex_); - } + // -------------------------------------------------------------------- + inline controller_base::unique_lock get_tx_lock() + { + if (bypass_tx_lock()) return unique_lock(); + return unique_lock(send_mutex_); + } - // -------------------------------------------------------------------- - inline controller_base::unique_lock try_tx_lock() - { - if (bypass_tx_lock()) return unique_lock(); - return unique_lock(send_mutex_, std::try_to_lock_t{}); - } + // -------------------------------------------------------------------- + inline controller_base::unique_lock try_tx_lock() + { + if (bypass_tx_lock()) return unique_lock(); + return unique_lock(send_mutex_, std::try_to_lock_t{}); + } - // -------------------------------------------------------------------- - inline constexpr bool bypass_rx_lock() - { + // -------------------------------------------------------------------- + inline constexpr bool bypass_rx_lock() + { #ifdef HAVE_LIBFABRIC_GNI - return true; + return true; #else - return ( - threadlevel_flags() == FI_THREAD_SAFE || endpoint_type_ == endpoint_type::scalableTxRx); + return (threadlevel_flags() == FI_THREAD_SAFE || + endpoint_type_ == endpoint_type::scalableTxRx); #endif - } + } - // -------------------------------------------------------------------- - inline controller_base::unique_lock get_rx_lock() - { - if (bypass_rx_lock()) return unique_lock(); - return unique_lock(recv_mutex_); - } + // -------------------------------------------------------------------- + inline controller_base::unique_lock get_rx_lock() + { + if (bypass_rx_lock()) return unique_lock(); + return unique_lock(recv_mutex_); + } - // -------------------------------------------------------------------- - inline controller_base::unique_lock try_rx_lock() - { - if 
(bypass_rx_lock()) return unique_lock(); - return unique_lock(recv_mutex_, std::try_to_lock_t{}); - } + // -------------------------------------------------------------------- + inline controller_base::unique_lock try_rx_lock() + { + if (bypass_rx_lock()) return unique_lock(); + return unique_lock(recv_mutex_, std::try_to_lock_t{}); + } - // -------------------------------------------------------------------- - progress_status poll_for_work_completions(void* user_data) - { - progress_status p{0, 0}; - bool retry = false; - do { - // sends - uint32_t nsend = static_cast(this)->poll_send_queue( - get_tx_endpoint().get_tx_cq(), user_data); - p.m_num_sends += nsend; - retry = (nsend == max_completions_per_poll_); - // recvs - uint32_t nrecv = static_cast(this)->poll_recv_queue( - get_rx_endpoint().get_rx_cq(), user_data); - p.m_num_recvs += nrecv; - retry |= (nrecv == max_completions_per_poll_); - } while (retry); - return p; - } + // -------------------------------------------------------------------- + progress_status poll_for_work_completions(void* user_data) + { + progress_status p{0, 0}; + bool retry = false; + do { + // sends + uint32_t nsend = static_cast(this)->poll_send_queue( + get_tx_endpoint().get_tx_cq(), user_data); + p.m_num_sends += nsend; + retry = (nsend == max_completions_per_poll_); + // recvs + uint32_t nrecv = static_cast(this)->poll_recv_queue( + get_rx_endpoint().get_rx_cq(), user_data); + p.m_num_recvs += nrecv; + retry |= (nrecv == max_completions_per_poll_); + } while (retry); + return p; + } - // -------------------------------------------------------------------- - inline int poll_send_queue(fid_cq* tx_cq, void* user_data) - { - return static_cast(this)->poll_send_queue(tx_cq, user_data); - } + // -------------------------------------------------------------------- + inline int poll_send_queue(fid_cq* tx_cq, void* user_data) + { + return static_cast(this)->poll_send_queue(tx_cq, user_data); + } - // 
-------------------------------------------------------------------- - inline int poll_recv_queue(fid_cq* rx_cq, void* user_data) - { - return static_cast(this)->poll_recv_queue(rx_cq, user_data); - } + // -------------------------------------------------------------------- + inline int poll_recv_queue(fid_cq* rx_cq, void* user_data) + { + return static_cast(this)->poll_recv_queue(rx_cq, user_data); + } - // -------------------------------------------------------------------- - struct fid_cq* create_completion_queue(struct fid_domain* domain, size_t size, const char* type) - { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__, type); - - struct fid_cq* cq; - fi_cq_attr cq_attr = {}; - cq_attr.format = FI_CQ_FORMAT_MSG; - cq_attr.wait_obj = FI_WAIT_NONE; - cq_attr.wait_cond = FI_CQ_COND_NONE; - cq_attr.size = size; - cq_attr.flags = 0 /*FI_COMPLETION*/; - LF_DEB(NS_DEBUG::cnb_deb, trace(debug::str<>("CQ size"), debug::dec<4>(size))); - // open completion queue on fabric domain and set context to null - int ret = fi_cq_open(domain, &cq_attr, &cq, nullptr); - if (ret) throw NS_LIBFABRIC::fabric_error(ret, "fi_cq_open"); - return cq; - } + // -------------------------------------------------------------------- + struct fid_cq* create_completion_queue( + struct fid_domain* domain, size_t size, char const* type) + { + [[maybe_unused]] auto scp = + NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__, type); + + struct fid_cq* cq; + fi_cq_attr cq_attr = {}; + cq_attr.format = FI_CQ_FORMAT_MSG; + cq_attr.wait_obj = FI_WAIT_NONE; + cq_attr.wait_cond = FI_CQ_COND_NONE; + cq_attr.size = size; + cq_attr.flags = 0 /*FI_COMPLETION*/; + LF_DEB(NS_DEBUG::cnb_deb, trace(debug::str<>("CQ size"), debug::dec<4>(size))); + // open completion queue on fabric domain and set context to null + int ret = fi_cq_open(domain, &cq_attr, &cq, nullptr); + if (ret) throw NS_LIBFABRIC::fabric_error(ret, "fi_cq_open"); + return cq; + } - // 
-------------------------------------------------------------------- - fid_av* create_address_vector(struct fi_info* info, int N, int num_rx_contexts) - { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + // -------------------------------------------------------------------- + fid_av* create_address_vector(struct fi_info* info, int N, int num_rx_contexts) + { + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); - fid_av* av; - fi_av_attr av_attr = {fi_av_type(0), 0, 0, 0, nullptr, nullptr, 0}; + fid_av* av; + fi_av_attr av_attr = {fi_av_type(0), 0, 0, 0, nullptr, nullptr, 0}; - // number of addresses expected - av_attr.count = N; + // number of addresses expected + av_attr.count = N; - // number of receive contexts used - int rx_ctx_bits = 0; + // number of receive contexts used + int rx_ctx_bits = 0; #ifdef RX_CONTEXTS_SUPPORT - while (num_rx_contexts >> ++rx_ctx_bits) - ; - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("rx_ctx_bits"), rx_ctx_bits)); + while (num_rx_contexts >> ++rx_ctx_bits); + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("rx_ctx_bits"), rx_ctx_bits)); #endif - av_attr.rx_ctx_bits = rx_ctx_bits; - // if contexts is nonzero, then we are using a single scalable endpoint - av_attr.ep_per_node = (num_rx_contexts > 0) ? 2 : 0; - - if (info->domain_attr->av_type != FI_AV_UNSPEC) - { - av_attr.type = info->domain_attr->av_type; - } - else - { - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("map FI_AV_TABLE"))); - av_attr.type = FI_AV_TABLE; - } + av_attr.rx_ctx_bits = rx_ctx_bits; + // if contexts is nonzero, then we are using a single scalable endpoint + av_attr.ep_per_node = (num_rx_contexts > 0) ? 
2 : 0; - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Creating AV"))); - int ret = fi_av_open(fabric_domain_, &av_attr, &av, nullptr); - if (ret) throw NS_LIBFABRIC::fabric_error(ret, "fi_av_open"); - return av; - } + if (info->domain_attr->av_type != FI_AV_UNSPEC) + { + av_attr.type = info->domain_attr->av_type; + } + else + { + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("map FI_AV_TABLE"))); + av_attr.type = FI_AV_TABLE; + } - // -------------------------------------------------------------------- - locality insert_address(const locality& address) { return insert_address(av_, address); } + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Creating AV"))); + int ret = fi_av_open(fabric_domain_, &av_attr, &av, nullptr); + if (ret) throw NS_LIBFABRIC::fabric_error(ret, "fi_av_open"); + return av; + } - // -------------------------------------------------------------------- - locality insert_address(fid_av* av, const locality& address) - { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + // -------------------------------------------------------------------- + locality insert_address(locality const& address) { return insert_address(av_, address); } - LF_DEB(NS_DEBUG::cnb_deb, - trace(debug::str<>("inserting AV"), iplocality(address), NS_DEBUG::ptr(av))); - fi_addr_t fi_addr = 0xffffffff; - int ret = fi_av_insert(av, address.fabric_data(), 1, &fi_addr, 0, nullptr); - if (ret < 0) { throw NS_LIBFABRIC::fabric_error(ret, "fi_av_insert"); } - else if (ret == 0) + // -------------------------------------------------------------------- + locality insert_address(fid_av* av, locality const& address) { - NS_DEBUG::cnb_deb.error("fi_av_insert called with existing address"); - NS_LIBFABRIC::fabric_error(ret, "fi_av_insert did not return 1"); + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + + LF_DEB(NS_DEBUG::cnb_deb, + trace(debug::str<>("inserting AV"), iplocality(address), NS_DEBUG::ptr(av))); + 
fi_addr_t fi_addr = 0xffff'ffff; + int ret = fi_av_insert(av, address.fabric_data(), 1, &fi_addr, 0, nullptr); + if (ret < 0) { throw NS_LIBFABRIC::fabric_error(ret, "fi_av_insert"); } + else if (ret == 0) + { + NS_DEBUG::cnb_deb.error("fi_av_insert called with existing address"); + NS_LIBFABRIC::fabric_error(ret, "fi_av_insert did not return 1"); + } + // address was generated correctly, now update the locality with the fi_addr + locality new_locality(address, fi_addr); + LF_DEB(NS_DEBUG::cnb_deb, + trace(debug::str<>("AV add"), "rank", debug::dec<>(fi_addr), + iplocality(new_locality), "fi_addr", debug::hex<4>(fi_addr))); + return new_locality; } - // address was generated correctly, now update the locality with the fi_addr - locality new_locality(address, fi_addr); - LF_DEB(NS_DEBUG::cnb_deb, trace(debug::str<>("AV add"), "rank", debug::dec<>(fi_addr), - iplocality(new_locality), "fi_addr", debug::hex<4>(fi_addr))); - return new_locality; - } -}; + }; -} // namespace NS_LIBFABRIC +} // namespace NS_LIBFABRIC diff --git a/src/libfabric/fabric_error.hpp b/src/libfabric/fabric_error.hpp index 0f2db4c1..325975a7 100644 --- a/src/libfabric/fabric_error.hpp +++ b/src/libfabric/fabric_error.hpp @@ -10,43 +10,41 @@ #pragma once #include -#include #include +#include // #include // #include "oomph_libfabric_defines.hpp" -namespace NS_DEBUG -{ -// cppcheck-suppress ConfigurationNotChecked -static NS_DEBUG::enable_print err_deb("ERROR__"); -} // namespace NS_DEBUG +namespace NS_DEBUG { + // cppcheck-suppress ConfigurationNotChecked + static NS_DEBUG::enable_print err_deb("ERROR__"); +} // namespace NS_DEBUG -namespace NS_LIBFABRIC -{ +namespace NS_LIBFABRIC { -class fabric_error : public std::runtime_error -{ - public: - // -------------------------------------------------------------------- - fabric_error(int err, const std::string& msg) - : std::runtime_error(std::string(fi_strerror(-err)) + msg) - , error_(err) + class fabric_error : public std::runtime_error { - 
NS_DEBUG::err_deb.error(msg, ":", fi_strerror(-err)); - std::terminate(); - } + public: + // -------------------------------------------------------------------- + fabric_error(int err, std::string const& msg) + : std::runtime_error(std::string(fi_strerror(-err)) + msg) + , error_(err) + { + NS_DEBUG::err_deb.error(msg, ":", fi_strerror(-err)); + std::terminate(); + } - fabric_error(int err) - : std::runtime_error(fi_strerror(-err)) - , error_(-err) - { - NS_DEBUG::err_deb.error(what()); - std::terminate(); - } + fabric_error(int err) + : std::runtime_error(fi_strerror(-err)) + , error_(-err) + { + NS_DEBUG::err_deb.error(what()); + std::terminate(); + } - int error_; -}; + int error_; + }; -} // namespace NS_LIBFABRIC +} // namespace NS_LIBFABRIC diff --git a/src/libfabric/libfabric_defines_template.hpp b/src/libfabric/libfabric_defines_template.hpp index 64c04944..efd2bb67 100644 --- a/src/libfabric/libfabric_defines_template.hpp +++ b/src/libfabric/libfabric_defines_template.hpp @@ -14,26 +14,26 @@ // some namespaces for the lib and for debugging are setup correctly #define NS_LIBFABRIC oomph::libfabric -#define NS_MEMORY oomph::libfabric -#define NS_DEBUG oomph::debug +#define NS_MEMORY oomph::libfabric +#define NS_DEBUG oomph::debug #ifndef LF_DEB -#define LF_DEB(printer, Expr) \ - if constexpr (printer.is_enabled()) { printer.Expr; }; +# define LF_DEB(printer, Expr) \ + if constexpr (printer.is_enabled()) { printer.Expr; }; #endif #define LFSOURCE_DIR "@OOMPH_SRC_LIBFABRIC_DIR@" -#define LFPRINT_HPP "@OOMPH_SRC_LIBFABRIC_DIR@/print.hpp" -#define LFCOUNT_HPP "@OOMPH_SRC_LIBFABRIC_DIR@/simple_counter.hpp" +#define LFPRINT_HPP "@OOMPH_SRC_LIBFABRIC_DIR@/print.hpp" +#define LFCOUNT_HPP "@OOMPH_SRC_LIBFABRIC_DIR@/simple_counter.hpp" // oomph has a debug print helper file in the main source tree #if __has_include(LFPRINT_HPP) -#include LFPRINT_HPP -#define has_debug 1 +# include LFPRINT_HPP +# define has_debug 1 #endif #if __has_include(LFCOUNT_HPP) -#include 
LFCOUNT_HPP +# include LFCOUNT_HPP #endif #endif diff --git a/src/libfabric/locality.cpp b/src/libfabric/locality.cpp index 487912f5..ff23eeb5 100644 --- a/src/libfabric/locality.cpp +++ b/src/libfabric/locality.cpp @@ -10,27 +10,22 @@ #include -namespace oomph -{ -namespace libfabric -{ +namespace oomph { namespace libfabric { -// ------------------------------------------------------------------ -// format as ip address, port, libfabric address -// ------------------------------------------------------------------ -iplocality::iplocality(const locality& l) -: data(l) -{ -} + // ------------------------------------------------------------------ + // format as ip address, port, libfabric address + // ------------------------------------------------------------------ + iplocality::iplocality(locality const& l) + : data(l) + { + } -std::ostream& -operator<<(std::ostream& os, const iplocality& p) -{ - os << std::dec << NS_DEBUG::ipaddr(p.data.fabric_data()) << " - " - << NS_DEBUG::ipaddr(p.data.ip_address()) << ":" << NS_DEBUG::dec<>(p.data.port()) << " (" - << NS_DEBUG::dec<>(p.data.fi_address()) << ") "; - return os; -} + std::ostream& operator<<(std::ostream& os, iplocality const& p) + { + os << std::dec << NS_DEBUG::ipaddr(p.data.fabric_data()) << " - " + << NS_DEBUG::ipaddr(p.data.ip_address()) << ":" << NS_DEBUG::dec<>(p.data.port()) << " (" + << NS_DEBUG::dec<>(p.data.fi_address()) << ") "; + return os; + } -} // namespace libfabric -} // namespace oomph +}} // namespace oomph::libfabric diff --git a/src/libfabric/locality.hpp b/src/libfabric/locality.hpp index 74f6b290..84f5ddc2 100644 --- a/src/libfabric/locality.hpp +++ b/src/libfabric/locality.hpp @@ -15,243 +15,238 @@ #include #include // -#include -#include #include +#include +#include // #include "oomph_libfabric_defines.hpp" // Different providers use different address formats that we must accommodate // in our locality object. 
#ifdef HAVE_LIBFABRIC_GNI -#define HAVE_LIBFABRIC_LOCALITY_SIZE 48 +# define HAVE_LIBFABRIC_LOCALITY_SIZE 48 #endif #ifdef HAVE_LIBFABRIC_CXI -#ifdef HAVE_LIBFABRIC_CXI_1_15 -#define HAVE_LIBFABRIC_LOCALITY_SIZE sizeof(int) -#else -#define HAVE_LIBFABRIC_LOCALITY_SIZE sizeof(long int) -#endif +# ifdef HAVE_LIBFABRIC_CXI_1_15 +# define HAVE_LIBFABRIC_LOCALITY_SIZE sizeof(int) +# else +# define HAVE_LIBFABRIC_LOCALITY_SIZE sizeof(long int) +# endif #endif #ifdef HAVE_LIBFABRIC_EFA -#define HAVE_LIBFABRIC_LOCALITY_SIZE 32 +# define HAVE_LIBFABRIC_LOCALITY_SIZE 32 #endif #if defined(HAVE_LIBFABRIC_VERBS) || defined(HAVE_LIBFABRIC_TCP) || \ defined(HAVE_LIBFABRIC_SOCKETS) || defined(HAVE_LIBFABRIC_PSM2) -#define HAVE_LIBFABRIC_LOCALITY_SIZE 16 -#define HAVE_LIBFABRIC_LOCALITY_SOCKADDR +# define HAVE_LIBFABRIC_LOCALITY_SIZE 16 +# define HAVE_LIBFABRIC_LOCALITY_SOCKADDR #endif -namespace oomph -{ -// cppcheck-suppress ConfigurationNotChecked -static NS_DEBUG::enable_print loc_deb("LOCALTY"); -} // namespace oomph - -namespace oomph -{ -namespace libfabric -{ - -struct locality; - -// ------------------------------------------------------------------ -// format as ip address, port, libfabric address -// ------------------------------------------------------------------ -struct iplocality -{ - const locality& data; - iplocality(const locality& a); - friend std::ostream& operator<<(std::ostream& os, const iplocality& p); -}; - -// -------------------------------------------------------------------- -// Locality, in this structure we store the information required by -// libfabric to make a connection to another node. -// With libfabric 1.4.x the array contains the fabric ip address stored -// as the second uint32_t in the array. 
For this reason we use an -// array of uint32_t rather than uint8_t/char so we can easily access -// the ip for debug/validation purposes -// -------------------------------------------------------------------- -namespace locality_defs -{ -// the number of 32bit ints stored in our array -const uint32_t array_size = HAVE_LIBFABRIC_LOCALITY_SIZE; -const uint32_t array_length = HAVE_LIBFABRIC_LOCALITY_SIZE / 4; -} // namespace locality_defs - -struct locality -{ - // array type of our locality data - typedef std::array locality_data; - - static const char* type() { return "libfabric"; } - - explicit locality(const locality_data& in_data) - { - std::memcpy(&data_[0], &in_data[0], locality_defs::array_size); - fi_address_ = 0; - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("expl constructing"), iplocality((*this)))); - } - - locality() - { - std::memset(&data_[0], 0x00, locality_defs::array_size); - fi_address_ = 0; - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("default construct"), iplocality((*this)))); - } - - locality(const locality& other) - : data_(other.data_) - , fi_address_(other.fi_address_) - { - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("copy construct"), iplocality((*this)))); - } - - locality(const locality& other, fi_addr_t addr) - : data_(other.data_) - , fi_address_(addr) - { - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("copy fi construct"), iplocality((*this)))); - } - - locality(locality&& other) - : data_(std::move(other.data_)) - , fi_address_(other.fi_address_) - { - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("move construct"), iplocality((*this)))); - } - - // provided to support sockets mode bootstrap - explicit locality(const std::string& address, const std::string& portnum) - { - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("explicit construct"), address, ":", portnum)); - // - struct sockaddr_in socket_data; - memset(&socket_data, 0, sizeof(socket_data)); - socket_data.sin_family = AF_INET; - socket_data.sin_port = htons(std::stol(portnum)); - inet_pton(AF_INET, 
address.c_str(), &(socket_data.sin_addr)); - // - std::memcpy(&data_[0], &socket_data, locality_defs::array_size); - fi_address_ = 0; - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("string constructing"), iplocality((*this)))); - } - - // some condition marking this locality as valid - explicit inline operator bool() const - { - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("bool operator"), iplocality((*this)))); - return (ip_address() != 0); - } - - inline bool valid() const - { - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("valid operator"), iplocality((*this)))); - return (ip_address() != 0); - } - - locality& operator=(const locality& other) - { - data_ = other.data_; - fi_address_ = other.fi_address_; - LF_DEB(loc_deb, - trace(NS_DEBUG::str<>("copy operator"), iplocality(*this), iplocality(other))); - return *this; - } - - bool operator==(const locality& other) - { - LF_DEB(loc_deb, - trace(NS_DEBUG::str<>("equality operator"), iplocality(*this), iplocality(other))); - return std::memcmp(&data_, &other.data_, locality_defs::array_size) == 0; - } - - bool less_than(const locality& other) - { - LF_DEB(loc_deb, - trace(NS_DEBUG::str<>("less operator"), iplocality(*this), iplocality(other))); - if (ip_address() < other.ip_address()) return true; - if (ip_address() == other.ip_address()) return port() < other.port(); - return false; - } - - const uint32_t& ip_address() const - { +namespace oomph { + // cppcheck-suppress ConfigurationNotChecked + static NS_DEBUG::enable_print loc_deb("LOCALTY"); +} // namespace oomph + +namespace oomph { namespace libfabric { + + struct locality; + + // ------------------------------------------------------------------ + // format as ip address, port, libfabric address + // ------------------------------------------------------------------ + struct iplocality + { + locality const& data; + iplocality(locality const& a); + friend std::ostream& operator<<(std::ostream& os, iplocality const& p); + }; + + // 
-------------------------------------------------------------------- + // Locality, in this structure we store the information required by + // libfabric to make a connection to another node. + // With libfabric 1.4.x the array contains the fabric ip address stored + // as the second uint32_t in the array. For this reason we use an + // array of uint32_t rather than uint8_t/char so we can easily access + // the ip for debug/validation purposes + // -------------------------------------------------------------------- + namespace locality_defs { + // the number of 32bit ints stored in our array + uint32_t const array_size = HAVE_LIBFABRIC_LOCALITY_SIZE; + uint32_t const array_length = HAVE_LIBFABRIC_LOCALITY_SIZE / 4; + } // namespace locality_defs + + struct locality + { + // array type of our locality data + typedef std::array locality_data; + + static char const* type() { return "libfabric"; } + + explicit locality(locality_data const& in_data) + { + std::memcpy(&data_[0], &in_data[0], locality_defs::array_size); + fi_address_ = 0; + LF_DEB(loc_deb, trace(NS_DEBUG::str<>("expl constructing"), iplocality((*this)))); + } + + locality() + { + std::memset(&data_[0], 0x00, locality_defs::array_size); + fi_address_ = 0; + LF_DEB(loc_deb, trace(NS_DEBUG::str<>("default construct"), iplocality((*this)))); + } + + locality(locality const& other) + : data_(other.data_) + , fi_address_(other.fi_address_) + { + LF_DEB(loc_deb, trace(NS_DEBUG::str<>("copy construct"), iplocality((*this)))); + } + + locality(locality const& other, fi_addr_t addr) + : data_(other.data_) + , fi_address_(addr) + { + LF_DEB(loc_deb, trace(NS_DEBUG::str<>("copy fi construct"), iplocality((*this)))); + } + + locality(locality&& other) + : data_(std::move(other.data_)) + , fi_address_(other.fi_address_) + { + LF_DEB(loc_deb, trace(NS_DEBUG::str<>("move construct"), iplocality((*this)))); + } + + // provided to support sockets mode bootstrap + explicit locality(std::string const& address, std::string 
const& portnum) + { + LF_DEB(loc_deb, trace(NS_DEBUG::str<>("explicit construct"), address, ":", portnum)); + // + struct sockaddr_in socket_data; + memset(&socket_data, 0, sizeof(socket_data)); + socket_data.sin_family = AF_INET; + socket_data.sin_port = htons(std::stol(portnum)); + inet_pton(AF_INET, address.c_str(), &(socket_data.sin_addr)); + // + std::memcpy(&data_[0], &socket_data, locality_defs::array_size); + fi_address_ = 0; + LF_DEB(loc_deb, trace(NS_DEBUG::str<>("string constructing"), iplocality((*this)))); + } + + // some condition marking this locality as valid + explicit inline operator bool() const + { + LF_DEB(loc_deb, trace(NS_DEBUG::str<>("bool operator"), iplocality((*this)))); + return (ip_address() != 0); + } + + inline bool valid() const + { + LF_DEB(loc_deb, trace(NS_DEBUG::str<>("valid operator"), iplocality((*this)))); + return (ip_address() != 0); + } + + locality& operator=(locality const& other) + { + data_ = other.data_; + fi_address_ = other.fi_address_; + LF_DEB(loc_deb, + trace(NS_DEBUG::str<>("copy operator"), iplocality(*this), iplocality(other))); + return *this; + } + + bool operator==(locality const& other) + { + LF_DEB(loc_deb, + trace(NS_DEBUG::str<>("equality operator"), iplocality(*this), iplocality(other))); + return std::memcmp(&data_, &other.data_, locality_defs::array_size) == 0; + } + + bool less_than(locality const& other) + { + LF_DEB(loc_deb, + trace(NS_DEBUG::str<>("less operator"), iplocality(*this), iplocality(other))); + if (ip_address() < other.ip_address()) return true; + if (ip_address() == other.ip_address()) return port() < other.port(); + return false; + } + + uint32_t const& ip_address() const + { #if defined(HAVE_LIBFABRIC_LOCALITY_SOCKADDR) - return reinterpret_cast(data_.data())->sin_addr.s_addr; + return reinterpret_cast(data_.data())->sin_addr.s_addr; #elif defined(HAVE_LIBFABRIC_GNI) - return data_[0]; + return data_[0]; #elif defined(HAVE_LIBFABRIC_CXI) - return data_[0]; + return data_[0]; #elif 
defined(HAVE_LIBFABRIC_EFA) - return data_[0]; + return data_[0]; #else - throw fabric_error(0, "unsupported fabric provider, please fix ASAP"); + throw fabric_error(0, "unsupported fabric provider, please fix ASAP"); #endif - } + } - static const uint32_t& ip_address(const locality_data& data) - { + static uint32_t const& ip_address(locality_data const& data) + { #if defined(HAVE_LIBFABRIC_LOCALITY_SOCKADDR) - return reinterpret_cast(&data)->sin_addr.s_addr; + return reinterpret_cast(&data)->sin_addr.s_addr; #elif defined(HAVE_LIBFABRIC_GNI) - return data[0]; + return data[0]; #elif defined(HAVE_LIBFABRIC_CXI) - return data[0]; + return data[0]; #elif defined(HAVE_LIBFABRIC_EFA) - return data[0]; + return data[0]; #else - throw fabric_error(0, "unsupported fabric provider, please fix ASAP"); + throw fabric_error(0, "unsupported fabric provider, please fix ASAP"); #endif - } - - inline const fi_addr_t& fi_address() const { return fi_address_; } - - inline void set_fi_address(fi_addr_t fi_addr) { fi_address_ = fi_addr; } - - inline uint16_t port() const - { - uint16_t port = 256 * reinterpret_cast(data_.data())[2] + - reinterpret_cast(data_.data())[3]; - return port; - } - - inline const void* fabric_data() const { return data_.data(); } - - inline char* fabric_data_writable() { return reinterpret_cast(data_.data()); } - - private: - friend bool operator==(locality const& lhs, locality const& rhs) - { - LF_DEB(loc_deb, - trace(NS_DEBUG::str<>("equality friend"), iplocality(lhs), iplocality(rhs))); - return ((lhs.data_ == rhs.data_) && (lhs.fi_address_ == rhs.fi_address_)); - } - - friend bool operator<(locality const& lhs, locality const& rhs) - { - const uint32_t& a1 = lhs.ip_address(); - const uint32_t& a2 = rhs.ip_address(); - const fi_addr_t& f1 = lhs.fi_address(); - const fi_addr_t& f2 = rhs.fi_address(); - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("less friend"), iplocality(lhs), iplocality(rhs))); - return (a1 < a2) || (a1 == a2 && f1 < f2); - } - - friend 
std::ostream& operator<<(std::ostream& os, locality const& loc) - { - for (uint32_t i = 0; i < locality_defs::array_length; ++i) { os << loc.data_[i]; } - return os; - } - - private: - locality_data data_; - fi_addr_t fi_address_; -}; - -} // namespace libfabric -} // namespace oomph + } + + inline fi_addr_t const& fi_address() const { return fi_address_; } + + inline void set_fi_address(fi_addr_t fi_addr) { fi_address_ = fi_addr; } + + inline uint16_t port() const + { + uint16_t port = 256 * reinterpret_cast(data_.data())[2] + + reinterpret_cast(data_.data())[3]; + return port; + } + + inline void const* fabric_data() const { return data_.data(); } + + inline char* fabric_data_writable() { return reinterpret_cast(data_.data()); } + + private: + friend bool operator==(locality const& lhs, locality const& rhs) + { + LF_DEB(loc_deb, + trace(NS_DEBUG::str<>("equality friend"), iplocality(lhs), iplocality(rhs))); + return ((lhs.data_ == rhs.data_) && (lhs.fi_address_ == rhs.fi_address_)); + } + + friend bool operator<(locality const& lhs, locality const& rhs) + { + uint32_t const& a1 = lhs.ip_address(); + uint32_t const& a2 = rhs.ip_address(); + fi_addr_t const& f1 = lhs.fi_address(); + fi_addr_t const& f2 = rhs.fi_address(); + LF_DEB( + loc_deb, trace(NS_DEBUG::str<>("less friend"), iplocality(lhs), iplocality(rhs))); + return (a1 < a2) || (a1 == a2 && f1 < f2); + } + + friend std::ostream& operator<<(std::ostream& os, locality const& loc) + { + for (uint32_t i = 0; i < locality_defs::array_length; ++i) { os << loc.data_[i]; } + return os; + } + + private: + locality_data data_; + fi_addr_t fi_address_; + }; + +}} // namespace oomph::libfabric diff --git a/src/libfabric/memory_region.hpp b/src/libfabric/memory_region.hpp index 0cd5c4a7..f1eb5326 100644 --- a/src/libfabric/memory_region.hpp +++ b/src/libfabric/memory_region.hpp @@ -18,20 +18,19 @@ #include #include -#include "oomph_libfabric_defines.hpp" #include "fabric_error.hpp" +#include 
"oomph_libfabric_defines.hpp" #ifdef OOMPH_ENABLE_DEVICE -#include +# include #endif // ------------------------------------------------------------------ -namespace NS_MEMORY -{ +namespace NS_MEMORY { -static NS_DEBUG::enable_print mrn_deb("REGION_"); + static NS_DEBUG::enable_print mrn_deb("REGION_"); -/* + /* struct fi_mr_attr { union { const struct iovec *mr_iov; @@ -60,342 +59,356 @@ struct fi_mr_attr { */ -// This is the only part of the code that actually -// calls libfabric functions -struct region_provider -{ - // The internal memory region handle - using provider_region = struct fid_mr; - using provider_domain = struct fid_domain; - - // register region - static inline int fi_register_memory(provider_domain* pd, int device_id, const void* buf, - size_t len, uint64_t access_flags, uint64_t offset, uint64_t request_key, - struct fid_mr** mr) + // This is the only part of the code that actually + // calls libfabric functions + struct region_provider { - [[maybe_unused]] auto scp = - NS_MEMORY::mrn_deb.scope(__func__, NS_DEBUG::ptr(buf), NS_DEBUG::dec<>(len), device_id); - // - struct iovec addresses = {/*.iov_base = */ const_cast(buf), /*.iov_len = */ len}; - fi_mr_attr attr = { - /*.mr_iov = */ &addresses, - /*.iov_count = */ 1, - /*.access = */ access_flags, - /*.offset = */ offset, - /*.requested_key = */ request_key, - /*.context = */ nullptr, - /*.auth_key_size = */ 0, - /*.auth_key = */ nullptr, - /*.iface = */ FI_HMEM_SYSTEM, - /*.device = */ {0}, + // The internal memory region handle + using provider_region = struct fid_mr; + using provider_domain = struct fid_domain; + + // register region + static inline int fi_register_memory(provider_domain* pd, int device_id, void const* buf, + size_t len, uint64_t access_flags, uint64_t offset, uint64_t request_key, + struct fid_mr** mr) + { + [[maybe_unused]] auto scp = NS_MEMORY::mrn_deb.scope( + __func__, NS_DEBUG::ptr(buf), NS_DEBUG::dec<>(len), device_id); + // + struct iovec addresses = {/*.iov_base = */ 
const_cast(buf), /*.iov_len = */ len}; + fi_mr_attr attr = { + /*.mr_iov = */ &addresses, + /*.iov_count = */ 1, + /*.access = */ access_flags, + /*.offset = */ offset, + /*.requested_key = */ request_key, + /*.context = */ nullptr, + /*.auth_key_size = */ 0, + /*.auth_key = */ nullptr, + /*.iface = */ FI_HMEM_SYSTEM, + /*.device = */ {0}, #if (FI_MAJOR_VERSION > 1) || ((FI_MAJOR_VERSION == 1) && FI_MINOR_VERSION > 17) - /*.hmem_data = */ nullptr, + /*.hmem_data = */ nullptr, #endif #if (FI_MAJOR_VERSION >= 2) - /*page_size = */ static_cast(sysconf(_SC_PAGESIZE)), - /*base_mr = */ nullptr, - /*sub_mr_cnt = */ 0, - }; + /*page_size = */ static_cast(sysconf(_SC_PAGESIZE)), + /*base_mr = */ nullptr, + /*sub_mr_cnt = */ 0, + }; #else - }; + }; #endif - if (device_id >= 0) - { + if (device_id >= 0) + { #ifdef OOMPH_ENABLE_DEVICE - attr.device.cuda = device_id; - int handle = hwmalloc::get_device_id(); - attr.device.cuda = handle; -#if defined(OOMPH_DEVICE_CUDA) - attr.iface = FI_HMEM_CUDA; - LF_DEB(NS_MEMORY::mrn_deb, - trace(NS_DEBUG::str<>("CUDA"), "set device id", device_id, handle)); -#elif defined(OOMPH_DEVICE_HIP) - attr.iface = FI_HMEM_ROCR; - LF_DEB(NS_MEMORY::mrn_deb, - trace(NS_DEBUG::str<>("HIP"), "set device id", device_id, handle)); -#endif + attr.device.cuda = device_id; + int handle = hwmalloc::get_device_id(); + attr.device.cuda = handle; +# if defined(OOMPH_DEVICE_CUDA) + attr.iface = FI_HMEM_CUDA; + LF_DEB(NS_MEMORY::mrn_deb, + trace(NS_DEBUG::str<>("CUDA"), "set device id", device_id, handle)); +# elif defined(OOMPH_DEVICE_HIP) + attr.iface = FI_HMEM_ROCR; + LF_DEB(NS_MEMORY::mrn_deb, + trace(NS_DEBUG::str<>("HIP"), "set device id", device_id, handle)); +# endif #endif + } + uint64_t flags = 0; + int ret = fi_mr_regattr(pd, &attr, flags, mr); + if (ret) { throw NS_LIBFABRIC::fabric_error(int(ret), "register_memory"); } + return ret; } - uint64_t flags = 0; - int ret = fi_mr_regattr(pd, &attr, flags, mr); - if (ret) { throw 
NS_LIBFABRIC::fabric_error(int(ret), "register_memory"); } - return ret; - } - // unregister region - static inline int unregister_memory(provider_region* region) { return fi_close(&region->fid); } - - // Default registration flags for this provider - static inline constexpr int access_flags() - { - return FI_READ | FI_WRITE | FI_RECV | FI_SEND /*| FI_REMOTE_READ | FI_REMOTE_WRITE*/; - } + // unregister region + static inline int unregister_memory(provider_region* region) + { + return fi_close(&region->fid); + } - // Get the local descriptor of the memory region. - static inline void* get_local_key(provider_region* const region) { return fi_mr_desc(region); } + // Default registration flags for this provider + static inline constexpr int access_flags() + { + return FI_READ | FI_WRITE | FI_RECV | FI_SEND /*| FI_REMOTE_READ | FI_REMOTE_WRITE*/; + } - // Get the remote key of the memory region. - static inline uint64_t get_remote_key(provider_region* const region) - { - return fi_mr_key(region); - } -}; + // Get the local descriptor of the memory region. + static inline void* get_local_key(provider_region* const region) + { + return fi_mr_desc(region); + } -// -------------------------------------------------------------------- -// This is a handle to a small chunk of memory that has been registered -// as part of a much larger allocation (a memory_segment) -struct memory_handle -{ - // -------------------------------------------------------------------- - using provider_region = region_provider::provider_region; + // Get the remote key of the memory region.
+ static inline uint64_t get_remote_key(provider_region* const region) + { + return fi_mr_key(region); + } + }; // -------------------------------------------------------------------- - // Default constructor creates unusable handle(region) - memory_handle() - : address_{nullptr} - , region_{nullptr} - , size_{0} - , used_space_{0} - { - } - memory_handle(memory_handle const&) noexcept = default; - memory_handle& operator=(memory_handle const&) noexcept = default; - - memory_handle(provider_region* region, unsigned char* addr, - std::size_t size /*, uint32_t flags*/) noexcept - : address_{addr} - , region_{region} - , size_{uint32_t(size)} - , used_space_{0} + // This is a handle to a small chunk of memory that has been registered + // as part of a much larger allocation (a memory_segment) + struct memory_handle { - // LF_DEB(NS_MEMORY::mrn_deb, - // trace(NS_DEBUG::str<>("memory_handle"), *this)); - } + // -------------------------------------------------------------------- + using provider_region = region_provider::provider_region; + + // -------------------------------------------------------------------- + // Default constructor creates unusable handle(region) + memory_handle() + : address_{nullptr} + , region_{nullptr} + , size_{0} + , used_space_{0} + { + } + memory_handle(memory_handle const&) noexcept = default; + memory_handle& operator=(memory_handle const&) noexcept = default; + + memory_handle(provider_region* region, unsigned char* addr, + std::size_t size /*, uint32_t flags*/) noexcept + : address_{addr} + , region_{region} + , size_{uint32_t(size)} + , used_space_{0} + { + // LF_DEB(NS_MEMORY::mrn_deb, + // trace(NS_DEBUG::str<>("memory_handle"), *this)); + } - // -------------------------------------------------------------------- - // move constructor, clear other region so that it is not unregistered twice - memory_handle(memory_handle&& other) noexcept - : address_{other.address_} - , region_{std::exchange(other.region_, nullptr)} - , 
size_{other.size_} - , used_space_{other.used_space_} - { - } + // -------------------------------------------------------------------- + // move constructor, clear other region so that it is not unregistered twice + memory_handle(memory_handle&& other) noexcept + : address_{other.address_} + , region_{std::exchange(other.region_, nullptr)} + , size_{other.size_} + , used_space_{other.used_space_} + { + } - // -------------------------------------------------------------------- - // move assignment, clear other region so that it is not unregistered twice - memory_handle& operator=(memory_handle&& other) noexcept - { - address_ = other.address_; - region_ = std::exchange(other.region_, nullptr); - size_ = other.size_; - used_space_ = other.used_space_; - return *this; - } + // -------------------------------------------------------------------- + // move assignment, clear other region so that it is not unregistered twice + memory_handle& operator=(memory_handle&& other) noexcept + { + address_ = other.address_; + region_ = std::exchange(other.region_, nullptr); + size_ = other.size_; + used_space_ = other.used_space_; + return *this; + } - // -------------------------------------------------------------------- - // Return the address of this memory region block. - inline unsigned char* get_address(void) const { return address_; } + // -------------------------------------------------------------------- + // Return the address of this memory region block. + inline unsigned char* get_address(void) const { return address_; } - // -------------------------------------------------------------------- - // Get the local descriptor of the memory region. - inline void* get_local_key(void) const { return region_provider::get_local_key(region_); } + // -------------------------------------------------------------------- + // Get the local descriptor of the memory region. 
+ inline void* get_local_key(void) const { return region_provider::get_local_key(region_); } - // -------------------------------------------------------------------- - // Get the remote key of the memory region. - inline uint64_t get_remote_key(void) const { return region_provider::get_remote_key(region_); } + // -------------------------------------------------------------------- + // Get the remote key of the memory region. + inline uint64_t get_remote_key(void) const + { + return region_provider::get_remote_key(region_); + } - // -------------------------------------------------------------------- - // Get the size of the memory chunk usable by this memory region, - // this may be smaller than the value returned by get_length - // if the region is a sub region (partial region) within another block - inline uint64_t get_size(void) const { return size_; } + // -------------------------------------------------------------------- + // Get the size of the memory chunk usable by this memory region, + // this may be smaller than the value returned by get_length + // if the region is a sub region (partial region) within another block + inline uint64_t get_size(void) const { return size_; } - // -------------------------------------------------------------------- - // Get the size used by a message in the memory region. - inline uint32_t get_message_length(void) const { return used_space_; } + // -------------------------------------------------------------------- + // Get the size used by a message in the memory region. + inline uint32_t get_message_length(void) const { return used_space_; } - // -------------------------------------------------------------------- - // Set the size used by a message in the memory region. - inline void set_message_length(uint32_t length) { used_space_ = length; } + // -------------------------------------------------------------------- + // Set the size used by a message in the memory region. 
+ inline void set_message_length(uint32_t length) { used_space_ = length; } - // -------------------------------------------------------------------- - void release_region() noexcept { region_ = nullptr; } + // -------------------------------------------------------------------- + void release_region() noexcept { region_ = nullptr; } - // -------------------------------------------------------------------- - // return the underlying libfabric region handle - inline provider_region* get_region() const { return region_; } + // -------------------------------------------------------------------- + // return the underlying libfabric region handle + inline provider_region* get_region() const { return region_; } - // -------------------------------------------------------------------- - // Deregister the memory region. - // returns 0 when successful, -1 otherwise - int deregister(void) const - { - if (region_ /*&& !get_user_region()*/) + // -------------------------------------------------------------------- + // Deregister the memory region. 
+ // returns 0 when successful, -1 otherwise + int deregister(void) const { - LF_DEB(NS_MEMORY::mrn_deb, trace(NS_DEBUG::str<>("release"), region_)); - // - if (region_provider::unregister_memory(region_)) + if (region_ /*&& !get_user_region()*/) { - LF_DEB(NS_MEMORY::mrn_deb, error("fi_close mr failed")); - return -1; + LF_DEB(NS_MEMORY::mrn_deb, trace(NS_DEBUG::str<>("release"), region_)); + // + if (region_provider::unregister_memory(region_)) + { + LF_DEB(NS_MEMORY::mrn_deb, error("fi_close mr failed")); + return -1; + } + else + { + LF_DEB( + NS_MEMORY::mrn_deb, trace(NS_DEBUG::str<>("de-Registered region"), *this)); + } + region_ = nullptr; } - else - { - LF_DEB(NS_MEMORY::mrn_deb, trace(NS_DEBUG::str<>("de-Registered region"), *this)); - } - region_ = nullptr; + return 0; } - return 0; - } - // -------------------------------------------------------------------- - friend std::ostream& operator<<(std::ostream& os, memory_handle const& region) - { - (void)region; + // -------------------------------------------------------------------- + friend std::ostream& operator<<(std::ostream& os, memory_handle const& region) + { + (void) region; #if 1 || has_debug - os << "region " - << NS_DEBUG::ptr(&region) - //<< " fi_region " << NS_DEBUG::ptr(region.region_) - << " address " << NS_DEBUG::ptr(region.address_) << " size " - << NS_DEBUG::hex<6>(region.size_) - //<< " used_space " << NS_DEBUG::hex<6>(region.used_space_/*size_*/) - << " loc key " - << NS_DEBUG::ptr( - region.region_ ? region_provider::get_local_key(region.region_) : nullptr) - << " rem key " - << NS_DEBUG::ptr(region.region_ ?
region_provider::get_remote_key(region.region_) : 0); - ///// clang-format off - ///// clang-format on + os << "region " + << NS_DEBUG::ptr(&region) + //<< " fi_region " << NS_DEBUG::ptr(region.region_) + << " address " << NS_DEBUG::ptr(region.address_) << " size " + << NS_DEBUG::hex<6>(region.size_) + //<< " used_space " << NS_DEBUG::hex<6>(region.used_space_/*size_*/) + << " loc key " + << NS_DEBUG::ptr( + region.region_ ? region_provider::get_local_key(region.region_) : nullptr) + << " rem key " + << NS_DEBUG::ptr( + region.region_ ? region_provider::get_remote_key(region.region_) : 0); + ///// clang-format off + ///// clang-format on #endif - return os; - } - - protected: - // This gives the start address of this region. - // This is the address that should be used for data storage - unsigned char* address_; + return os; + } - // The hardware level handle to the region (as returned from libfabric fi_mr_reg) - mutable provider_region* region_; + protected: + // This gives the start address of this region. + // This is the address that should be used for data storage + unsigned char* address_; - // The (maximum available) size of the memory buffer - uint32_t size_; + // The hardware level handle to the region (as returned from libfabric fi_mr_reg) + mutable provider_region* region_; - // Space used by a message in the memory region. - // This may be smaller/less than the size available if more space - // was allocated than it turns out was needed - mutable uint32_t used_space_; -}; + // The (maximum available) size of the memory buffer + uint32_t size_; -// -------------------------------------------------------------------- -// a memory segment is a pinned block of memory that has been specialized -// by a particular region provider. Each provider (infiniband, libfabric, -// other) has a different definition for the object and the protection -// domain used to limit access.
-// -------------------------------------------------------------------- -struct memory_segment : public memory_handle -{ - using provider_domain = region_provider::provider_domain; - using provider_region = region_provider::provider_region; - using handle_type = memory_handle; + // Space used by a message in the memory region. + // This may be smaller/less than the size available if more space + // was allocated than it turns out was needed + mutable uint32_t used_space_; + }; // -------------------------------------------------------------------- - memory_segment(provider_region* region, unsigned char* address, unsigned char* base_address, - uint64_t size) - : memory_handle(region, address, size) - , base_addr_(base_address) - { - } - + // a memory segment is a pinned block of memory that has been specialized + // by a particular region provider. Each provider (infiniband, libfabric, + // other) has a different definition for the object and the protection + // domain used to limit access. 
// -------------------------------------------------------------------- - // move constructor, clear other region - memory_segment(memory_segment&& other) noexcept - : memory_handle(std::move(other)) - , base_addr_{std::exchange(other.base_addr_, nullptr)} + struct memory_segment : public memory_handle { - } + using provider_domain = region_provider::provider_domain; + using provider_region = region_provider::provider_region; + using handle_type = memory_handle; + + // -------------------------------------------------------------------- + memory_segment(provider_region* region, unsigned char* address, unsigned char* base_address, + uint64_t size) + : memory_handle(region, address, size) + , base_addr_(base_address) + { + } - // -------------------------------------------------------------------- - // move assignment, clear other region - memory_segment& operator=(memory_segment&& other) noexcept - { - memory_handle(std::move(other)); - region_ = std::exchange(other.region_, nullptr); - return *this; - } + // -------------------------------------------------------------------- + // move constructor, clear other region + memory_segment(memory_segment&& other) noexcept + : memory_handle(std::move(other)) + , base_addr_{std::exchange(other.base_addr_, nullptr)} + { + } - // -------------------------------------------------------------------- - // construct a memory region object by registering an existing address buffer - // we do not cache local/remote keys here because memory segments are only - // used by the heap to store chunks and the user will always receive - // a memory_handle - which does have keys cached - memory_segment(provider_domain* pd, const void* buffer, const uint64_t length, bool bind_mr, - void* ep, int device_id) - { - // an rma key counter to keep some providers (CXI) happy - static std::atomic key = 0; - // - address_ = static_cast(const_cast(buffer)); - size_ = length; - used_space_ = length; - region_ = nullptr; - // - base_addr_ = 
memory_handle::address_; - LF_DEB(NS_MEMORY::mrn_deb, trace(NS_DEBUG::str<>("memory_segment"), *this, device_id)); - - int ret = region_provider::fi_register_memory(pd, device_id, buffer, length, - region_provider::access_flags(), 0, key++, &(region_)); - if (!ret) + // -------------------------------------------------------------------- + // move assignment, clear other region + memory_segment& operator=(memory_segment&& other) noexcept { - LF_DEB(NS_MEMORY::mrn_deb, - trace(NS_DEBUG::str<>("Registered region"), "device", device_id, *this)); + memory_handle(std::move(other)); + region_ = std::exchange(other.region_, nullptr); + return *this; } - if (bind_mr) + // -------------------------------------------------------------------- + // construct a memory region object by registering an existing address buffer + // we do not cache local/remote keys here because memory segments are only + // used by the heap to store chunks and the user will always receive + // a memory_handle - which does have keys cached + memory_segment(provider_domain* pd, void const* buffer, uint64_t const length, bool bind_mr, + void* ep, int device_id) { - ret = fi_mr_bind(region_, (struct fid*)ep, 0); - if (ret) { throw NS_LIBFABRIC::fabric_error(int(ret), "fi_mr_bind"); } - else { LF_DEB(NS_MEMORY::mrn_deb, trace(NS_DEBUG::str<>("Bound region"), *this)); } + // an rma key counter to keep some providers (CXI) happy + static std::atomic key = 0; + // + address_ = static_cast(const_cast(buffer)); + size_ = length; + used_space_ = length; + region_ = nullptr; + // + base_addr_ = memory_handle::address_; + LF_DEB(NS_MEMORY::mrn_deb, trace(NS_DEBUG::str<>("memory_segment"), *this, device_id)); - ret = fi_mr_enable(region_); - if (ret) { throw NS_LIBFABRIC::fabric_error(int(ret), "fi_mr_enable"); } - else { LF_DEB(NS_MEMORY::mrn_deb, trace(NS_DEBUG::str<>("Enabled region"), *this)); } + int ret = region_provider::fi_register_memory(pd, device_id, buffer, length, + region_provider::access_flags(), 
0, key++, &(region_)); + if (!ret) + { + LF_DEB(NS_MEMORY::mrn_deb, + trace(NS_DEBUG::str<>("Registered region"), "device", device_id, *this)); + } + + if (bind_mr) + { + ret = fi_mr_bind(region_, (struct fid*) ep, 0); + if (ret) { throw NS_LIBFABRIC::fabric_error(int(ret), "fi_mr_bind"); } + else { LF_DEB(NS_MEMORY::mrn_deb, trace(NS_DEBUG::str<>("Bound region"), *this)); } + + ret = fi_mr_enable(region_); + if (ret) { throw NS_LIBFABRIC::fabric_error(int(ret), "fi_mr_enable"); } + else + { + LF_DEB(NS_MEMORY::mrn_deb, trace(NS_DEBUG::str<>("Enabled region"), *this)); + } + } } - } - // -------------------------------------------------------------------- - // destroy the region and memory according to flag settings - ~memory_segment() { deregister(); } + // -------------------------------------------------------------------- + // destroy the region and memory according to flag settings + ~memory_segment() { deregister(); } - handle_type get_handle(std::size_t offset, std::size_t size) const noexcept - { - return memory_handle(region_, base_addr_ + offset, size); - } + handle_type get_handle(std::size_t offset, std::size_t size) const noexcept + { + return memory_handle(region_, base_addr_ + offset, size); + } - // -------------------------------------------------------------------- - // Get the address of the base memory region. - // This is the address of the memory allocated from the system - inline unsigned char* get_base_address(void) const { return base_addr_; } + // -------------------------------------------------------------------- + // Get the address of the base memory region. 
+ // This is the address of the memory allocated from the system + inline unsigned char* get_base_address(void) const { return base_addr_; } - // -------------------------------------------------------------------- - friend std::ostream& operator<<(std::ostream& os, memory_segment const& region) - { - (void)region; + // -------------------------------------------------------------------- + friend std::ostream& operator<<(std::ostream& os, memory_segment const& region) + { + (void) region; #if has_debug - // clang-format off + // clang-format off os << *static_cast(®ion) << " base address " << NS_DEBUG::ptr(region.base_addr_); - // clang-format on + // clang-format on #endif - return os; - } + return os; + } - public: - // this is the base address of the memory registered by this segment - // individual memory_handles are offset from this address - unsigned char* base_addr_; -}; + public: + // this is the base address of the memory registered by this segment + // individual memory_handles are offset from this address + unsigned char* base_addr_; + }; -} // namespace NS_MEMORY +} // namespace NS_MEMORY diff --git a/src/libfabric/operation_context.cpp b/src/libfabric/operation_context.cpp index ce5081dd..8c8d277f 100644 --- a/src/libfabric/operation_context.cpp +++ b/src/libfabric/operation_context.cpp @@ -8,49 +8,52 @@ * SPDX-License-Identifier: BSD-3-Clause */ // paths relative to backend -#include -#include #include #include +#include +#include -namespace oomph::libfabric -{ -void -operation_context::handle_cancelled() -{ - [[maybe_unused]] auto scp = opctx_deb<1>.scope(NS_DEBUG::ptr(this), __func__); - // enqueue the cancelled/callback - if (std::holds_alternative(m_req)) - { - // regular (non-shared) recv - auto s = std::get(m_req); - while (!(s->m_comm->m_recv_cb_cancel.push(s))) {} - } - else if (std::holds_alternative(m_req)) +namespace oomph::libfabric { + void operation_context::handle_cancelled() { - // shared recv - auto s = std::get(m_req); - while 
(!(s->m_ctxt->m_recv_cb_cancel.push(s))) {} + [[maybe_unused]] auto scp = opctx_deb<1>.scope(NS_DEBUG::ptr(this), __func__); + // enqueue the cancelled/callback + if (std::holds_alternative(m_req)) + { + // regular (non-shared) recv + auto s = std::get(m_req); + while (!(s->m_comm->m_recv_cb_cancel.push(s))) {} + } + else if (std::holds_alternative(m_req)) + { + // shared recv + auto s = std::get(m_req); + while (!(s->m_ctxt->m_recv_cb_cancel.push(s))) {} + } + else { throw std::runtime_error("Request state invalid in handle_cancelled"); } } - else { throw std::runtime_error("Request state invalid in handle_cancelled"); } -} -int -operation_context::handle_tagged_recv_completion_impl(void* user_data) -{ - [[maybe_unused]] auto scp = opctx_deb<1>.scope(NS_DEBUG::ptr(this), __func__); - if (std::holds_alternative(m_req)) + int operation_context::handle_tagged_recv_completion_impl(void* user_data) { - // regular (non-shared) recv - auto s = std::get(m_req); - //if (std::this_thread::get_id() == thread_id_) - if (reinterpret_cast(user_data) == s->m_comm) + [[maybe_unused]] auto scp = opctx_deb<1>.scope(NS_DEBUG::ptr(this), __func__); + if (std::holds_alternative(m_req)) { - if (!s->m_comm->has_reached_recursion_depth()) + // regular (non-shared) recv + auto s = std::get(m_req); + //if (std::this_thread::get_id() == thread_id_) + if (reinterpret_cast(user_data) == s->m_comm) { - auto inc = s->m_comm->recursion(); - auto ptr = s->release_self_ref(); - s->invoke_cb(); + if (!s->m_comm->has_reached_recursion_depth()) + { + auto inc = s->m_comm->recursion(); + auto ptr = s->release_self_ref(); + s->invoke_cb(); + } + else + { + // enqueue the callback + while (!(s->m_comm->m_recv_cb_queue.push(s))) {} + } } else { @@ -58,82 +61,76 @@ operation_context::handle_tagged_recv_completion_impl(void* user_data) while (!(s->m_comm->m_recv_cb_queue.push(s))) {} } } - else - { - // enqueue the callback - while (!(s->m_comm->m_recv_cb_queue.push(s))) {} - } - } - else if 
(std::holds_alternative(m_req)) - { - // shared recv - auto s = std::get(m_req); - if (!s->m_comm->m_context->has_reached_recursion_depth()) + else if (std::holds_alternative(m_req)) { - auto inc = s->m_comm->m_context->recursion(); - auto ptr = s->release_self_ref(); - s->invoke_cb(); - } - else - { - // enqueue the callback - while (!(s->m_comm->m_context->m_recv_cb_queue.push(s))) {} - } - } - else - { - detail::request_state** req = reinterpret_cast(&m_req); - LF_DEB(NS_MEMORY::opctx_deb<9>, - error(NS_DEBUG::str<>("invalid request_state"), this, "request", NS_DEBUG::ptr(req))); - throw std::runtime_error("Request state invalid in handle_tagged_recv"); - } - return 1; -} - -int -operation_context::handle_tagged_send_completion_impl(void* user_data) -{ - if (std::holds_alternative(m_req)) - { - // regular (non-shared) recv - auto s = std::get(m_req); - if (reinterpret_cast(user_data) == s->m_comm) - { - if (!s->m_comm->has_reached_recursion_depth()) + // shared recv + auto s = std::get(m_req); + if (!s->m_comm->m_context->has_reached_recursion_depth()) { - auto inc = s->m_comm->recursion(); + auto inc = s->m_comm->m_context->recursion(); auto ptr = s->release_self_ref(); s->invoke_cb(); } else { // enqueue the callback - while (!(s->m_comm->m_send_cb_queue.push(s))) {} + while (!(s->m_comm->m_context->m_recv_cb_queue.push(s))) {} } } else { - // enqueue the callback - while (!(s->m_comm->m_send_cb_queue.push(s))) {} + detail::request_state** req = reinterpret_cast(&m_req); + LF_DEB(NS_MEMORY::opctx_deb<9>, + error( + NS_DEBUG::str<>("invalid request_state"), this, "request", NS_DEBUG::ptr(req))); + throw std::runtime_error("Request state invalid in handle_tagged_recv"); } + return 1; } - else if (std::holds_alternative(m_req)) + + int operation_context::handle_tagged_send_completion_impl(void* user_data) { - // shared recv - auto s = std::get(m_req); - if (!s->m_comm->m_context->has_reached_recursion_depth()) + if (std::holds_alternative(m_req)) { - auto inc = 
s->m_comm->m_context->recursion(); - auto ptr = s->release_self_ref(); - s->invoke_cb(); + // regular (non-shared) recv + auto s = std::get(m_req); + if (reinterpret_cast(user_data) == s->m_comm) + { + if (!s->m_comm->has_reached_recursion_depth()) + { + auto inc = s->m_comm->recursion(); + auto ptr = s->release_self_ref(); + s->invoke_cb(); + } + else + { + // enqueue the callback + while (!(s->m_comm->m_send_cb_queue.push(s))) {} + } + } + else + { + // enqueue the callback + while (!(s->m_comm->m_send_cb_queue.push(s))) {} + } } - else + else if (std::holds_alternative(m_req)) { - // enqueue the callback - while (!(s->m_comm->m_context->m_recv_cb_queue.push(s))) {} + // shared recv + auto s = std::get(m_req); + if (!s->m_comm->m_context->has_reached_recursion_depth()) + { + auto inc = s->m_comm->m_context->recursion(); + auto ptr = s->release_self_ref(); + s->invoke_cb(); + } + else + { + // enqueue the callback + while (!(s->m_comm->m_context->m_recv_cb_queue.push(s))) {} + } } + else { throw std::runtime_error("Request state invalid in handle_tagged_send"); } + return 1; } - else { throw std::runtime_error("Request state invalid in handle_tagged_send"); } - return 1; -} -} // namespace oomph::libfabric +} // namespace oomph::libfabric diff --git a/src/libfabric/operation_context.hpp b/src/libfabric/operation_context.hpp index ad106e6a..0f6b5103 100644 --- a/src/libfabric/operation_context.hpp +++ b/src/libfabric/operation_context.hpp @@ -15,39 +15,38 @@ // #include "operation_context_base.hpp" // -namespace oomph::libfabric -{ - -template -inline /*constexpr*/ NS_DEBUG::print_threshold opctx_deb("OP__CXT"); - -// This struct holds the ready state of a future -// we must also store the context used in libfabric, in case -// a request is cancelled - fi_cancel(...) 
needs it -struct operation_context : public operation_context_base -{ - std::variant m_req; - - template - operation_context(RequestState* req) - : operation_context_base() - , m_req{req} - { - [[maybe_unused]] auto scp = - opctx_deb<9>.scope(NS_DEBUG::ptr(this), __func__, "request", req); - } - - // -------------------------------------------------------------------- - // When a completion returns FI_ECANCELED, this is called - void handle_cancelled(); +namespace oomph::libfabric { - // -------------------------------------------------------------------- - // Called when a tagged recv completes - int handle_tagged_recv_completion_impl(void* user_data); + template + inline /*constexpr*/ NS_DEBUG::print_threshold opctx_deb("OP__CXT"); - // -------------------------------------------------------------------- - // Called when a tagged send completes - int handle_tagged_send_completion_impl(void* user_data); -}; - -} // namespace oomph::libfabric + // This struct holds the ready state of a future + // we must also store the context used in libfabric, in case + // a request is cancelled - fi_cancel(...) 
needs it + struct operation_context : public operation_context_base + { + std::variant m_req; + + template + operation_context(RequestState* req) + : operation_context_base() + , m_req{req} + { + [[maybe_unused]] auto scp = + opctx_deb<9>.scope(NS_DEBUG::ptr(this), __func__, "request", req); + } + + // -------------------------------------------------------------------- + // When a completion returns FI_ECANCELED, this is called + void handle_cancelled(); + + // -------------------------------------------------------------------- + // Called when a tagged recv completes + int handle_tagged_recv_completion_impl(void* user_data); + + // -------------------------------------------------------------------- + // Called when a tagged send completes + int handle_tagged_send_completion_impl(void* user_data); + }; + +} // namespace oomph::libfabric diff --git a/src/libfabric/operation_context_base.hpp b/src/libfabric/operation_context_base.hpp index e5156f99..5de5c386 100644 --- a/src/libfabric/operation_context_base.hpp +++ b/src/libfabric/operation_context_base.hpp @@ -12,85 +12,84 @@ #include #include "oomph_libfabric_defines.hpp" -namespace NS_LIBFABRIC -{ +namespace NS_LIBFABRIC { -class controller; + class controller; -static NS_DEBUG::enable_print ctx_bas("CTXBASE"); + static NS_DEBUG::enable_print ctx_bas("CTXBASE"); -// This struct holds the ready state of a future -// we must also store the context used in libfabric, in case -// a request is cancelled - fi_cancel(...) needs it -template -struct operation_context_base -{ - private: - // libfabric requires some space for it's internal bookkeeping - // so the first member of this struct must be fi_context - fi_context context_reserved_space; - - public: - operation_context_base() - : context_reserved_space() + // This struct holds the ready state of a future + // we must also store the context used in libfabric, in case + // a request is cancelled - fi_cancel(...) 
needs it + template + struct operation_context_base { - [[maybe_unused]] auto scp = ctx_bas.scope(NS_DEBUG::ptr(this), __func__); - } + private: + // libfabric requires some space for it's internal bookkeeping + // so the first member of this struct must be fi_context + fi_context context_reserved_space; - // error - void handle_error(struct fi_cq_err_entry& err) - { - static_cast(this)->handle_error_impl(err); - } - void handle_error_impl(struct fi_cq_err_entry& /*err*/) { std::terminate(); } + public: + operation_context_base() + : context_reserved_space() + { + [[maybe_unused]] auto scp = ctx_bas.scope(NS_DEBUG::ptr(this), __func__); + } - void handle_cancelled() { static_cast(this)->handle_cancelled_impl(); } - void handle_cancelled_impl() { std::terminate(); } + // error + void handle_error(struct fi_cq_err_entry& err) + { + static_cast(this)->handle_error_impl(err); + } + void handle_error_impl(struct fi_cq_err_entry& /*err*/) { std::terminate(); } - // send - int handle_send_completion() - { - return static_cast(this)->handle_send_completion_impl(); - } - int handle_send_completion_impl() { return 0; } + void handle_cancelled() { static_cast(this)->handle_cancelled_impl(); } + void handle_cancelled_impl() { std::terminate(); } - // tagged send - int handle_tagged_send_completion(void* user_data) - { - return static_cast(this)->handle_tagged_send_completion_impl(user_data); - } - int handle_tagged_send_completion_impl(void* /*user_data*/) { return 0; } + // send + int handle_send_completion() + { + return static_cast(this)->handle_send_completion_impl(); + } + int handle_send_completion_impl() { return 0; } - // recv - int handle_recv_completion(std::uint64_t len) - { - return static_cast(this)->handle_recv_completion_impl(len); - } - int handle_recv_completion_impl(std::uint64_t /*len*/) { return 0; } + // tagged send + int handle_tagged_send_completion(void* user_data) + { + return static_cast(this)->handle_tagged_send_completion_impl(user_data); + } + int 
handle_tagged_send_completion_impl(void* /*user_data*/) { return 0; } - // tagged recv - int handle_tagged_recv_completion(void* user_data) - { - return static_cast(this)->handle_tagged_recv_completion_impl(user_data); - } - int handle_tagged_recv_completion_impl(bool /*threadlocal*/) { return 0; } + // recv + int handle_recv_completion(std::uint64_t len) + { + return static_cast(this)->handle_recv_completion_impl(len); + } + int handle_recv_completion_impl(std::uint64_t /*len*/) { return 0; } - void handle_rma_read_completion() - { - static_cast(this)->handle_rma_read_completion_impl(); - } - void handle_rma_read_completion_impl() {} + // tagged recv + int handle_tagged_recv_completion(void* user_data) + { + return static_cast(this)->handle_tagged_recv_completion_impl(user_data); + } + int handle_tagged_recv_completion_impl(bool /*threadlocal*/) { return 0; } - // unknown sender = new connection - int handle_new_connection(controller* ctrl, std::uint64_t len) - { - return static_cast(this)->handle_new_connection_impl(ctrl, len); - } - int handle_new_connection_impl(controller*, std::uint64_t) { return 0; } -}; + void handle_rma_read_completion() + { + static_cast(this)->handle_rma_read_completion_impl(); + } + void handle_rma_read_completion_impl() {} -// provided so that a pointer can be cast to this and the operation_context_type queried -struct unspecialized_context : public operation_context_base -{ -}; -} // namespace NS_LIBFABRIC + // unknown sender = new connection + int handle_new_connection(controller* ctrl, std::uint64_t len) + { + return static_cast(this)->handle_new_connection_impl(ctrl, len); + } + int handle_new_connection_impl(controller*, std::uint64_t) { return 0; } + }; + + // provided so that a pointer can be cast to this and the operation_context_type queried + struct unspecialized_context : public operation_context_base + { + }; +} // namespace NS_LIBFABRIC diff --git a/src/libfabric/print.hpp b/src/libfabric/print.hpp index cf8de408..73c37c41 
100644 --- a/src/libfabric/print.hpp +++ b/src/libfabric/print.hpp @@ -27,12 +27,12 @@ #include // #if defined(__linux) || defined(linux) || defined(__linux__) -#include -#include +# include +# include #elif defined(__APPLE__) -#include -#include -#define environ (*_NSGetEnviron()) +# include +# include +# define environ (*_NSGetEnviron()) #else extern char** environ; #endif @@ -78,665 +78,648 @@ extern char** environ; // ------------------------------------------------------------ /// \cond NODETAIL -namespace NS_DEBUG -{ - -// ------------------------------------------------------------------ -// format as zero padded int -// ------------------------------------------------------------------ -namespace detail -{ - -template -struct dec -{ - constexpr dec(T const& v) - : data_(v) - { - } +namespace NS_DEBUG { - T const& data_; + // ------------------------------------------------------------------ + // format as zero padded int + // ------------------------------------------------------------------ + namespace detail { - friend std::ostream& operator<<(std::ostream& os, dec const& d) - { - os << std::right << std::setfill('0') << std::setw(N) << std::noshowbase << std::dec - << d.data_; - return os; - } -}; -} // namespace detail - -template -constexpr detail::dec -dec(T const& v) -{ - return detail::dec(v); -} - -// ------------------------------------------------------------------ -// format as pointer -// ------------------------------------------------------------------ -struct ptr -{ - ptr(void const* v) - : data_(v) - { - } - ptr(std::uintptr_t const v) - : data_(reinterpret_cast(v)) - { - } - void const* data_; - friend std::ostream& operator<<(std::ostream& os, ptr const& d) - { - os << std::right << "0x" << std::setfill('0') << std::setw(12) << std::noshowbase - << std::hex << reinterpret_cast(d.data_); - return os; - } -}; - -// ------------------------------------------------------------------ -// format as zero padded hex -// 
------------------------------------------------------------------ -namespace detail -{ - -template -struct hex; - -template -struct hex::value>::type> -{ - constexpr hex(T const& v) - : data_(v) - { - } - T const& data_; - friend std::ostream& operator<<(std::ostream& os, const hex& d) - { - os << std::right << "0x" << std::setfill('0') << std::setw(N) << std::noshowbase << std::hex - << d.data_; - return os; - } -}; + template + struct dec + { + constexpr dec(T const& v) + : data_(v) + { + } -template -struct hex::value>::type> -{ - constexpr hex(T const& v) - : data_(v) - { - } - T const& data_; - friend std::ostream& operator<<(std::ostream& os, const hex& d) - { - os << std::right << std::setw(N) << std::noshowbase << std::hex << d.data_; - return os; - } -}; -} // namespace detail - -template -constexpr detail::hex -hex(T const& v) -{ - return detail::hex(v); -} - -// ------------------------------------------------------------------ -// format as binary bits -// ------------------------------------------------------------------ -namespace detail -{ - -template -struct bin -{ - constexpr bin(T const& v) - : data_(v) - { - } - T const& data_; - friend std::ostream& operator<<(std::ostream& os, const bin& d) + T const& data_; + + friend std::ostream& operator<<(std::ostream& os, dec const& d) + { + os << std::right << std::setfill('0') << std::setw(N) << std::noshowbase << std::dec + << d.data_; + return os; + } + }; + } // namespace detail + + template + constexpr detail::dec dec(T const& v) { - os << std::bitset(d.data_); - return os; + return detail::dec(v); } -}; -} // namespace detail - -template -constexpr detail::bin -bin(T const& v) -{ - return detail::bin(v); -} - -// ------------------------------------------------------------------ -// format as padded string -// ------------------------------------------------------------------ -template -struct str -{ - constexpr str(char const* v) - : data_(v) + + // 
------------------------------------------------------------------ + // format as pointer + // ------------------------------------------------------------------ + struct ptr { - } + ptr(void const* v) + : data_(v) + { + } + ptr(std::uintptr_t const v) + : data_(reinterpret_cast(v)) + { + } + void const* data_; + friend std::ostream& operator<<(std::ostream& os, ptr const& d) + { + os << std::right << "0x" << std::setfill('0') << std::setw(12) << std::noshowbase + << std::hex << reinterpret_cast(d.data_); + return os; + } + }; + + // ------------------------------------------------------------------ + // format as zero padded hex + // ------------------------------------------------------------------ + namespace detail { - char const* data_; + template + struct hex; - friend std::ostream& operator<<(std::ostream& os, str const& d) + template + struct hex::value>::type> + { + constexpr hex(T const& v) + : data_(v) + { + } + T const& data_; + friend std::ostream& operator<<(std::ostream& os, hex const& d) + { + os << std::right << "0x" << std::setfill('0') << std::setw(N) << std::noshowbase + << std::hex << d.data_; + return os; + } + }; + + template + struct hex::value>::type> + { + constexpr hex(T const& v) + : data_(v) + { + } + T const& data_; + friend std::ostream& operator<<(std::ostream& os, hex const& d) + { + os << std::right << std::setw(N) << std::noshowbase << std::hex << d.data_; + return os; + } + }; + } // namespace detail + + template + constexpr detail::hex hex(T const& v) { - os << std::left << std::setfill(' ') << std::setw(N) << d.data_; - return os; + return detail::hex(v); } -}; - -// ------------------------------------------------------------------ -// format as ip address -// ------------------------------------------------------------------ -struct ipaddr -{ - ipaddr(const void* a) - : data_(reinterpret_cast(a)) - , ipdata_(0) + + // ------------------------------------------------------------------ + // format as binary bits + // 
------------------------------------------------------------------ + namespace detail { + + template + struct bin + { + constexpr bin(T const& v) + : data_(v) + { + } + T const& data_; + friend std::ostream& operator<<(std::ostream& os, bin const& d) + { + os << std::bitset(d.data_); + return os; + } + }; + } // namespace detail + + template + constexpr detail::bin bin(T const& v) { + return detail::bin(v); } - ipaddr(const uint32_t a) - : data_(reinterpret_cast(&ipdata_)) - , ipdata_(a) + + // ------------------------------------------------------------------ + // format as padded string + // ------------------------------------------------------------------ + template + struct str { - } - const uint8_t* data_; - const uint32_t ipdata_; + constexpr str(char const* v) + : data_(v) + { + } + + char const* data_; + + friend std::ostream& operator<<(std::ostream& os, str const& d) + { + os << std::left << std::setfill(' ') << std::setw(N) << d.data_; + return os; + } + }; - friend std::ostream& operator<<(std::ostream& os, ipaddr const& p) + // ------------------------------------------------------------------ + // format as ip address + // ------------------------------------------------------------------ + struct ipaddr { - os << std::dec << int(p.data_[0]) << "." << int(p.data_[1]) << "." << int(p.data_[2]) << "." 
- << int(p.data_[3]); - return os; - } -}; - -// ------------------------------------------------------------------ -// helper fuction for printing CRC32 -// ------------------------------------------------------------------ -inline uint32_t -crc32(const void* address, size_t length) -{ - boost::crc_32_type result; - result.process_bytes(address, length); - return result.checksum(); -} - -// ------------------------------------------------------------------ -// helper fuction for printing short memory dump and crc32 -// useful for debugging corruptions in buffers during -// rma or other transfers -// ------------------------------------------------------------------ -struct mem_crc32 -{ - mem_crc32(const void* a, std::size_t len, const char* txt) - : addr_(reinterpret_cast(a)) - , len_(len) - , txt_(txt) + ipaddr(void const* a) + : data_(reinterpret_cast(a)) + , ipdata_(0) + { + } + ipaddr(uint32_t const a) + : data_(reinterpret_cast(&ipdata_)) + , ipdata_(a) + { + } + uint8_t const* data_; + uint32_t const ipdata_; + + friend std::ostream& operator<<(std::ostream& os, ipaddr const& p) + { + os << std::dec << int(p.data_[0]) << "." << int(p.data_[1]) << "." << int(p.data_[2]) + << "." 
<< int(p.data_[3]); + return os; + } + }; + + // ------------------------------------------------------------------ + // helper fuction for printing CRC32 + // ------------------------------------------------------------------ + inline uint32_t crc32(void const* address, size_t length) { + boost::crc_32_type result; + result.process_bytes(address, length); + return result.checksum(); } - const std::uint8_t* addr_; - const std::size_t len_; - const char* txt_; - friend std::ostream& operator<<(std::ostream& os, mem_crc32 const& p) + + // ------------------------------------------------------------------ + // helper fuction for printing short memory dump and crc32 + // useful for debugging corruptions in buffers during + // rma or other transfers + // ------------------------------------------------------------------ + struct mem_crc32 { - const std::uint8_t* byte = static_cast(p.addr_); - os << "Memory:"; - os << " address " << ptr(p.addr_) << " length " << hex<6, std::size_t>(p.len_) - << " CRC32:" << hex<8, std::size_t>(crc32(p.addr_, p.len_)) << "\n"; - size_t i = 0; - while (i < std::min(size_t(128), p.len_)) - { - os << "0x"; - for (int j = 7; j >= 0; j--) + mem_crc32(void const* a, std::size_t len, char const* txt) + : addr_(reinterpret_cast(a)) + , len_(len) + , txt_(txt) + { + } + std::uint8_t const* addr_; + std::size_t const len_; + char const* txt_; + friend std::ostream& operator<<(std::ostream& os, mem_crc32 const& p) + { + std::uint8_t const* byte = static_cast(p.addr_); + os << "Memory:"; + os << " address " << ptr(p.addr_) << " length " << hex<6, std::size_t>(p.len_) + << " CRC32:" << hex<8, std::size_t>(crc32(p.addr_, p.len_)) << "\n"; + size_t i = 0; + while (i < std::min(size_t(128), p.len_)) { - os << std::hex << std::setfill('0') << std::setw(2) - << (((i + j) > p.len_) ? (int)0 : (int)byte[i + j]); + os << "0x"; + for (int j = 7; j >= 0; j--) + { + os << std::hex << std::setfill('0') << std::setw(2) + << (((i + j) > p.len_) ? 
(int) 0 : (int) byte[i + j]); + } + i += 8; + if (i % 32 == 0) + os << std::endl; + else + os << " "; } - i += 8; - if (i % 32 == 0) os << std::endl; - else - os << " "; + os << ": " << p.txt_; + return os; } - os << ": " << p.txt_; - return os; - } -}; - -namespace detail -{ - -template -void -tuple_print(std::ostream& os, TupleType const& t, std::index_sequence) -{ - (..., (os << (I == 0 ? "" : " ") << std::get(t))); -} - -template -void -tuple_print(std::ostream& os, const std::tuple& t) -{ - tuple_print(os, t, std::make_index_sequence()); -} -} // namespace detail - -namespace detail -{ - -// ------------------------------------------------------------------ -// helper class for printing thread ID -// ------------------------------------------------------------------ -struct current_thread_print_helper -{ -}; - -inline std::ostream& -operator<<(std::ostream& os, current_thread_print_helper const&) -{ - os << hex<12, std::thread::id>(std::this_thread::get_id()) + }; + + namespace detail { + + template + void tuple_print(std::ostream& os, TupleType const& t, std::index_sequence) + { + (..., (os << (I == 0 ? 
"" : " ") << std::get(t))); + } + + template + void tuple_print(std::ostream& os, std::tuple const& t) + { + tuple_print(os, t, std::make_index_sequence()); + } + } // namespace detail + + namespace detail { + + // ------------------------------------------------------------------ + // helper class for printing thread ID + // ------------------------------------------------------------------ + struct current_thread_print_helper + { + }; + + inline std::ostream& operator<<(std::ostream& os, current_thread_print_helper const&) + { + os << hex<12, std::thread::id>(std::this_thread::get_id()) #ifdef DEBUGGING_PRINT_LINUX - << " cpu " << debug::dec<3, int>(sched_getcpu()) << " "; + << " cpu " << debug::dec<3, int>(sched_getcpu()) << " "; #else - << " cpu " - << "--- "; + << " cpu " + << "--- "; #endif - return os; -} - -// ------------------------------------------------------------------ -// helper class for printing time since start -// ------------------------------------------------------------------ -struct hostname_print_helper -{ - const char* get_hostname() const - { - static bool initialized = false; - static char hostname_[20]; - if (!initialized) - { - initialized = true; - gethostname(hostname_, std::size_t(12)); - std::string temp = "(" + std::to_string(guess_rank()) + ")"; - std::strcat(hostname_, temp.c_str()); + return os; } - return hostname_; - } - int guess_rank() const - { - std::vector env_strings{"_RANK=", "_NODEID="}; - for (char** current = environ; *current; current++) + // ------------------------------------------------------------------ + // helper class for printing time since start + // ------------------------------------------------------------------ + struct hostname_print_helper { - auto e = std::string(*current); - for (auto s : env_strings) + char const* get_hostname() const + { + static bool initialized = false; + static char hostname_[20]; + if (!initialized) + { + initialized = true; + gethostname(hostname_, std::size_t(12)); + 
std::string temp = "(" + std::to_string(guess_rank()) + ")"; + std::strcat(hostname_, temp.c_str()); + } + return hostname_; + } + + int guess_rank() const { - auto pos = e.find(s); - if (pos != std::string::npos) + std::vector env_strings{"_RANK=", "_NODEID="}; + for (char** current = environ; *current; current++) { - //std::cout << "Got a rank string : " << e << std::endl; - return std::stoi(e.substr(pos + s.size(), 5)); + auto e = std::string(*current); + for (auto s : env_strings) + { + auto pos = e.find(s); + if (pos != std::string::npos) + { + //std::cout << "Got a rank string : " << e << std::endl; + return std::stoi(e.substr(pos + s.size(), 5)); + } + } } + return -1; } + }; + + inline std::ostream& operator<<(std::ostream& os, hostname_print_helper const& h) + { + os << debug::str<13>(h.get_hostname()) << " "; + return os; } - return -1; - } -}; - -inline std::ostream& -operator<<(std::ostream& os, hostname_print_helper const& h) -{ - os << debug::str<13>(h.get_hostname()) << " "; - return os; -} - -// ------------------------------------------------------------------ -// helper class for printing time since start -// ------------------------------------------------------------------ -struct current_time_print_helper -{ -}; - -inline std::ostream& -operator<<(std::ostream& os, current_time_print_helper const&) -{ - using namespace std::chrono; - static steady_clock::time_point log_t_start = steady_clock::now(); - // - auto now = steady_clock::now(); - auto nowt = duration_cast(now - log_t_start).count(); - // - os << debug::dec<10>(nowt) << " "; - return os; -} - -template -void -display(char const* prefix, Args const&... 
args) -{ - // using a temp stream object with a single copy to cout at the end - // prevents multiple threads from injecting overlapping text - std::stringstream tempstream; - tempstream << prefix << detail::current_time_print_helper() - << detail::current_thread_print_helper() << detail::hostname_print_helper(); - ((tempstream << args << " "), ...); - tempstream << "\n"; - std::cout << tempstream.str() << std::flush; -} - -template -void -debug(Args const&... args) -{ - display(" ", args...); -} - -template -void -warning(Args const&... args) -{ - display(" ", args...); -} - -template -void -error(Args const&... args) -{ - display(" ", args...); -} - -template -void -scope(Args const&... args) -{ - display(" ", args...); -} - -template -void -trace(Args const&... args) -{ - display(" ", args...); -} - -template -void -timed(Args const&... args) -{ - display(" ", args...); -} -} // namespace detail - -template -struct scoped_var -{ - // capture tuple elements by reference - no temp vars in constructor please - char const* prefix_; - std::tuple const message_; - std::string buffered_msg; - - // - scoped_var(char const* p, Args const&... args) - : prefix_(p) - , message_(args...) - { - std::stringstream tempstream; - detail::tuple_print(tempstream, message_); - buffered_msg = tempstream.str(); - detail::display(" ", prefix_, debug::str<>(">> enter <<"), tempstream.str()); - } - ~scoped_var() { detail::display(" ", prefix_, debug::str<>("<< leave >>"), buffered_msg); } -}; - -template -struct timed_var -{ - mutable std::chrono::steady_clock::time_point time_start_; - double const delay_; - std::tuple const message_; - // - timed_var(double const& delay, Args const&... args) - : time_start_(std::chrono::steady_clock::now()) - , delay_(delay) - , message_(args...) 
- { - } + // ------------------------------------------------------------------ + // helper class for printing time since start + // ------------------------------------------------------------------ + struct current_time_print_helper + { + }; - bool elapsed(std::chrono::steady_clock::time_point const& now) const - { - double elapsed_ = - std::chrono::duration_cast>(now - time_start_).count(); + inline std::ostream& operator<<(std::ostream& os, current_time_print_helper const&) + { + using namespace std::chrono; + static steady_clock::time_point log_t_start = steady_clock::now(); + // + auto now = steady_clock::now(); + auto nowt = duration_cast(now - log_t_start).count(); + // + os << debug::dec<10>(nowt) << " "; + return os; + } - if (elapsed_ > delay_) + template + void display(char const* prefix, Args const&... args) { - time_start_ = now; - return true; + // using a temp stream object with a single copy to cout at the end + // prevents multiple threads from injecting overlapping text + std::stringstream tempstream; + tempstream << prefix << detail::current_time_print_helper() + << detail::current_thread_print_helper() << detail::hostname_print_helper(); + ((tempstream << args << " "), ...); + tempstream << "\n"; + std::cout << tempstream.str() << std::flush; } - return false; - } - friend std::ostream& operator<<(std::ostream& os, timed_var const& ti) - { - detail::tuple_print(os, ti.message_); - return os; - } -}; + template + void debug(Args const&... args) + { + display(" ", args...); + } -/////////////////////////////////////////////////////////////////////////// -template -struct enable_print; + template + void warning(Args const&... args) + { + display(" ", args...); + } -// when false, debug statements should produce no code -template<> -struct enable_print -{ - constexpr enable_print(const char*) {} + template + void error(Args const&... 
args) + { + display(" ", args...); + } - constexpr bool is_enabled() const { return false; } + template + void scope(Args const&... args) + { + display(" ", args...); + } - template - constexpr void debug(Args const&...) const - { - } + template + void trace(Args const&... args) + { + display(" ", args...); + } - template - constexpr void warning(Args const&...) const - { - } + template + void timed(Args const&... args) + { + display(" ", args...); + } + } // namespace detail - template - constexpr void trace(Args const&...) const + template + struct scoped_var { - } + // capture tuple elements by reference - no temp vars in constructor please + char const* prefix_; + std::tuple const message_; + std::string buffered_msg; - template - constexpr void error(Args const&...) const - { - } + // + scoped_var(char const* p, Args const&... args) + : prefix_(p) + , message_(args...) + { + std::stringstream tempstream; + detail::tuple_print(tempstream, message_); + buffered_msg = tempstream.str(); + detail::display(" ", prefix_, debug::str<>(">> enter <<"), tempstream.str()); + } - template - constexpr void timed(Args const&...) const - { - } + ~scoped_var() + { + detail::display(" ", prefix_, debug::str<>("<< leave >>"), buffered_msg); + } + }; + + template + struct timed_var + { + mutable std::chrono::steady_clock::time_point time_start_; + double const delay_; + std::tuple const message_; + // + timed_var(double const& delay, Args const&... args) + : time_start_(std::chrono::steady_clock::now()) + , delay_(delay) + , message_(args...) 
+ { + } - template - constexpr void array(std::string const&, std::vector const&) const - { - } + bool elapsed(std::chrono::steady_clock::time_point const& now) const + { + double elapsed_ = + std::chrono::duration_cast>(now - time_start_) + .count(); - template - constexpr void array(std::string const&, std::array const&) const - { - } + if (elapsed_ > delay_) + { + time_start_ = now; + return true; + } + return false; + } - template - constexpr void array(std::string const&, Iter, Iter) const - { - } + friend std::ostream& operator<<(std::ostream& os, timed_var const& ti) + { + detail::tuple_print(os, ti.message_); + return os; + } + }; - template - constexpr bool scope(Args const&...) - { - return true; - } + /////////////////////////////////////////////////////////////////////////// + template + struct enable_print; - template - constexpr bool declare_variable(Args const&...) const + // when false, debug statements should produce no code + template <> + struct enable_print { - return true; - } + constexpr enable_print(char const*) {} - template - constexpr void set(T&, V const&) - { - } + constexpr bool is_enabled() const { return false; } - // @todo, return void so that timers have zero footprint when disabled - template - constexpr int make_timer(const double, Args const&...) const - { - return 0; - } + template + constexpr void debug(Args const&...) const + { + } - template - constexpr bool eval(Expr const&) - { - return true; - } -}; - -// when true, debug statements produce valid output -template<> -struct enable_print -{ - private: - char const* prefix_; - - public: - constexpr enable_print() - : prefix_("") - { - } + template + constexpr void warning(Args const&...) const + { + } - constexpr enable_print(const char* p) - : prefix_(p) - { - } + template + constexpr void trace(Args const&...) const + { + } - constexpr bool is_enabled() const { return true; } + template + constexpr void error(Args const&...) 
const + { + } - template - constexpr void debug(Args const&... args) const - { - detail::debug(prefix_, args...); - } + template + constexpr void timed(Args const&...) const + { + } - template - constexpr void warning(Args const&... args) const - { - detail::warning(prefix_, args...); - } + template + constexpr void array(std::string const&, std::vector const&) const + { + } - template - constexpr void trace(Args const&... args) const - { - detail::trace(prefix_, args...); - } + template + constexpr void array(std::string const&, std::array const&) const + { + } - template - constexpr void error(Args const&... args) const - { - detail::error(prefix_, args...); - } + template + constexpr void array(std::string const&, Iter, Iter) const + { + } - template - scoped_var scope(Args const&... args) - { - return scoped_var(prefix_, args...); - } + template + constexpr bool scope(Args const&...) + { + return true; + } - template - void timed(timed_var const& init, Args const&... args) const - { - auto now = std::chrono::steady_clock::now(); - if (init.elapsed(now)) { detail::timed(prefix_, init, args...); } - } + template + constexpr bool declare_variable(Args const&...) const + { + return true; + } - template - void array(std::string const& name, std::vector const& v) const - { - std::cout << str<20>(name.c_str()) << ": {" << debug::dec<4>(v.size()) << "} : "; - std::copy(std::begin(v), std::end(v), std::ostream_iterator(std::cout, ", ")); - std::cout << "\n"; - } + template + constexpr void set(T&, V const&) + { + } - template - void array(std::string const& name, const std::array& v) const - { - std::cout << str<20>(name.c_str()) << ": {" << debug::dec<4>(v.size()) << "} : "; - std::copy(std::begin(v), std::end(v), std::ostream_iterator(std::cout, ", ")); - std::cout << "\n"; - } + // @todo, return void so that timers have zero footprint when disabled + template + constexpr int make_timer(double const, Args const&...) 
const + { + return 0; + } - template - void array(std::string const& name, Iter begin, Iter end) const - { - std::cout << str<20>(name.c_str()) << ": {" << debug::dec<4>(std::distance(begin, end)) - << "} : "; - std::copy(begin, end, - std::ostream_iterator::value_type>(std::cout, - ", ")); - std::cout << std::endl; - } + template + constexpr bool eval(Expr const&) + { + return true; + } + }; - template - T declare_variable(Args const&... args) const + // when true, debug statements produce valid output + template <> + struct enable_print { - return T(args...); - } + private: + char const* prefix_; - template - void set(T& var, V const& val) - { - var = val; - } + public: + constexpr enable_print() + : prefix_("") + { + } + + constexpr enable_print(char const* p) + : prefix_(p) + { + } + + constexpr bool is_enabled() const { return true; } + + template + constexpr void debug(Args const&... args) const + { + detail::debug(prefix_, args...); + } + + template + constexpr void warning(Args const&... args) const + { + detail::warning(prefix_, args...); + } + + template + constexpr void trace(Args const&... args) const + { + detail::trace(prefix_, args...); + } - template - timed_var make_timer(const double delay, const Args... args) const + template + constexpr void error(Args const&... args) const + { + detail::error(prefix_, args...); + } + + template + scoped_var scope(Args const&... args) + { + return scoped_var(prefix_, args...); + } + + template + void timed(timed_var const& init, Args const&... 
args) const + { + auto now = std::chrono::steady_clock::now(); + if (init.elapsed(now)) { detail::timed(prefix_, init, args...); } + } + + template + void array(std::string const& name, std::vector const& v) const + { + std::cout << str<20>(name.c_str()) << ": {" << debug::dec<4>(v.size()) << "} : "; + std::copy(std::begin(v), std::end(v), std::ostream_iterator(std::cout, ", ")); + std::cout << "\n"; + } + + template + void array(std::string const& name, std::array const& v) const + { + std::cout << str<20>(name.c_str()) << ": {" << debug::dec<4>(v.size()) << "} : "; + std::copy(std::begin(v), std::end(v), std::ostream_iterator(std::cout, ", ")); + std::cout << "\n"; + } + + template + void array(std::string const& name, Iter begin, Iter end) const + { + std::cout << str<20>(name.c_str()) << ": {" << debug::dec<4>(std::distance(begin, end)) + << "} : "; + std::copy(begin, end, + std::ostream_iterator::value_type>( + std::cout, ", ")); + std::cout << std::endl; + } + + template + T declare_variable(Args const&... args) const + { + return T(args...); + } + + template + void set(T& var, V const& val) + { + var = val; + } + + template + timed_var make_timer(double const delay, Args const... 
args) const + { + return timed_var(delay, args...); + } + + template + auto eval(Expr const& e) + { + return e(); + } + }; + + // ------------------------------------------------------------------ + // helper for N>M true/false + // ------------------------------------------------------------------ + template + struct check_level : std::integral_constant { - return timed_var(delay, args...); - } + }; - template - auto eval(Expr const& e) + template + struct print_threshold : enable_print::value> { - return e(); - } -}; - -// ------------------------------------------------------------------ -// helper for N>M true/false -// ------------------------------------------------------------------ -template -struct check_level : std::integral_constant -{ -}; - -template -struct print_threshold : enable_print::value> -{ - using base_type = enable_print::value>; - // inherit constructor - using base_type::base_type; -}; - -} // namespace NS_DEBUG + using base_type = enable_print::value>; + // inherit constructor + using base_type::base_type; + }; + +} // namespace NS_DEBUG /// \endcond diff --git a/src/libfabric/request_state.hpp b/src/libfabric/request_state.hpp index d00e0367..58f15dd5 100644 --- a/src/libfabric/request_state.hpp +++ b/src/libfabric/request_state.hpp @@ -13,90 +13,88 @@ #include "../request_state_base.hpp" #include "./operation_context.hpp" -namespace oomph -{ -namespace detail -{ - -struct request_state -: public util::enable_shared_from_this -, public request_state_base -{ - using base = request_state_base; - using shared_ptr_t = util::unsafe_shared_ptr; - using operation_context = libfabric::operation_context; - - operation_context m_operation_context; - util::unsafe_shared_ptr m_self_ptr; - - request_state(oomph::context_impl* ctxt, oomph::communicator_impl* comm, std::size_t* scheduled, - rank_type rank, tag_type tag, cb_type&& cb) - : base{ctxt, comm, scheduled, rank, tag, std::move(cb)} - , m_operation_context{this} - { - } - - void progress(); - - 
bool cancel(); - - void create_self_ref() - { - // create a self-reference cycle!! - // this is useful if we only keep a raw pointer around internally, which still is supposed - // to keep the object alive - m_self_ptr = shared_from_this(); - } - - shared_ptr_t release_self_ref() noexcept - { - assert(((bool)m_self_ptr) && "doesn't own a self-reference!"); - return std::move(m_self_ptr); - } -}; - -struct shared_request_state -: public std::enable_shared_from_this -, public request_state_base -{ - using base = request_state_base; - using shared_ptr_t = std::shared_ptr; - using operation_context = libfabric::operation_context; - - operation_context m_operation_context; - std::shared_ptr m_self_ptr; - - shared_request_state(oomph::context_impl* ctxt, oomph::communicator_impl* comm, - std::atomic* scheduled, rank_type rank, tag_type tag, cb_type&& cb) - : base{ctxt, comm, scheduled, rank, tag, std::move(cb)} - , m_operation_context{this} - { - [[maybe_unused]] auto scp = libfabric::opctx_deb<9>.scope(NS_DEBUG::ptr(this), __func__); - } +namespace oomph { namespace detail { - ~shared_request_state() + struct request_state + : public util::enable_shared_from_this + , public request_state_base { - [[maybe_unused]] auto scp = libfabric::opctx_deb<9>.scope(NS_DEBUG::ptr(this), __func__); - } - - void progress(); - - bool cancel(); - - void create_self_ref() + using base = request_state_base; + using shared_ptr_t = util::unsafe_shared_ptr; + using operation_context = libfabric::operation_context; + + operation_context m_operation_context; + util::unsafe_shared_ptr m_self_ptr; + + request_state(oomph::context_impl* ctxt, oomph::communicator_impl* comm, + std::size_t* scheduled, rank_type rank, tag_type tag, cb_type&& cb) + : base{ctxt, comm, scheduled, rank, tag, std::move(cb)} + , m_operation_context{this} + { + } + + void progress(); + + bool cancel(); + + void create_self_ref() + { + // create a self-reference cycle!! 
+ // this is useful if we only keep a raw pointer around internally, which still is supposed + // to keep the object alive + m_self_ptr = shared_from_this(); + } + + shared_ptr_t release_self_ref() noexcept + { + assert(((bool) m_self_ptr) && "doesn't own a self-reference!"); + return std::move(m_self_ptr); + } + }; + + struct shared_request_state + : public std::enable_shared_from_this + , public request_state_base { - // create a self-reference cycle!! - // this is useful if we only keep a raw pointer around internally, which still is supposed - // to keep the object alive - m_self_ptr = shared_from_this(); - } - - shared_ptr_t release_self_ref() noexcept - { - assert(((bool)m_self_ptr) && "doesn't own a self-reference!"); - return std::move(m_self_ptr); - } -}; - -} // namespace detail -} // namespace oomph + using base = request_state_base; + using shared_ptr_t = std::shared_ptr; + using operation_context = libfabric::operation_context; + + operation_context m_operation_context; + std::shared_ptr m_self_ptr; + + shared_request_state(oomph::context_impl* ctxt, oomph::communicator_impl* comm, + std::atomic* scheduled, rank_type rank, tag_type tag, cb_type&& cb) + : base{ctxt, comm, scheduled, rank, tag, std::move(cb)} + , m_operation_context{this} + { + [[maybe_unused]] auto scp = + libfabric::opctx_deb<9>.scope(NS_DEBUG::ptr(this), __func__); + } + + ~shared_request_state() + { + [[maybe_unused]] auto scp = + libfabric::opctx_deb<9>.scope(NS_DEBUG::ptr(this), __func__); + } + + void progress(); + + bool cancel(); + + void create_self_ref() + { + // create a self-reference cycle!! 
+ // this is useful if we only keep a raw pointer around internally, which still is supposed + // to keep the object alive + m_self_ptr = shared_from_this(); + } + + shared_ptr_t release_self_ref() noexcept + { + assert(((bool) m_self_ptr) && "doesn't own a self-reference!"); + return std::move(m_self_ptr); + } + }; + +}} // namespace oomph::detail diff --git a/src/libfabric/simple_counter.hpp b/src/libfabric/simple_counter.hpp index f44eac92..26ecf8d5 100644 --- a/src/libfabric/simple_counter.hpp +++ b/src/libfabric/simple_counter.hpp @@ -12,13 +12,13 @@ #include "oomph_libfabric_defines.hpp" // #include -#include #include +#include #ifdef OOMPH_LIBFABRIC_HAVE_PERFORMANCE_COUNTERS -#define PERFORMANCE_COUNTER_ENABLED true +# define PERFORMANCE_COUNTER_ENABLED true #else -#define PERFORMANCE_COUNTER_ENABLED false +# define PERFORMANCE_COUNTER_ENABLED false #endif // @@ -29,90 +29,86 @@ // the performance counter that will simply do nothing when disabled - but // still allow code that uses the counters in arithmetic to compile. 
// -namespace oomph -{ -namespace libfabric -{ -template::value>> -struct simple_counter -{ -}; - -// -------------------------------------------------------------------- -// specialization for performance counters Enabled -// we provide an atomic that can be incremented or added/subtracted to -template -struct simple_counter -{ - simple_counter() - : value_{T()} +namespace oomph { namespace libfabric { + template ::value>> + struct simple_counter { - } + }; - simple_counter(const T& init) - : value_{init} + // -------------------------------------------------------------------- + // specialization for performance counters Enabled + // we provide an atomic that can be incremented or added/subtracted to + template + struct simple_counter { - } + simple_counter() + : value_{T()} + { + } - inline operator T() const { return value_; } + simple_counter(T const& init) + : value_{init} + { + } - inline T operator=(const T& x) { return value_ = x; } + inline operator T() const { return value_; } - inline T operator++() { return ++value_; } + inline T operator=(T const& x) { return value_ = x; } - inline T operator++(int x) { return (value_ += x); } + inline T operator++() { return ++value_; } - inline T operator+=(const T& rhs) { return (value_ += rhs); } + inline T operator++(int x) { return (value_ += x); } - inline T operator--() { return --value_; } + inline T operator+=(T const& rhs) { return (value_ += rhs); } - inline T operator--(int x) { return (value_ -= x); } + inline T operator--() { return --value_; } - inline T operator-=(const T& rhs) { return (value_ -= rhs); } + inline T operator--(int x) { return (value_ -= x); } - friend std::ostream& operator<<(std::ostream& os, const simple_counter& x) - { - os << x.value_; - return os; - } + inline T operator-=(T const& rhs) { return (value_ -= rhs); } - std::atomic value_; -}; + friend std::ostream& operator<<(std::ostream& os, simple_counter const& x) + { + os << x.value_; + return os; + } -// 
-------------------------------------------------------------------- -// specialization for performance counters Disabled -// just return dummy values so that arithmetic operations compile ok -template -struct simple_counter -{ - simple_counter() {} + std::atomic value_; + }; - simple_counter(const T&) {} + // -------------------------------------------------------------------- + // specialization for performance counters Disabled + // just return dummy values so that arithmetic operations compile ok + template + struct simple_counter + { + simple_counter() {} - inline operator T() const { return 0; } + simple_counter(T const&) {} - // inline bool operator==(const T&) { return true; } + inline operator T() const { return 0; } - inline T operator=(const T&) { return 0; } + // inline bool operator==(const T&) { return true; } - inline T operator++() { return 0; } + inline T operator=(T const&) { return 0; } - inline T operator++(int) { return 0; } + inline T operator++() { return 0; } - inline T operator+=(const T&) { return 0; } + inline T operator++(int) { return 0; } - inline T operator--() { return 0; } + inline T operator+=(T const&) { return 0; } - inline T operator--(int) { return 0; } + inline T operator--() { return 0; } - inline T operator-=(const T&) { return 0; } + inline T operator--(int) { return 0; } - friend std::ostream& operator<<(std::ostream& os, const simple_counter&) - { - os << "undefined"; - return os; - } -}; -} // namespace libfabric -} // namespace oomph + inline T operator-=(T const&) { return 0; } + + friend std::ostream& operator<<(std::ostream& os, simple_counter const&) + { + os << "undefined"; + return os; + } + }; +}} // namespace oomph::libfabric diff --git a/test/test_send_recv.cpp b/test/test_send_recv.cpp index 0cfd1170..08afefe8 100644 --- a/test/test_send_recv.cpp +++ b/test/test_send_recv.cpp @@ -7,16 +7,16 @@ * Please, refer to the LICENSE file in the root directory. 
* SPDX-License-Identifier: BSD-3-Clause */ -#include +#include #include -#include "./mpi_runner/mpi_test_fixture.hpp" -#include #include +#include +#include #include -#include +#include "./mpi_runner/mpi_test_fixture.hpp" -#define NITERS 50 -#define SIZE 64 +#define NITERS 50 +#define SIZE 64 #define NTHREADS 4 std::vector> shared_received(NTHREADS); @@ -33,22 +33,22 @@ struct test_environment_base using tag_type = oomph::tag_type; using message = oomph::message_buffer; - oomph::context& ctxt; + oomph::context& ctxt; oomph::communicator comm; - rank_type speer_rank; - rank_type rpeer_rank; - int thread_id; - int num_threads; - tag_type tag; + rank_type speer_rank; + rank_type rpeer_rank; + int thread_id; + int num_threads; + tag_type tag; test_environment_base(oomph::context& c, int tid, int num_t) - : ctxt(c) - , comm(ctxt.get_communicator()) - , speer_rank((comm.rank() + 1) % comm.size()) - , rpeer_rank((comm.rank() + comm.size() - 1) % comm.size()) - , thread_id(tid) - , num_threads(num_t) - , tag(tid) + : ctxt(c) + , comm(ctxt.get_communicator()) + , speer_rank((comm.rank() + 1) % comm.size()) + , rpeer_rank((comm.rank() + comm.size() - 1) % comm.size()) + , thread_id(tid) + , num_threads(num_t) + , tag(tid) { } }; @@ -57,25 +57,26 @@ struct test_environment : public test_environment_base { using base = test_environment_base; - static auto make_buffer(oomph::communicator& comm, std::size_t size, bool user_alloc, - rank_type* ptr) + static auto make_buffer( + oomph::communicator& comm, std::size_t size, bool user_alloc, rank_type* ptr) { - if (user_alloc) return comm.make_buffer(ptr, size); + if (user_alloc) + return comm.make_buffer(ptr, size); else return comm.make_buffer(size); } std::vector raw_smsg; std::vector raw_rmsg; - message smsg; - message rmsg; + message smsg; + message rmsg; test_environment(oomph::context& c, std::size_t size, int tid, int num_t, bool user_alloc) - : base(c, tid, num_t) - , raw_smsg(user_alloc ? size : 0) - , raw_rmsg(user_alloc ? 
size : 0) - , smsg(make_buffer(comm, size, user_alloc, raw_smsg.data())) - , rmsg(make_buffer(comm, size, user_alloc, raw_rmsg.data())) + : base(c, tid, num_t) + , raw_smsg(user_alloc ? size : 0) + , raw_rmsg(user_alloc ? size : 0) + , smsg(make_buffer(comm, size, user_alloc, raw_smsg.data())) + , rmsg(make_buffer(comm, size, user_alloc, raw_rmsg.data())) { fill_send_buffer(); fill_recv_buffer(); @@ -104,10 +105,11 @@ struct test_environment_device : public test_environment_base { using base = test_environment_base; - static auto make_buffer(oomph::communicator& comm, std::size_t size, bool user_alloc, - rank_type* device_ptr) + static auto make_buffer( + oomph::communicator& comm, std::size_t size, bool user_alloc, rank_type* device_ptr) { - if (user_alloc) return comm.make_device_buffer(device_ptr, size, 0); + if (user_alloc) + return comm.make_device_buffer(device_ptr, size, 0); else return comm.make_device_buffer(size, 0); } @@ -120,37 +122,37 @@ struct test_environment_device : public test_environment_base if (size) m_ptr = hwmalloc::device_malloc(size * sizeof(rank_type)); } device_allocation(device_allocation&& other) - : m_ptr{std::exchange(other.m_ptr, nullptr)} + : m_ptr{std::exchange(other.m_ptr, nullptr)} { } ~device_allocation() { -#ifndef OOMPH_TEST_LEAK_GPU_MEMORY +# ifndef OOMPH_TEST_LEAK_GPU_MEMORY if (m_ptr) hwmalloc::device_free(m_ptr); -#endif +# endif } - rank_type* get() const noexcept { return (rank_type*)m_ptr; } + rank_type* get() const noexcept { return (rank_type*) m_ptr; } }; device_allocation raw_device_smsg; device_allocation raw_device_rmsg; - message smsg; - message rmsg; - - test_environment_device(oomph::context& c, std::size_t size, int tid, int num_t, - bool user_alloc) - : base(c, tid, num_t) -#ifndef OOMPH_TEST_LEAK_GPU_MEMORY - , raw_device_smsg(user_alloc ? size : 0) - , raw_device_rmsg(user_alloc ? 
size : 0) - , smsg(make_buffer(comm, size, user_alloc, raw_device_smsg.get())) - , rmsg(make_buffer(comm, size, user_alloc, raw_device_rmsg.get())) -#else - , raw_device_smsg(size) - , raw_device_rmsg(size) - , smsg(make_buffer(comm, size, user_alloc, raw_device_smsg.get())) - , rmsg(make_buffer(comm, size, user_alloc, raw_device_rmsg.get())) -#endif + message smsg; + message rmsg; + + test_environment_device( + oomph::context& c, std::size_t size, int tid, int num_t, bool user_alloc) + : base(c, tid, num_t) +# ifndef OOMPH_TEST_LEAK_GPU_MEMORY + , raw_device_smsg(user_alloc ? size : 0) + , raw_device_rmsg(user_alloc ? size : 0) + , smsg(make_buffer(comm, size, user_alloc, raw_device_smsg.get())) + , rmsg(make_buffer(comm, size, user_alloc, raw_device_rmsg.get())) +# else + , raw_device_smsg(size) + , raw_device_rmsg(size) + , smsg(make_buffer(comm, size, user_alloc, raw_device_smsg.get())) + , rmsg(make_buffer(comm, size, user_alloc, raw_device_rmsg.get())) +# endif { fill_send_buffer(); fill_recv_buffer(); @@ -178,9 +180,8 @@ struct test_environment_device : public test_environment_base }; #endif -template -void -launch_test(Func f) +template +void launch_test(Func f) { // single threaded { @@ -193,7 +194,7 @@ launch_test(Func f) // multi threaded { - oomph::context ctxt(MPI_COMM_WORLD, true); + oomph::context ctxt(MPI_COMM_WORLD, true); std::vector threads; threads.reserve(NTHREADS); reset_counters(); @@ -210,9 +211,9 @@ launch_test(Func f) // no callback // =========== -template -void -test_send_recv(oomph::context& ctxt, std::size_t size, int tid, int num_threads, bool user_alloc) +template +void test_send_recv( + oomph::context& ctxt, std::size_t size, int tid, int num_threads, bool user_alloc) { Env env(ctxt, size, tid, num_threads, user_alloc); @@ -221,10 +222,7 @@ test_send_recv(oomph::context& ctxt, std::size_t size, int tid, int num_threads, { auto rreq = env.comm.recv(env.rmsg, env.rpeer_rank, env.tag); auto sreq = env.comm.send(env.smsg, 
env.speer_rank, env.tag); - while (!(rreq.is_ready() && sreq.is_ready())) - { - env.comm.progress(); - }; + while (!(rreq.is_ready() && sreq.is_ready())) { env.comm.progress(); }; EXPECT_TRUE(env.check_recv_buffer()); env.fill_recv_buffer(); } @@ -260,9 +258,9 @@ TEST_F(mpi_test_fixture, send_recv) // callback: pass by l-value reference // =================================== -template -void -test_send_recv_cb(oomph::context& ctxt, std::size_t size, int tid, int num_threads, bool user_alloc) +template +void test_send_recv_cb( + oomph::context& ctxt, std::size_t size, int tid, int num_threads, bool user_alloc) { using rank_type = test_environment::rank_type; using tag_type = test_environment::tag_type; @@ -270,8 +268,8 @@ test_send_recv_cb(oomph::context& ctxt, std::size_t size, int tid, int num_threa Env env(ctxt, size, tid, num_threads, user_alloc); - volatile int received = 0; - volatile int sent = 0; + int volatile received = 0; + int volatile sent = 0; auto send_callback = [&](message const&, rank_type, tag_type) { ++sent; }; auto recv_callback = [&](message&, rank_type, tag_type) { ++received; }; @@ -327,10 +325,9 @@ TEST_F(mpi_test_fixture, send_recv_cb) // callback: pass by r-value reference (give up ownership) // ======================================================= -template -void -test_send_recv_cb_disown(oomph::context& ctxt, std::size_t size, int tid, int num_threads, - bool user_alloc) +template +void test_send_recv_cb_disown( + oomph::context& ctxt, std::size_t size, int tid, int num_threads, bool user_alloc) { using rank_type = test_environment::rank_type; using tag_type = test_environment::tag_type; @@ -338,16 +335,14 @@ test_send_recv_cb_disown(oomph::context& ctxt, std::size_t size, int tid, int nu Env env(ctxt, size, tid, num_threads, user_alloc); - volatile int received = 0; - volatile int sent = 0; + int volatile received = 0; + int volatile sent = 0; - auto send_callback = [&](message msg, rank_type, tag_type) - { + auto send_callback = 
[&](message msg, rank_type, tag_type) { ++sent; env.smsg = std::move(msg); }; - auto recv_callback = [&](message msg, rank_type, tag_type) - { + auto recv_callback = [&](message msg, rank_type, tag_type) { ++received; env.rmsg = std::move(msg); }; @@ -403,10 +398,9 @@ TEST_F(mpi_test_fixture, send_recv_cb_disown) // callback: pass by r-value reference (give up ownership), shared recv // ==================================================================== -template -void -test_send_shared_recv_cb_disown(oomph::context& ctxt, std::size_t size, int tid, int num_threads, - bool user_alloc) +template +void test_send_shared_recv_cb_disown( + oomph::context& ctxt, std::size_t size, int tid, int num_threads, bool user_alloc) { using rank_type = test_environment::rank_type; using tag_type = test_environment::tag_type; @@ -417,15 +411,13 @@ test_send_shared_recv_cb_disown(oomph::context& ctxt, std::size_t size, int tid, thread_id = env.thread_id; //volatile int received = 0; - volatile int sent = 0; + int volatile sent = 0; - auto send_callback = [&](message msg, rank_type, tag_type) - { + auto send_callback = [&](message msg, rank_type, tag_type) { ++sent; env.smsg = std::move(msg); }; - auto recv_callback = [&](message msg, rank_type, tag_type) - { + auto recv_callback = [&](message msg, rank_type, tag_type) { //std::cout << thread_id << " " << env.thread_id << std::endl; //if (thread_id != env.thread_id) std::cout << "other thread picked up callback" << std::endl; //else std::cout << "my thread picked up callback" << std::endl; @@ -485,10 +477,9 @@ TEST_F(mpi_test_fixture, send_shared_recv_cb_disown) // callback: pass by l-value reference, and resubmit // ================================================= -template -void -test_send_recv_cb_resubmit(oomph::context& ctxt, std::size_t size, int tid, int num_threads, - bool user_alloc) +template +void test_send_recv_cb_resubmit( + oomph::context& ctxt, std::size_t size, int tid, int num_threads, bool user_alloc) { using 
rank_type = test_environment::rank_type; using tag_type = test_environment::tag_type; @@ -496,13 +487,13 @@ test_send_recv_cb_resubmit(oomph::context& ctxt, std::size_t size, int tid, int Env env(ctxt, size, tid, num_threads, user_alloc); - volatile int received = 0; - volatile int sent = 0; + int volatile received = 0; + int volatile sent = 0; struct recursive_send_callback { - Env& env; - volatile int& sent; + Env& env; + int volatile& sent; void operator()(message& msg, rank_type dst, tag_type tag) { @@ -513,8 +504,8 @@ test_send_recv_cb_resubmit(oomph::context& ctxt, std::size_t size, int tid, int struct recursive_recv_callback { - Env& env; - volatile int& received; + Env& env; + int volatile& received; void operator()(message& msg, rank_type src, tag_type tag) { @@ -541,10 +532,9 @@ TEST_F(mpi_test_fixture, send_recv_cb_resubmit) // callback: pass by r-value reference (give up ownership), and resubmit // ===================================================================== -template -void -test_send_recv_cb_resubmit_disown(oomph::context& ctxt, std::size_t size, int tid, int num_threads, - bool user_alloc) +template +void test_send_recv_cb_resubmit_disown( + oomph::context& ctxt, std::size_t size, int tid, int num_threads, bool user_alloc) { using rank_type = test_environment::rank_type; using tag_type = test_environment::tag_type; @@ -552,13 +542,13 @@ test_send_recv_cb_resubmit_disown(oomph::context& ctxt, std::size_t size, int ti Env env(ctxt, size, tid, num_threads, user_alloc); - volatile int received = 0; - volatile int sent = 0; + int volatile received = 0; + int volatile sent = 0; struct recursive_send_callback { - Env& env; - volatile int& sent; + Env& env; + int volatile& sent; void operator()(message msg, rank_type dst, tag_type tag) { @@ -570,8 +560,8 @@ test_send_recv_cb_resubmit_disown(oomph::context& ctxt, std::size_t size, int ti struct recursive_recv_callback { - Env& env; - volatile int& received; + Env& env; + int volatile& received; void 
operator()(message msg, rank_type src, tag_type tag) { From cf4b3bf340cfe5b89b52a4e8ad174c00a3331daa Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Mon, 7 Jul 2025 11:30:23 +0200 Subject: [PATCH 52/68] disable clang-format for cmake generated code, fix missing include --- cmake/config.hpp.in | 3 +++ cmake/oomph_defs.hpp.in | 2 ++ include/oomph/detail/communicator_helper.hpp | 1 + 3 files changed, 6 insertions(+) diff --git a/cmake/config.hpp.in b/cmake/config.hpp.in index 458b038a..e9fcf5e4 100644 --- a/cmake/config.hpp.in +++ b/cmake/config.hpp.in @@ -26,9 +26,12 @@ #cmakedefine01 OOMPH_USE_FAST_PIMPL #cmakedefine01 OOMPH_ENABLE_BARRIER + +// clang-format off #define OOMPH_RECURSION_DEPTH @OOMPH_RECURSION_DEPTH@ #define OOMPH_VERSION @OOMPH_VERSION_NUMERIC@ #define OOMPH_VERSION_MAJOR @OOMPH_VERSION_MAJOR@ #define OOMPH_VERSION_MINOR @OOMPH_VERSION_MINOR@ #define OOMPH_VERSION_PATCH @OOMPH_VERSION_PATCH@ +// clang-format on diff --git a/cmake/oomph_defs.hpp.in b/cmake/oomph_defs.hpp.in index 70ae8732..a52a943f 100644 --- a/cmake/oomph_defs.hpp.in +++ b/cmake/oomph_defs.hpp.in @@ -15,7 +15,9 @@ namespace oomph { namespace fort { + // clang-format off using fp_type = @OOMPH_FORTRAN_FP@; + // clang-format on typedef enum { OomphBarrierGlobal=1, OomphBarrierThread=2, diff --git a/include/oomph/detail/communicator_helper.hpp b/include/oomph/detail/communicator_helper.hpp index 44f6d828..8335c6eb 100644 --- a/include/oomph/detail/communicator_helper.hpp +++ b/include/oomph/detail/communicator_helper.hpp @@ -11,6 +11,7 @@ #include #include +#include #include #include //#include From de6158ed6afb1f0df082f52a8a348f87d7553c9e Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Mon, 7 Jul 2025 13:51:34 +0200 Subject: [PATCH 53/68] Add shm provider support to libfabric transport layer The shm addressing method uses a string type, we add a convenience function to convert an fi_addr type to string to help with displaying addresses from any provider Add a simple 
libfabric utility checker that displays fi_info so that when testing for the first time on a new machine one can try to view the capabilities/options exposed by the provider selected --- CMakeLists.txt | 9 ++--- cmake/oomph_libfabric.cmake | 9 ++--- src/libfabric/CMakeLists.txt | 21 +++++++++++ src/libfabric/context.cpp | 18 +++++----- src/libfabric/context.hpp | 6 ++-- src/libfabric/controller.hpp | 2 +- src/libfabric/controller_base.hpp | 50 +++++++++++++++----------- src/libfabric/fabric_error.hpp | 2 +- src/libfabric/locality.hpp | 22 +++++++++++- src/libfabric/test/check_libfabric.cpp | 29 +++++++++++++++ 10 files changed, 123 insertions(+), 45 deletions(-) create mode 100644 src/libfabric/test/check_libfabric.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 90a582d1..3db53422 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,12 +1,6 @@ cmake_minimum_required(VERSION 3.17) # CMake version is set at 3.17 because of find_package(CUDAToolkit) -if (NOT ${CMAKE_VERSION} VERSION_LESS 3.27) - # new in 3.27: additionally use uppercase _ROOT - # environment and CMake variables for find_package - cmake_policy(SET CMP0144 NEW) -endif() - set(OOMPH_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") list(APPEND CMAKE_MODULE_PATH "${OOMPH_MODULE_PATH}") @@ -28,6 +22,7 @@ endfunction() set_policy(CMP0074 NEW) # find_package uses XXX_ROOT vars using PackageName set_policy(CMP0144 NEW) # find_package allows XXX_ROOT vars using PACKAGENAME Uppercase +set_policy(CMP0167 NEW) # find_package uses new boost config (boost 1.70 onwards) # --------------------------------------------------------------------- # CMake setup, C++ version, build type, modules, etc @@ -92,7 +87,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/config.hpp.in ${CMAKE_CURRENT_BINARY_DIR}/include/oomph/config.hpp @ONLY) install(FILES ${PROJECT_BINARY_DIR}/include/oomph/config.hpp DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/oomph) -configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/cmake_config.inc.in 
+configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/cmake_config.inc.in ${CMAKE_CURRENT_BINARY_DIR}/include/oomph/cmake_config.inc) # --------------------------------------------------------------------- diff --git a/cmake/oomph_libfabric.cmake b/cmake/oomph_libfabric.cmake index 758f3f4d..18c90369 100644 --- a/cmake/oomph_libfabric.cmake +++ b/cmake/oomph_libfabric.cmake @@ -95,7 +95,7 @@ if (OOMPH_WITH_LIBFABRIC) set(OOMPH_LIBFABRIC_PROVIDER "tcp" CACHE STRING "The provider (cxi(Cray Slingshot)/efa(Amazon Elastic)/gni(Cray Gemini)/psm2(Intel Omni-Path)/tcp/verbs(Infiniband))") set_property(CACHE OOMPH_LIBFABRIC_PROVIDER PROPERTY STRINGS - "cxi" "efa" "gni" "psm2" "tcp" "verbs") + "cxi" "efa" "gni" "psm2" "tcp" "verbs" "shm") oomph_libfabric_add_config_define_namespace( DEFINE HAVE_LIBFABRIC_PROVIDER @@ -141,6 +141,10 @@ if (OOMPH_WITH_LIBFABRIC) oomph_libfabric_add_config_define_namespace( DEFINE HAVE_LIBFABRIC_PSM2 NAMESPACE libfabric) + elseif(OOMPH_LIBFABRIC_PROVIDER MATCHES "shm") + oomph_libfabric_add_config_define_namespace( + DEFINE HAVE_LIBFABRIC_SHM + NAMESPACE libfabric) endif() #------------------------------------------------------------------------------ @@ -171,6 +175,3 @@ if (OOMPH_WITH_LIBFABRIC) ) target_include_directories(oomph_libfabric PRIVATE "${PROJECT_BINARY_DIR}/src/libfabric") endif() - - - diff --git a/src/libfabric/CMakeLists.txt b/src/libfabric/CMakeLists.txt index c82e387d..fa99a413 100644 --- a/src/libfabric/CMakeLists.txt +++ b/src/libfabric/CMakeLists.txt @@ -20,3 +20,24 @@ target_sources(oomph_libfabric PRIVATE ${oomph_sources_libfabric}) target_sources(oomph_libfabric PRIVATE context.cpp) target_sources(oomph_libfabric PRIVATE operation_context.cpp) target_sources(oomph_libfabric PRIVATE locality.cpp) + +# if we are using GPU, then the libfabric library was probably built with +# gpu support, and we should link to cuda to prevent link errors +if (HWMALLOC_ENABLE_DEVICE) + include(CheckLanguage) + check_language(CUDA) + + 
if(CMAKE_CUDA_COMPILER) + enable_language(CUDA) + else() + message(STATUS "No CUDA support") + return() + endif() + + find_package(CUDAToolkit) + target_link_libraries(oomph_libfabric PRIVATE CUDA::cudart) +endif() + +add_executable(check_libfabric test/check_libfabric.cpp) +target_link_libraries(check_libfabric PUBLIC oomph_libfabric) +target_include_directories(check_libfabric PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/src/libfabric/context.cpp b/src/libfabric/context.cpp index cb7757a2..68112e9e 100644 --- a/src/libfabric/context.cpp +++ b/src/libfabric/context.cpp @@ -23,7 +23,7 @@ namespace oomph { using controller_type = libfabric::controller; context_impl::context_impl(MPI_Comm comm, bool thread_safe, bool message_pool_never_free, - std::size_t message_pool_reserve) + std::size_t message_pool_reserve, bool debug) : context_base(comm, thread_safe) , m_heap{this, message_pool_never_free, message_pool_reserve} , m_recv_cb_queue(128) @@ -40,12 +40,12 @@ namespace oomph { debug::ptr(m_ctxt_tag))); // TODO fix the thread safety - // problem: controller is a singleton and has problems when 2 contexts are created in the - // following order: single threaded first, then multi-threaded after - //int threads = thread_safe ? std::thread::hardware_concurrency() : 1; - //int threads = std::thread::hardware_concurrency(); + // problem: controller is a singleton and has problems when 2 contexts are created + // in the following order: single threaded first, then multi-threaded after + // int threads = thread_safe ? 
std::thread::hardware_concurrency() : 1; + // int threads = std::thread::hardware_concurrency(); int threads = boost::thread::physical_concurrency(); - m_controller = init_libfabric_controller(this, comm, rank, size, threads); + m_controller = init_libfabric_controller(this, comm, rank, size, threads, debug); m_domain = m_controller->get_domain(); } @@ -65,14 +65,15 @@ namespace oomph { { static char buffer[32]; std::string temp = std::to_string(m_controller->rendezvous_threshold()); - strncpy(buffer, temp.c_str(), std::min(size_t(31), std::strlen(temp.c_str()))); + if (temp.size() > 31) throw std::runtime_error("Bad string option check, fix please"); + strcpy(buffer, temp.c_str()); return buffer; } else { return "unspecified"; } } std::shared_ptr context_impl::init_libfabric_controller( - oomph::context_impl* /*ctx*/, MPI_Comm comm, int rank, int size, int threads) + oomph::context_impl* /*ctx*/, MPI_Comm comm, int rank, int size, int threads, bool debug) { // only allow one thread to pass, make other wait static std::mutex m_init_mutex; @@ -84,6 +85,7 @@ namespace oomph { debug(NS_DEBUG::str<>("New Controller"), "rank", debug::dec<3>(rank), "size", debug::dec<3>(size), "threads", debug::dec<3>(threads))); instance.reset(new controller_type()); + if (debug) instance->enable_debug(); instance->initialize(HAVE_LIBFABRIC_PROVIDER, rank == 0, size, threads, comm); } return instance; diff --git a/src/libfabric/context.hpp b/src/libfabric/context.hpp index 7a936223..e8e71837 100644 --- a/src/libfabric/context.hpp +++ b/src/libfabric/context.hpp @@ -49,8 +49,8 @@ namespace oomph { // -------------------------------------------------- // create a singleton ptr to a libfabric controller that // can be shared between oomph context objects - static std::shared_ptr init_libfabric_controller( - oomph::context_impl* ctx, MPI_Comm comm, int rank, int size, int threads); + static std::shared_ptr init_libfabric_controller(oomph::context_impl* ctx, + MPI_Comm comm, int rank, int 
size, int threads, bool debug = false); // queue for shared recv callbacks callback_queue m_recv_cb_queue; @@ -59,7 +59,7 @@ namespace oomph { public: context_impl(MPI_Comm comm, bool thread_safe, bool message_pool_never_free, - std::size_t message_pool_reserve); + std::size_t message_pool_reserve, bool debug = false); context_impl(context_impl const&) = delete; context_impl(context_impl&&) = delete; diff --git a/src/libfabric/controller.hpp b/src/libfabric/controller.hpp index 95e3ad17..4b711ea9 100644 --- a/src/libfabric/controller.hpp +++ b/src/libfabric/controller.hpp @@ -53,7 +53,7 @@ namespace NS_DEBUG { template inline /*constexpr*/ NS_DEBUG::print_threshold cnt_deb("CONTROL"); // - static NS_DEBUG::enable_print cnt_err("CONTROL"); + static NS_DEBUG::enable_print cnt_err("CONTROL"); } // namespace NS_DEBUG namespace oomph::libfabric { diff --git a/src/libfabric/controller_base.hpp b/src/libfabric/controller_base.hpp index a5eb1705..5e7bd133 100644 --- a/src/libfabric/controller_base.hpp +++ b/src/libfabric/controller_base.hpp @@ -45,8 +45,8 @@ #include "memory_region.hpp" #include "operation_context_base.hpp" -//#define DISABLE_FI_INJECT -//#define EXCESSIVE_POLLING_BACKOFF_MICRO_S 50 +// #define DISABLE_FI_INJECT +// #define EXCESSIVE_POLLING_BACKOFF_MICRO_S 50 // ------------------------------------------------------------------ @@ -165,9 +165,9 @@ static int libfabric_rendezvous_threshold(int def_val) // ------------------------------------------------ #ifdef HAVE_LIBFABRIC_GNI # include "rdma/fi_ext_gni.h" -//#define OOMPH_GNI_REG "none" +// #define OOMPH_GNI_REG "none" # define OOMPH_GNI_REG "internal" -//#define OOMPH_GNI_REG "udreg" +// #define OOMPH_GNI_REG "udreg" static std::vector> gni_strs = { {GNI_MR_CACHE, "GNI_MR_CACHE"}, @@ -203,9 +203,9 @@ static std::vector> gni_ints = { // clang-format on #endif -// the libfabric library expects us to ask for an API supported version, so if we know we support -// api 2.0, then we ask for that, but the cxi 
legacy library on daint only supports 1.15, -// so drop back to that version if needed +// the libfabric library expects us to ask for an API supported version, so if +// we know we support api 2.0, then we ask for that, but the cxi legacy library +// on daint only supports 1.15, so drop back to that version if needed #if defined(OOMPH_LIBFABRIC_V1_API) # define LIBFABRIC_FI_VERSION_MAJOR 1 # define LIBFABRIC_FI_VERSION_MINOR 15 @@ -382,6 +382,7 @@ namespace NS_LIBFABRIC { endpoint_context_pool tx_endpoints_; endpoint_context_pool rx_endpoints_; + bool display_fabric_info_; // for debugging purposes, show fi_info hints struct fi_info* fabric_info_; struct fid_fabric* fabric_; struct fid_domain* fabric_domain_; @@ -441,6 +442,7 @@ namespace NS_LIBFABRIC { : eps_(nullptr) , tx_endpoints_(1) , rx_endpoints_(1) + , display_fabric_info_(false) , fabric_info_(nullptr) , fabric_(nullptr) , fabric_domain_(nullptr) @@ -511,6 +513,10 @@ namespace NS_LIBFABRIC { fi_freeinfo(fabric_info_); } + // -------------------------------------------------------------------- + // only used in check_libfabric quick test for helpful output + void enable_debug() { display_fabric_info_ = true; } + // -------------------------------------------------------------------- // setup an endpoint for receiving messages, // usually an rx endpoint is shared by all threads @@ -580,7 +586,8 @@ namespace NS_LIBFABRIC { else if (endpoint_type_ != endpoint_type::scalableTxRx) { #if defined(HAVE_LIBFABRIC_SOCKETS) || defined(HAVE_LIBFABRIC_TCP) || \ - defined(HAVE_LIBFABRIC_VERBS) || defined(HAVE_LIBFABRIC_CXI) || defined(HAVE_LIBFABRIC_EFA) + defined(HAVE_LIBFABRIC_SHM) || defined(HAVE_LIBFABRIC_VERBS) || defined(HAVE_LIBFABRIC_CXI) || \ + defined(HAVE_LIBFABRIC_EFA) // it appears that the rx endpoint cannot be enabled if it does not // have a Tx CQ (at least when using sockets), so we create a dummy // Tx CQ and bind it just to stop libfabric from triggering an error. 
@@ -792,6 +799,8 @@ namespace NS_LIBFABRIC { fabric_hints_->addr_format = FI_SOCKADDR_IN; #elif defined(HAVE_LIBFABRIC_EFA) fabric_hints_->addr_format = FI_ADDR_EFA; +#elif defined(HAVE_LIBFABRIC_SHM) + fabric_hints_->addr_format = FI_ADDR_STR; #endif fabric_hints_->caps = caps_flags(); @@ -824,7 +833,7 @@ namespace NS_LIBFABRIC { LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("FI_THREAD_FID"))); // Enable thread safe mode (Does not work with psm2 provider) // fabric_hints_->domain_attr->threading = FI_THREAD_SAFE; - //fabric_hints_->domain_attr->threading = FI_THREAD_FID; + // fabric_hints_->domain_attr->threading = FI_THREAD_FID; fabric_hints_->domain_attr->threading = threadlevel_flags(); } else @@ -940,6 +949,12 @@ namespace NS_LIBFABRIC { // is set by querying the tx/tx attr sizes tx_attr_size_ = std::min(size_t(512), fabric_info_->tx_attr->size / 2); rx_attr_size_ = std::min(size_t(512), fabric_info_->rx_attr->size / 2); + // Print fabric info to a human-readable string if available + if (display_fabric_info_ && fabric_info_) + { + char const* info_str = fi_tostr(fabric_info_, FI_TYPE_INFO); + if (info_str) { std::cout << "Libfabric fabric info:\n" << info_str << std::endl; } + } fi_freeinfo(fabric_hints_); } @@ -1237,21 +1252,16 @@ namespace NS_LIBFABRIC { { std::string err = std::to_string(addrlen) + "=" + std::to_string(locality_defs::array_size); - NS_LIBFABRIC::fabric_error(ret, "fi_getname - size error or other problem " + err); + NS_LIBFABRIC::fabric_error(ret, "fi_getname - error (address size ?) 
" + err); } // optimized out when debug logging is false if constexpr (NS_DEBUG::cnb_deb.is_enabled()) { - std::stringstream temp1; - for (std::size_t i = 0; i < locality_defs::array_length; ++i) - { - temp1 << debug::ipaddr(&local_addr[i]) << " - "; - } - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("raw address data"), "size", debug::dec<>(addrlen), " : ", - temp1.str().c_str())); + debug(debug::str<>("raw address data"), "size", debug::dec<4>(addrlen), " : ", + locality::to_str(local_addr, av_))); + std::stringstream temp2; for (std::size_t i = 0; i < locality_defs::array_length; ++i) { @@ -1310,7 +1320,7 @@ namespace NS_LIBFABRIC { inline bool isTerminated() { return false; - //return (qp_endpoint_map_.size() == 0); + // return (qp_endpoint_map_.size() == 0); } // -------------------------------------------------------------------- @@ -1322,7 +1332,7 @@ namespace NS_LIBFABRIC { { int ret = fi_av_lookup(av_, fi_addr_t(i), addr.fabric_data_writable(), &addrlen); addr.set_fi_address(fi_addr_t(i)); - if ((ret == 0) && (addrlen == locality_defs::array_size)) + if ((ret == 0) && (addrlen <= locality_defs::array_size)) { LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("address vector"), debug::dec<3>(i), iplocality(addr))); diff --git a/src/libfabric/fabric_error.hpp b/src/libfabric/fabric_error.hpp index 325975a7..b1508f28 100644 --- a/src/libfabric/fabric_error.hpp +++ b/src/libfabric/fabric_error.hpp @@ -19,7 +19,7 @@ namespace NS_DEBUG { // cppcheck-suppress ConfigurationNotChecked - static NS_DEBUG::enable_print err_deb("ERROR__"); + static NS_DEBUG::enable_print err_deb("ERROR__"); } // namespace NS_DEBUG namespace NS_LIBFABRIC { diff --git a/src/libfabric/locality.hpp b/src/libfabric/locality.hpp index 84f5ddc2..1fd35425 100644 --- a/src/libfabric/locality.hpp +++ b/src/libfabric/locality.hpp @@ -15,9 +15,11 @@ #include #include // +#include +#include +// #include #include -#include // #include "oomph_libfabric_defines.hpp" @@ -45,6 +47,10 @@ # define 
HAVE_LIBFABRIC_LOCALITY_SOCKADDR #endif +#if defined(HAVE_LIBFABRIC_SHM) +# define HAVE_LIBFABRIC_LOCALITY_SIZE 24 +#endif + namespace oomph { // cppcheck-suppress ConfigurationNotChecked static NS_DEBUG::enable_print loc_deb("LOCALTY"); @@ -184,6 +190,8 @@ namespace oomph { namespace libfabric { return data_[0]; #elif defined(HAVE_LIBFABRIC_EFA) return data_[0]; +#elif defined(HAVE_LIBFABRIC_SHM) + return data_[0]; #else throw fabric_error(0, "unsupported fabric provider, please fix ASAP"); #endif @@ -199,6 +207,8 @@ namespace oomph { namespace libfabric { return data[0]; #elif defined(HAVE_LIBFABRIC_EFA) return data[0]; +#elif defined(HAVE_LIBFABRIC_SHM) + return data[0]; #else throw fabric_error(0, "unsupported fabric provider, please fix ASAP"); #endif @@ -219,6 +229,16 @@ namespace oomph { namespace libfabric { inline char* fabric_data_writable() { return reinterpret_cast(data_.data()); } + static std::string to_str(locality_data const& data, struct fid_av* av) + { + char sbuf[256]; + size_t buflen = 256; + char const* straddr_ret = fi_av_straddr(av, data.data(), sbuf, &buflen); + std::string result = straddr_ret ? straddr_ret : ""; + // free((char*)(straddr_ret)); + return result; + } + private: friend bool operator==(locality const& lhs, locality const& rhs) { diff --git a/src/libfabric/test/check_libfabric.cpp b/src/libfabric/test/check_libfabric.cpp new file mode 100644 index 00000000..070c8f11 --- /dev/null +++ b/src/libfabric/test/check_libfabric.cpp @@ -0,0 +1,29 @@ +/* + * ghex-org + * + * Copyright (c) 2014-2023, ETH Zurich + * All rights reserved. + * + * Please, refer to the LICENSE file in the root directory. 
+ * SPDX-License-Identifier: BSD-3-Clause + */ + +#include +#include +#include "../benchmarks/mpi_environment.hpp" +// +#include "../communicator.hpp" +#include "../context.hpp" + +int main(int argc, char** argv) +{ + using namespace oomph; + bool const message_pool_never_free = false; + std::size_t const message_pool_reserve = 1024 * 1024 * 128; + bool const multi_threaded = true; + bool debug = true; + // + mpi_environment env(multi_threaded, argc, argv); + auto ctxt = + context_impl(MPI_COMM_WORLD, true, message_pool_never_free, message_pool_reserve, debug); +} From 31a22c68f76d2d4242a2ce2547686d9320dd2ba8 Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Mon, 7 Jul 2025 14:28:45 +0200 Subject: [PATCH 54/68] Remove unused includes and fix warnings in libfabric backend --- src/libfabric/context.hpp | 1 - src/libfabric/controller.hpp | 11 ----------- src/libfabric/controller_base.hpp | 6 ------ src/libfabric/fabric_error.hpp | 1 - src/libfabric/memory_region.hpp | 3 +-- 5 files changed, 1 insertion(+), 21 deletions(-) diff --git a/src/libfabric/context.hpp b/src/libfabric/context.hpp index e8e71837..cf02c850 100644 --- a/src/libfabric/context.hpp +++ b/src/libfabric/context.hpp @@ -10,7 +10,6 @@ #pragma once #include -#include #include #include diff --git a/src/libfabric/controller.hpp b/src/libfabric/controller.hpp index 4b711ea9..53c67bad 100644 --- a/src/libfabric/controller.hpp +++ b/src/libfabric/controller.hpp @@ -9,23 +9,13 @@ */ #pragma once -#include -#include -#include -#include -#include -#include -#include -#include #include #include -#include #include // #include #include #include -#include // #include #include @@ -38,7 +28,6 @@ #include "controller_base.hpp" #include "fabric_error.hpp" #include "locality.hpp" -#include "memory_region.hpp" #include "oomph_libfabric_defines.hpp" #include "operation_context.hpp" // diff --git a/src/libfabric/controller_base.hpp b/src/libfabric/controller_base.hpp index 5e7bd133..51057234 100644 --- 
a/src/libfabric/controller_base.hpp +++ b/src/libfabric/controller_base.hpp @@ -9,18 +9,12 @@ */ #pragma once -#include -#include #include -#include -#include #include -#include #include #include #include #include -#include // #include #include diff --git a/src/libfabric/fabric_error.hpp b/src/libfabric/fabric_error.hpp index b1508f28..2ec59997 100644 --- a/src/libfabric/fabric_error.hpp +++ b/src/libfabric/fabric_error.hpp @@ -10,7 +10,6 @@ #pragma once #include -#include #include // #include diff --git a/src/libfabric/memory_region.hpp b/src/libfabric/memory_region.hpp index f1eb5326..f2cd5d45 100644 --- a/src/libfabric/memory_region.hpp +++ b/src/libfabric/memory_region.hpp @@ -15,7 +15,6 @@ #include // #include -#include #include #include "fabric_error.hpp" @@ -77,7 +76,7 @@ struct fi_mr_attr { // struct iovec addresses = {/*.iov_base = */ const_cast(buf), /*.iov_len = */ len}; fi_mr_attr attr = { - /*.mr_iov = */ &addresses, + /*.mr_iov = */ {&addresses}, /*.iov_count = */ 1, /*.access = */ access_flags, /*.offset = */ offset, From b286e796c8a9a82762c6a5fe2b8c542dc8191d32 Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Mon, 7 Jul 2025 14:36:22 +0200 Subject: [PATCH 55/68] Split send/recv test into independent cpu and device mode tests --- test/CMakeLists.txt | 129 +++++++++++++++++++++++++--------------- test/test_send_recv.cpp | 80 +++++++++++++++++-------- 2 files changed, 136 insertions(+), 73 deletions(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 5217bbaf..d6fbfe1f 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,6 +1,8 @@ add_subdirectory(mpi_runner) -set(OOMPH_TEST_LEAK_GPU_MEMORY OFF CACHE BOOL "Do not free memory (bug on Piz Daint)") +set(OOMPH_TEST_LEAK_GPU_MEMORY + OFF + CACHE BOOL "Do not free memory (bug on Piz Daint)") # --------------------------------------------------------------------- # compile tests @@ -10,30 +12,57 @@ set(OOMPH_TEST_LEAK_GPU_MEMORY OFF CACHE BOOL "Do not free memory (bug on Piz Da 
set(serial_tests test_unique_function test_unsafe_shared_ptr) # list of parallel tests to be executed -set(parallel_tests test_context test_send_recv test_send_multi test_cancel test_locality) -#test_tag_range) -if (OOMPH_ENABLE_BARRIER) - list(APPEND parallel_tests test_barrier) +set(parallel_tests test_context test_send_recv test_send_multi test_cancel + test_locality) + +# list of parallel tests that also have device code variants +if(HWMALLOC_ENABLE_DEVICE) + set(device_tests test_send_recv) +endif() + +# test_tag_range) +if(OOMPH_ENABLE_BARRIER) + list(APPEND parallel_tests test_barrier) endif() -# creates an object library (i.e. *.o file) +# creates an object library (i.e. *.o file), if DEVICE is specified, extra flags +# are added and the target name has a suffix function(compile_test t_) - set(t ${t_}_obj) - add_library(${t} OBJECT ${t_}.cpp) - oomph_target_compile_options(${t}) - if (OOMPH_TEST_LEAK_GPU_MEMORY) - target_compile_definitions(${t} PRIVATE OOMPH_TEST_LEAK_GPU_MEMORY) - endif() - target_link_libraries(${t} PRIVATE ext-gtest) - target_link_libraries(${t} PUBLIC oomph) + set(options DEVICE) + cmake_parse_arguments(CT "${options}" "" "" ${ARGN}) + set(source_filename_ "${t_}.cpp") + set(suffix_ "") + if(CT_DEVICE) + # Make a copy the input source file in the build directory, add a suffix + set(suffix_ "_device") + cmake_path(REPLACE_EXTENSION source_filename_ LAST_ONLY "${suffix_}.cpp" + OUTPUT_VARIABLE src_name_) + set(dst_file "${CMAKE_CURRENT_BINARY_DIR}/${src_name_}") + configure_file("${source_filename_}" "${dst_file}" COPYONLY) + set(source_filename_ "${dst_file}") + endif() + set(target_ ${t}${suffix_}_obj) + add_library(${target_} OBJECT ${source_filename_}) + oomph_target_compile_options(${target_}) + target_compile_definitions( + ${target_} + PRIVATE $<$:OOMPH_TEST_LEAK_GPU_MEMORY>) + target_compile_definitions( + ${target_} PRIVATE $<$:TEST_DEVICE_MODE_ONLY>) + target_link_libraries(${target_} PRIVATE ext-gtest) + 
target_link_libraries(${target_} PUBLIC oomph) endfunction() -# compile an object library for each test -# tests will be compiled only once and then linked against all enabled oomph backends +# compile an object library for each test tests will be compiled only once and +# then linked against all enabled oomph backends list(APPEND all_tests ${serial_tests} ${parallel_tests}) list(REMOVE_DUPLICATES all_tests) foreach(t ${all_tests}) - compile_test(${t}) + compile_test(${t}) + if(${t} IN_LIST device_tests) + # generate a second version of the obj file, but with DEVICE code enabled + compile_test(${t} DEVICE) + endif() endforeach() # --------------------------------------------------------------------- @@ -41,49 +70,55 @@ endforeach() # --------------------------------------------------------------------- function(reg_serial_test t) - add_executable(${t} $) - oomph_target_compile_options(${t}) - target_link_libraries(${t} PRIVATE ext-gtest) - target_link_libraries(${t} PRIVATE oomph_common) - add_test( - NAME ${t} - COMMAND $) + add_executable(${t} $) + oomph_target_compile_options(${t}) + target_link_libraries(${t} PRIVATE ext-gtest) + target_link_libraries(${t} PRIVATE oomph_common) + add_test(NAME ${t} COMMAND $) endfunction() foreach(t ${serial_tests}) - reg_serial_test(${t}) + reg_serial_test(${t}) endforeach() # creates an executable by linking to object file and to selected oomph backend function(reg_parallel_test t_ lib n) - set(t ${t_}_${lib}) - add_executable(${t} $) - oomph_target_compile_options(${t}) - target_link_libraries(${t} PRIVATE gtest_main_mpi) - target_link_libraries(${t} PRIVATE oomph_${lib}) - add_test( - NAME ${t} - COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} ${n} ${MPIEXEC_PREFLAGS} - $ ${MPIEXEC_POSTFLAGS}) - set_tests_properties(${t} PROPERTIES RUN_SERIAL TRUE) + set(t ${t_}_${lib}) + add_executable(${t} $) + oomph_target_compile_options(${t}) + target_link_libraries(${t} PRIVATE gtest_main_mpi) + target_link_libraries(${t} 
PRIVATE oomph_${lib}) + add_test(NAME ${t} + COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} ${n} + ${MPIEXEC_PREFLAGS} $ ${MPIEXEC_POSTFLAGS}) + set_tests_properties(${t} PROPERTIES RUN_SERIAL TRUE) endfunction() -if (OOMPH_WITH_MPI) - foreach(t ${parallel_tests}) - reg_parallel_test(${t} mpi 4) - endforeach() +if(OOMPH_WITH_MPI) + foreach(t ${parallel_tests}) + reg_parallel_test(${t} mpi 4) + endforeach() + foreach(t ${device_tests}) + reg_parallel_test(${t}_device mpi 4) + endforeach() endif() -if (OOMPH_WITH_UCX) - foreach(t ${parallel_tests}) - reg_parallel_test(${t} ucx 4) - endforeach() +if(OOMPH_WITH_UCX) + foreach(t ${parallel_tests}) + reg_parallel_test(${t} ucx 4) + endforeach() + foreach(t ${device_tests}) + reg_parallel_test(${t}_device ucx 4) + endforeach() endif() -if (OOMPH_WITH_LIBFABRIC) - foreach(t ${parallel_tests}) - reg_parallel_test(${t} libfabric 4) - endforeach() +if(OOMPH_WITH_LIBFABRIC) + foreach(t ${parallel_tests}) + reg_parallel_test(${t} libfabric 4) + endforeach() + foreach(t ${device_tests}) + reg_parallel_test(${t}_device libfabric 4) + endforeach() endif() add_subdirectory(bindings) diff --git a/test/test_send_recv.cpp b/test/test_send_recv.cpp index 08afefe8..f991df32 100644 --- a/test/test_send_recv.cpp +++ b/test/test_send_recv.cpp @@ -7,13 +7,13 @@ * Please, refer to the LICENSE file in the root directory. 
* SPDX-License-Identifier: BSD-3-Clause */ -#include +#include #include -#include -#include #include +// use this path because device version in build dir needs to find include +#include #include -#include "./mpi_runner/mpi_test_fixture.hpp" +#include "../test/mpi_runner/mpi_test_fixture.hpp" #define NITERS 50 #define SIZE 64 @@ -248,13 +248,13 @@ void test_send_recv( } } -TEST_F(mpi_test_fixture, send_recv) -{ - launch_test(test_send_recv); -#if HWMALLOC_ENABLE_DEVICE - launch_test(test_send_recv); +#ifndef TEST_DEVICE_MODE_ONLY +TEST_F(mpi_test_fixture, send_recv) { launch_test(test_send_recv); } +#else +# if HWMALLOC_ENABLE_DEVICE +TEST_F(mpi_test_fixture, send_recv_device) { launch_test(test_send_recv); } +# endif #endif -} // callback: pass by l-value reference // =================================== @@ -315,13 +315,16 @@ void test_send_recv_cb( EXPECT_EQ(sent, NITERS); } -TEST_F(mpi_test_fixture, send_recv_cb) +#ifndef TEST_DEVICE_MODE_ONLY +TEST_F(mpi_test_fixture, send_recv_cb) { launch_test(test_send_recv_cb); } +#else +# if HWMALLOC_ENABLE_DEVICE +TEST_F(mpi_test_fixture, send_recv_cb_device) { - launch_test(test_send_recv_cb); -#if HWMALLOC_ENABLE_DEVICE launch_test(test_send_recv_cb); -#endif } +# endif +#endif // callback: pass by r-value reference (give up ownership) // ======================================================= @@ -388,13 +391,19 @@ void test_send_recv_cb_disown( EXPECT_EQ(sent, NITERS); } +#ifndef TEST_DEVICE_MODE_ONLY TEST_F(mpi_test_fixture, send_recv_cb_disown) { launch_test(test_send_recv_cb_disown); -#if HWMALLOC_ENABLE_DEVICE +} +#else +# if HWMALLOC_ENABLE_DEVICE +TEST_F(mpi_test_fixture, send_recv_cb_disown_device) +{ launch_test(test_send_recv_cb_disown); -#endif } +# endif +#endif // callback: pass by r-value reference (give up ownership), shared recv // ==================================================================== @@ -410,7 +419,7 @@ void test_send_shared_recv_cb_disown( thread_id = env.thread_id; - //volatile int 
received = 0; + // volatile int received = 0; int volatile sent = 0; auto send_callback = [&](message msg, rank_type, tag_type) { @@ -418,9 +427,10 @@ void test_send_shared_recv_cb_disown( env.smsg = std::move(msg); }; auto recv_callback = [&](message msg, rank_type, tag_type) { - //std::cout << thread_id << " " << env.thread_id << std::endl; - //if (thread_id != env.thread_id) std::cout << "other thread picked up callback" << std::endl; - //else std::cout << "my thread picked up callback" << std::endl; + // std::cout << thread_id << " " << env.thread_id << std::endl; + // if (thread_id != env.thread_id) std::cout << "other thread picked up + // callback" << std::endl; else std::cout << "my thread picked up callback" + // << std::endl; env.rmsg = std::move(msg); ++shared_received[env.thread_id]; }; @@ -467,13 +477,19 @@ void test_send_shared_recv_cb_disown( EXPECT_EQ(sent, NITERS); } +#ifndef TEST_DEVICE_MODE_ONLY TEST_F(mpi_test_fixture, send_shared_recv_cb_disown) { launch_test(test_send_shared_recv_cb_disown); -#if HWMALLOC_ENABLE_DEVICE +} +#else +# if HWMALLOC_ENABLE_DEVICE +TEST_F(mpi_test_fixture, send_shared_recv_cb_disown_device) +{ launch_test(test_send_shared_recv_cb_disown); -#endif } +# endif +#endif // callback: pass by l-value reference, and resubmit // ================================================= @@ -522,13 +538,19 @@ void test_send_recv_cb_resubmit( while (sent < NITERS || received < NITERS) { env.comm.progress(); }; } +#ifndef TEST_DEVICE_MODE_ONLY TEST_F(mpi_test_fixture, send_recv_cb_resubmit) { launch_test(test_send_recv_cb_resubmit); -#if HWMALLOC_ENABLE_DEVICE +} +#else +# if HWMALLOC_ENABLE_DEVICE +TEST_F(mpi_test_fixture, send_recv_cb_resubmit_device) +{ launch_test(test_send_recv_cb_resubmit); -#endif } +# endif +#endif // callback: pass by r-value reference (give up ownership), and resubmit // ===================================================================== @@ -580,10 +602,16 @@ void test_send_recv_cb_resubmit_disown( while 
(sent < NITERS || received < NITERS) { env.comm.progress(); }; } +#ifndef TEST_DEVICE_MODE_ONLY TEST_F(mpi_test_fixture, send_recv_cb_resubmit_disown) { launch_test(test_send_recv_cb_resubmit_disown); -#if HWMALLOC_ENABLE_DEVICE +} +#else +# if HWMALLOC_ENABLE_DEVICE +TEST_F(mpi_test_fixture, send_recv_cb_resubmit_disown_device) +{ launch_test(test_send_recv_cb_resubmit_disown); -#endif } +# endif +#endif From 06b9e0a77fcddd2a35ebd2b537b7734c68f973a1 Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Mon, 7 Jul 2025 20:09:15 +0200 Subject: [PATCH 56/68] Remove ipaddress locality functions and instead use AV fi_to_str Setting the address format is not required when the provider chooses it, make use of formatting feature provided by libfabric to simplify display of addresses and setting hints during open --- src/libfabric/CMakeLists.txt | 3 +- src/libfabric/controller.hpp | 21 ++--- src/libfabric/controller_base.hpp | 40 +++------ src/libfabric/locality.cpp | 31 ------- src/libfabric/locality.hpp | 132 +++++++----------------------- 5 files changed, 55 insertions(+), 172 deletions(-) delete mode 100644 src/libfabric/locality.cpp diff --git a/src/libfabric/CMakeLists.txt b/src/libfabric/CMakeLists.txt index fa99a413..92128897 100644 --- a/src/libfabric/CMakeLists.txt +++ b/src/libfabric/CMakeLists.txt @@ -19,9 +19,8 @@ list(TRANSFORM oomph_sources PREPEND ${CMAKE_CURRENT_SOURCE_DIR}/../ target_sources(oomph_libfabric PRIVATE ${oomph_sources_libfabric}) target_sources(oomph_libfabric PRIVATE context.cpp) target_sources(oomph_libfabric PRIVATE operation_context.cpp) -target_sources(oomph_libfabric PRIVATE locality.cpp) -# if we are using GPU, then the libfabric library was probably built with +# if we are using GPU, then the libfabric library was probably built with # gpu support, and we should link to cuda to prevent link errors if (HWMALLOC_ENABLE_DEVICE) include(CheckLanguage) diff --git a/src/libfabric/controller.hpp b/src/libfabric/controller.hpp index 
53c67bad..8f1d3d30 100644 --- a/src/libfabric/controller.hpp +++ b/src/libfabric/controller.hpp @@ -95,14 +95,17 @@ namespace oomph::libfabric { void MPI_exchange_localities(fid_av* av, MPI_Comm comm, int rank, int size) { [[maybe_unused]] auto scp = NS_DEBUG::cnt_deb<9>.scope(NS_DEBUG::ptr(this), __func__); - std::vector localities(size * locality_defs::array_size, 0); + + // array of empty locality objects + std::vector localities(size); // if (rank > 0) { LF_DEB(NS_DEBUG::cnt_deb<9>, - debug(debug::str<>("sending here"), iplocality(here_), "size", + debug(debug::str<>("sending here"), here_.to_str(), "size", locality_defs::array_size)); - /*int err = */ MPI_Send(here_.fabric_data(), locality_defs::array_size, MPI_CHAR, + /*int err = */ MPI_Send(here_.fabric_data().data(), locality_defs::array_size, + MPI_CHAR, 0, // dst rank 0, // tag comm); @@ -120,14 +123,14 @@ namespace oomph::libfabric { else { LF_DEB(NS_DEBUG::cnt_deb<9>, debug(debug::str<>("receiving addresses"))); - memcpy(&localities[0], here_.fabric_data(), locality_defs::array_size); + memcpy(&localities[0], here_.fabric_data().data(), locality_defs::array_size); for (int i = 1; i < size; ++i) { LF_DEB(NS_DEBUG::cnt_deb<9>, debug(debug::str<>("receiving address"), debug::dec<>(i))); MPI_Status status; - /*int err = */ MPI_Recv(&localities[i * locality_defs::array_size], - size * locality_defs::array_size, MPI_CHAR, + /*int err = */ MPI_Recv(&localities[i], size * locality_defs::array_size, + MPI_CHAR, i, // src rank 0, // tag comm, &status); @@ -152,10 +155,8 @@ namespace oomph::libfabric { LF_DEB(NS_DEBUG::cnt_deb<9>, debug(debug::str<>("populating vector"))); for (int i = 0; i < size; ++i) { - locality temp; - int offset = i * locality_defs::array_size; - memcpy(temp.fabric_data_writable(), &localities[offset], locality_defs::array_size); - insert_address(av, temp); + locality temp(localities[i], av); + insert_address(temp); } } diff --git a/src/libfabric/controller_base.hpp 
b/src/libfabric/controller_base.hpp index 51057234..a91f8bc8 100644 --- a/src/libfabric/controller_base.hpp +++ b/src/libfabric/controller_base.hpp @@ -666,7 +666,7 @@ namespace NS_LIBFABRIC { // once enabled we can get the address enable_endpoint(eps_->ep_rx_.get_ep(), "rx here"); here_ = get_endpoint_address(&eps_->ep_rx_.get_ep()->fid); - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("setting 'here'"), iplocality(here_))); + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("setting 'here'"), here_.to_str())); // // if we are using scalable endpoints, then setup tx/rx contexts // // we will us a single endpoint for all Tx/Rx contexts @@ -787,25 +787,9 @@ namespace NS_LIBFABRIC { throw NS_LIBFABRIC::fabric_error(-1, "Failed to allocate fabric hints"); } - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Here locality"), iplocality(here_))); - -#if defined(HAVE_LIBFABRIC_SOCKETS) || defined(HAVE_LIBFABRIC_TCP) || defined(HAVE_LIBFABRIC_VERBS) - fabric_hints_->addr_format = FI_SOCKADDR_IN; -#elif defined(HAVE_LIBFABRIC_EFA) - fabric_hints_->addr_format = FI_ADDR_EFA; -#elif defined(HAVE_LIBFABRIC_SHM) - fabric_hints_->addr_format = FI_ADDR_STR; -#endif - - fabric_hints_->caps = caps_flags(); - - fabric_hints_->mode = FI_CONTEXT /*| FI_MR_LOCAL*/; - if (provider.c_str() == std::string("tcp")) - { - fabric_hints_->fabric_attr->prov_name = - strdup(std::string(provider + ";ofi_rxm").c_str()); - } - else if (provider.c_str() == std::string("verbs")) + // setup the provider we want to use before getting info + if ((provider.c_str() == std::string("tcp")) || + (provider.c_str() == std::string("verbs"))) { fabric_hints_->fabric_attr->prov_name = strdup(std::string(provider + ";ofi_rxm").c_str()); @@ -1254,7 +1238,7 @@ namespace NS_LIBFABRIC { { LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("raw address data"), "size", debug::dec<4>(addrlen), " : ", - locality::to_str(local_addr, av_))); + locality(local_addr, av_).to_str())); std::stringstream temp2; for (std::size_t i = 0; i < 
locality_defs::array_length; ++i) @@ -1264,7 +1248,7 @@ namespace NS_LIBFABRIC { LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("raw address data"), temp2.str().c_str())); } - return locality(local_addr); + return locality(local_addr, av_); } // -------------------------------------------------------------------- @@ -1329,7 +1313,7 @@ namespace NS_LIBFABRIC { if ((ret == 0) && (addrlen <= locality_defs::array_size)) { LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("address vector"), debug::dec<3>(i), iplocality(addr))); + debug(debug::str<>("address vector"), debug::dec<3>(i), addr.to_str())); } else { @@ -1494,9 +1478,9 @@ namespace NS_LIBFABRIC { [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); LF_DEB(NS_DEBUG::cnb_deb, - trace(debug::str<>("inserting AV"), iplocality(address), NS_DEBUG::ptr(av))); + trace(debug::str<>("inserting AV"), address.to_str(), NS_DEBUG::ptr(av))); fi_addr_t fi_addr = 0xffff'ffff; - int ret = fi_av_insert(av, address.fabric_data(), 1, &fi_addr, 0, nullptr); + int ret = fi_av_insert(av, address.fabric_data().data(), 1, &fi_addr, 0, nullptr); if (ret < 0) { throw NS_LIBFABRIC::fabric_error(ret, "fi_av_insert"); } else if (ret == 0) { @@ -1504,10 +1488,10 @@ namespace NS_LIBFABRIC { NS_LIBFABRIC::fabric_error(ret, "fi_av_insert did not return 1"); } // address was generated correctly, now update the locality with the fi_addr - locality new_locality(address, fi_addr); + locality new_locality(address, fi_addr, av); LF_DEB(NS_DEBUG::cnb_deb, - trace(debug::str<>("AV add"), "rank", debug::dec<>(fi_addr), - iplocality(new_locality), "fi_addr", debug::hex<4>(fi_addr))); + trace(debug::str<>("AV add"), "rank", debug::dec<>(fi_addr), new_locality.to_str(), + "fi_addr", debug::hex<4>(fi_addr))); return new_locality; } }; diff --git a/src/libfabric/locality.cpp b/src/libfabric/locality.cpp deleted file mode 100644 index ff23eeb5..00000000 --- a/src/libfabric/locality.cpp +++ /dev/null @@ -1,31 +0,0 @@ -/* - * ghex-org 
- * - * Copyright (c) 2014-2023, ETH Zurich - * All rights reserved. - * - * Please, refer to the LICENSE file in the root directory. - * SPDX-License-Identifier: BSD-3-Clause - */ - -#include - -namespace oomph { namespace libfabric { - - // ------------------------------------------------------------------ - // format as ip address, port, libfabric address - // ------------------------------------------------------------------ - iplocality::iplocality(locality const& l) - : data(l) - { - } - - std::ostream& operator<<(std::ostream& os, iplocality const& p) - { - os << std::dec << NS_DEBUG::ipaddr(p.data.fabric_data()) << " - " - << NS_DEBUG::ipaddr(p.data.ip_address()) << ":" << NS_DEBUG::dec<>(p.data.port()) << " (" - << NS_DEBUG::dec<>(p.data.fi_address()) << ") "; - return os; - } - -}} // namespace oomph::libfabric diff --git a/src/libfabric/locality.hpp b/src/libfabric/locality.hpp index 1fd35425..24cdef24 100644 --- a/src/libfabric/locality.hpp +++ b/src/libfabric/locality.hpp @@ -23,8 +23,7 @@ // #include "oomph_libfabric_defines.hpp" -// Different providers use different address formats that we must accommodate -// in our locality object. +// Different providers use different address formats that we must accommodate in our locality object. 
#ifdef HAVE_LIBFABRIC_GNI # define HAVE_LIBFABRIC_LOCALITY_SIZE 48 #endif @@ -44,32 +43,25 @@ #if defined(HAVE_LIBFABRIC_VERBS) || defined(HAVE_LIBFABRIC_TCP) || \ defined(HAVE_LIBFABRIC_SOCKETS) || defined(HAVE_LIBFABRIC_PSM2) # define HAVE_LIBFABRIC_LOCALITY_SIZE 16 -# define HAVE_LIBFABRIC_LOCALITY_SOCKADDR #endif #if defined(HAVE_LIBFABRIC_SHM) # define HAVE_LIBFABRIC_LOCALITY_SIZE 24 #endif +#if defined(HAVE_LIBFABRIC_LNX) +# define HAVE_LIBFABRIC_LOCALITY_SIZE 32 +#endif + namespace oomph { // cppcheck-suppress ConfigurationNotChecked - static NS_DEBUG::enable_print loc_deb("LOCALTY"); + static NS_DEBUG::enable_print loc_deb("LOCALTY"); } // namespace oomph namespace oomph { namespace libfabric { struct locality; - // ------------------------------------------------------------------ - // format as ip address, port, libfabric address - // ------------------------------------------------------------------ - struct iplocality - { - locality const& data; - iplocality(locality const& a); - friend std::ostream& operator<<(std::ostream& os, iplocality const& p); - }; - // -------------------------------------------------------------------- // Locality, in this structure we store the information required by // libfabric to make a connection to another node. 
@@ -91,45 +83,50 @@ namespace oomph { namespace libfabric { static char const* type() { return "libfabric"; } - explicit locality(locality_data const& in_data) + explicit locality(locality_data const& in_data, struct fid_av* av) { std::memcpy(&data_[0], &in_data[0], locality_defs::array_size); fi_address_ = 0; - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("expl constructing"), iplocality((*this)))); + av_ = av; + LF_DEB(loc_deb, trace(NS_DEBUG::str<>("explicit construct"), to_str())); } locality() { std::memset(&data_[0], 0x00, locality_defs::array_size); fi_address_ = 0; - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("default construct"), iplocality((*this)))); + av_ = nullptr; + LF_DEB(loc_deb, trace(NS_DEBUG::str<>("default construct"), to_str())); } locality(locality const& other) : data_(other.data_) , fi_address_(other.fi_address_) + , av_(other.av_) { - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("copy construct"), iplocality((*this)))); + LF_DEB(loc_deb, trace(NS_DEBUG::str<>("copy construct"), to_str())); } - locality(locality const& other, fi_addr_t addr) + locality(locality const& other, fi_addr_t addr, struct fid_av* av) : data_(other.data_) , fi_address_(addr) + , av_(av) { - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("copy fi construct"), iplocality((*this)))); + LF_DEB(loc_deb, trace(NS_DEBUG::str<>("copy fi construct"), to_str())); } locality(locality&& other) : data_(std::move(other.data_)) , fi_address_(other.fi_address_) + , av_(other.av_) { - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("move construct"), iplocality((*this)))); + LF_DEB(loc_deb, trace(NS_DEBUG::str<>("move construct"), to_str())); } // provided to support sockets mode bootstrap explicit locality(std::string const& address, std::string const& portnum) { - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("explicit construct"), address, ":", portnum)); + LF_DEB(loc_deb, trace(NS_DEBUG::str<>("explicit construct-2"), address, ":", portnum)); // struct sockaddr_in socket_data; memset(&socket_data, 0, sizeof(socket_data)); @@ 
-139,81 +136,25 @@ namespace oomph { namespace libfabric { // std::memcpy(&data_[0], &socket_data, locality_defs::array_size); fi_address_ = 0; - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("string constructing"), iplocality((*this)))); - } - - // some condition marking this locality as valid - explicit inline operator bool() const - { - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("bool operator"), iplocality((*this)))); - return (ip_address() != 0); - } - - inline bool valid() const - { - LF_DEB(loc_deb, trace(NS_DEBUG::str<>("valid operator"), iplocality((*this)))); - return (ip_address() != 0); + av_ = nullptr; + LF_DEB(loc_deb, trace(NS_DEBUG::str<>("string constructing"), to_str())); } locality& operator=(locality const& other) { data_ = other.data_; fi_address_ = other.fi_address_; - LF_DEB(loc_deb, - trace(NS_DEBUG::str<>("copy operator"), iplocality(*this), iplocality(other))); + av_ = other.av_; + LF_DEB(loc_deb, trace(NS_DEBUG::str<>("copy operator"), to_str(), other.to_str())); return *this; } bool operator==(locality const& other) { - LF_DEB(loc_deb, - trace(NS_DEBUG::str<>("equality operator"), iplocality(*this), iplocality(other))); + LF_DEB(loc_deb, trace(NS_DEBUG::str<>("equality operator"), to_str(), other.to_str())); return std::memcmp(&data_, &other.data_, locality_defs::array_size) == 0; } - bool less_than(locality const& other) - { - LF_DEB(loc_deb, - trace(NS_DEBUG::str<>("less operator"), iplocality(*this), iplocality(other))); - if (ip_address() < other.ip_address()) return true; - if (ip_address() == other.ip_address()) return port() < other.port(); - return false; - } - - uint32_t const& ip_address() const - { -#if defined(HAVE_LIBFABRIC_LOCALITY_SOCKADDR) - return reinterpret_cast(data_.data())->sin_addr.s_addr; -#elif defined(HAVE_LIBFABRIC_GNI) - return data_[0]; -#elif defined(HAVE_LIBFABRIC_CXI) - return data_[0]; -#elif defined(HAVE_LIBFABRIC_EFA) - return data_[0]; -#elif defined(HAVE_LIBFABRIC_SHM) - return data_[0]; -#else - throw 
fabric_error(0, "unsupported fabric provider, please fix ASAP"); -#endif - } - - static uint32_t const& ip_address(locality_data const& data) - { -#if defined(HAVE_LIBFABRIC_LOCALITY_SOCKADDR) - return reinterpret_cast(&data)->sin_addr.s_addr; -#elif defined(HAVE_LIBFABRIC_GNI) - return data[0]; -#elif defined(HAVE_LIBFABRIC_CXI) - return data[0]; -#elif defined(HAVE_LIBFABRIC_EFA) - return data[0]; -#elif defined(HAVE_LIBFABRIC_SHM) - return data[0]; -#else - throw fabric_error(0, "unsupported fabric provider, please fix ASAP"); -#endif - } - inline fi_addr_t const& fi_address() const { return fi_address_; } inline void set_fi_address(fi_addr_t fi_addr) { fi_address_ = fi_addr; } @@ -225,39 +166,27 @@ namespace oomph { namespace libfabric { return port; } - inline void const* fabric_data() const { return data_.data(); } + inline locality_data const& fabric_data() const { return data_; } inline char* fabric_data_writable() { return reinterpret_cast(data_.data()); } - static std::string to_str(locality_data const& data, struct fid_av* av) + std::string to_str() const { char sbuf[256]; size_t buflen = 256; - char const* straddr_ret = fi_av_straddr(av, data.data(), sbuf, &buflen); - std::string result = straddr_ret ? straddr_ret : ""; - // free((char*)(straddr_ret)); + if (!av_) { return "No address vector"; } + char const* straddr_ret = fi_av_straddr(av_, data_.data(), sbuf, &buflen); + std::string result = straddr_ret ? 
straddr_ret : "Address formatting Error"; return result; } private: friend bool operator==(locality const& lhs, locality const& rhs) { - LF_DEB(loc_deb, - trace(NS_DEBUG::str<>("equality friend"), iplocality(lhs), iplocality(rhs))); + LF_DEB(loc_deb, trace(NS_DEBUG::str<>("equality friend"), lhs.to_str(), rhs.to_str())); return ((lhs.data_ == rhs.data_) && (lhs.fi_address_ == rhs.fi_address_)); } - friend bool operator<(locality const& lhs, locality const& rhs) - { - uint32_t const& a1 = lhs.ip_address(); - uint32_t const& a2 = rhs.ip_address(); - fi_addr_t const& f1 = lhs.fi_address(); - fi_addr_t const& f2 = rhs.fi_address(); - LF_DEB( - loc_deb, trace(NS_DEBUG::str<>("less friend"), iplocality(lhs), iplocality(rhs))); - return (a1 < a2) || (a1 == a2 && f1 < f2); - } - friend std::ostream& operator<<(std::ostream& os, locality const& loc) { for (uint32_t i = 0; i < locality_defs::array_length; ++i) { os << loc.data_[i]; } @@ -267,6 +196,7 @@ namespace oomph { namespace libfabric { private: locality_data data_; fi_addr_t fi_address_; + struct fid_av* av_; }; }} // namespace oomph::libfabric From 9dbcffe9682f4a2f581a470d048eaba82d9ab9ec Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Mon, 7 Jul 2025 22:13:51 +0200 Subject: [PATCH 57/68] Add LNX provider, simplify provider #ifdefs and fabric hints/info setup We do not need #ifdefs for all providers now that address formats are simpler, and by using info/hints supplied by get_info we can allow libfabric to set default values for more fields Simplify cmake generation of provider #ifdefs by simply capitalizing the provider name in the ifdef rather than doing each one by hand --- cmake/oomph_libfabric.cmake | 347 ++++++++++++++++-------------- src/libfabric/controller.hpp | 13 +- src/libfabric/controller_base.hpp | 55 ++++- 3 files changed, 239 insertions(+), 176 deletions(-) diff --git a/cmake/oomph_libfabric.cmake b/cmake/oomph_libfabric.cmake index 18c90369..1ddaf71d 100644 --- a/cmake/oomph_libfabric.cmake +++ 
b/cmake/oomph_libfabric.cmake @@ -1,177 +1,196 @@ # set all libfabric related options and values -#------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ # Enable libfabric support -#------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ set(OOMPH_WITH_LIBFABRIC OFF CACHE BOOL "Build with LIBFABRIC backend") -if (OOMPH_WITH_LIBFABRIC) - find_package(Libfabric REQUIRED) - add_library(oomph_libfabric SHARED) - add_library(oomph::libfabric ALIAS oomph_libfabric) - oomph_shared_lib_options(oomph_libfabric) - target_link_libraries(oomph_libfabric PUBLIC libfabric::libfabric) - install(TARGETS oomph_libfabric - EXPORT oomph-targets - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) - - # --------------------------------------------------------------------- - # Function to add config defines to a list that depends on a namespace variable - # #defines that match the namespace can later be written out to a file - # --------------------------------------------------------------------- - function(oomph_libfabric_add_config_define_namespace) - set(options) - set(one_value_args DEFINE NAMESPACE) - set(multi_value_args VALUE) - cmake_parse_arguments(OPTION - "${options}" "${one_value_args}" "${multi_value_args}" ${ARGN}) - - set(DEF_VAR OOMPH_LIBFABRIC_CONFIG_DEFINITIONS_${OPTION_NAMESPACE}) - - # to avoid extra trailing spaces (no value), use an if check - if(OPTION_VALUE) - set_property(GLOBAL APPEND PROPERTY ${DEF_VAR} "${OPTION_DEFINE} ${OPTION_VALUE}") - else() - set_property(GLOBAL APPEND PROPERTY ${DEF_VAR} "${OPTION_DEFINE}") - endif() - - endfunction() - - # --------------------------------------------------------------------- - # Function to write out all the config defines for a given namespace - # into a config file - 
# --------------------------------------------------------------------- - function(oomph_libfabric_write_config_defines_file) - set(options) - set(one_value_args TEMPLATE NAMESPACE FILENAME) - set(multi_value_args) - cmake_parse_arguments(OPTION - "${options}" "${one_value_args}" "${multi_value_args}" ${ARGN}) - - get_property(DEFINITIONS_VAR GLOBAL PROPERTY - OOMPH_LIBFABRIC_CONFIG_DEFINITIONS_${OPTION_NAMESPACE}) - - if(DEFINED DEFINITIONS_VAR) - list(SORT DEFINITIONS_VAR) - list(REMOVE_DUPLICATES DEFINITIONS_VAR) - endif() - - set(oomph_config_defines "\n") - foreach(def ${DEFINITIONS_VAR}) - set(oomph_config_defines "${oomph_config_defines}#define ${def}\n") - endforeach() - - # if the user has not specified a template, generate a proper header file - if (NOT OPTION_TEMPLATE) - string(TOUPPER ${OPTION_NAMESPACE} NAMESPACE_UPPER) - set(PREAMBLE +if(OOMPH_WITH_LIBFABRIC) + find_package(Libfabric REQUIRED) + add_library(oomph_libfabric SHARED) + add_library(oomph::libfabric ALIAS oomph_libfabric) + oomph_shared_lib_options(oomph_libfabric) + target_link_libraries(oomph_libfabric PUBLIC libfabric::libfabric) + install(TARGETS oomph_libfabric EXPORT oomph-targets + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + ) + + # --------------------------------------------------------------------- + # Function to add config defines to a list that depends on a namespace + # variable #defines that match the namespace can later be written out to a + # file + # --------------------------------------------------------------------- + function(oomph_libfabric_add_config_define_namespace) + set(options) + set(one_value_args DEFINE NAMESPACE) + set(multi_value_args VALUE) + cmake_parse_arguments( + OPTION "${options}" "${one_value_args}" "${multi_value_args}" ${ARGN} + ) + + set(DEF_VAR OOMPH_LIBFABRIC_CONFIG_DEFINITIONS_${OPTION_NAMESPACE}) + + # to avoid extra trailing spaces (no value), use an if check + if(OPTION_VALUE) + set_property( 
+ GLOBAL APPEND PROPERTY ${DEF_VAR} "${OPTION_DEFINE} ${OPTION_VALUE}" + ) + else() + set_property(GLOBAL APPEND PROPERTY ${DEF_VAR} "${OPTION_DEFINE}") + endif() + + endfunction() + + # --------------------------------------------------------------------- + # Function to write out all the config defines for a given namespace into a + # config file + # --------------------------------------------------------------------- + function(oomph_libfabric_write_config_defines_file) + set(options) + set(one_value_args TEMPLATE NAMESPACE FILENAME) + set(multi_value_args) + cmake_parse_arguments( + OPTION "${options}" "${one_value_args}" "${multi_value_args}" ${ARGN} + ) + + get_property( + DEFINITIONS_VAR GLOBAL + PROPERTY OOMPH_LIBFABRIC_CONFIG_DEFINITIONS_${OPTION_NAMESPACE} + ) + + if(DEFINED DEFINITIONS_VAR) + list(SORT DEFINITIONS_VAR) + list(REMOVE_DUPLICATES DEFINITIONS_VAR) + endif() + + set(oomph_config_defines "\n") + foreach(def ${DEFINITIONS_VAR}) + set(oomph_config_defines "${oomph_config_defines}#define ${def}\n") + endforeach() + + # if the user has not specified a template, generate a proper header file + if(NOT OPTION_TEMPLATE) + string(TOUPPER ${OPTION_NAMESPACE} NAMESPACE_UPPER) + set(PREAMBLE "\n" "// Do not edit this file! 
It has been generated by the cmake configuration step.\n" "\n" "#ifndef OOMPH_LIBFABRIC_CONFIG_${NAMESPACE_UPPER}_HPP\n" "#define OOMPH_LIBFABRIC_CONFIG_${NAMESPACE_UPPER}_HPP\n" - ) - set(TEMP_FILENAME "${PROJECT_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/${NAMESPACE_UPPER}") - file(WRITE ${TEMP_FILENAME} - ${PREAMBLE} - ${oomph_config_defines} - "#endif\n" - ) - configure_file("${TEMP_FILENAME}" "${OPTION_FILENAME}" COPYONLY) - file(REMOVE "${TEMP_FILENAME}") - else() - configure_file("${OPTION_TEMPLATE}" - "${OPTION_FILENAME}" - @ONLY) - endif() - endfunction() - - include(CMakeParseArguments) - - #------------------------------------------------------------------------------ - # Hardware device selection - #------------------------------------------------------------------------------ - set(OOMPH_LIBFABRIC_PROVIDER "tcp" CACHE - STRING "The provider (cxi(Cray Slingshot)/efa(Amazon Elastic)/gni(Cray Gemini)/psm2(Intel Omni-Path)/tcp/verbs(Infiniband))") - set_property(CACHE OOMPH_LIBFABRIC_PROVIDER PROPERTY STRINGS - "cxi" "efa" "gni" "psm2" "tcp" "verbs" "shm") - - oomph_libfabric_add_config_define_namespace( - DEFINE HAVE_LIBFABRIC_PROVIDER - VALUE "\"${OOMPH_LIBFABRIC_PROVIDER}\"" - NAMESPACE libfabric) - - option(OOMPH_LIBFABRIC_V1_API "Support older libfabric@1.15" OFF) - if (OOMPH_LIBFABRIC_V1_API) - oomph_libfabric_add_config_define_namespace( - DEFINE OOMPH_LIBFABRIC_V1_API - NAMESPACE libfabric) - endif() - - if(OOMPH_LIBFABRIC_PROVIDER MATCHES "verbs") - oomph_libfabric_add_config_define_namespace( - DEFINE HAVE_LIBFABRIC_VERBS - NAMESPACE libfabric) - elseif(OOMPH_LIBFABRIC_PROVIDER MATCHES "gni") - oomph_libfabric_add_config_define_namespace( - DEFINE HAVE_LIBFABRIC_GNI - NAMESPACE libfabric) - # add pmi library - set(_libfabric_libraries ${_libfabric_libraries} PMIx::libpmix) - elseif(OOMPH_LIBFABRIC_PROVIDER MATCHES "cxi") - oomph_libfabric_add_config_define_namespace( - DEFINE HAVE_LIBFABRIC_CXI - NAMESPACE libfabric) - elseif(OOMPH_LIBFABRIC_PROVIDER 
MATCHES "efa") - oomph_libfabric_add_config_define_namespace( - DEFINE HAVE_LIBFABRIC_EFA - NAMESPACE libfabric) - elseif(OOMPH_LIBFABRIC_PROVIDER MATCHES "tcp") - oomph_libfabric_add_config_define_namespace( - DEFINE HAVE_LIBFABRIC_TCP - NAMESPACE libfabric) - elseif(OOMPH_LIBFABRIC_PROVIDER MATCHES "sockets") - message(WARNING "The Sockets provider is deprecated in favor of the tcp, udp, " - "and utility providers") - oomph_libfabric_add_config_define_namespace( - DEFINE HAVE_LIBFABRIC_SOCKETS - NAMESPACE libfabric) - elseif(OOMPH_LIBFABRIC_PROVIDER MATCHES "psm2") - oomph_libfabric_add_config_define_namespace( - DEFINE HAVE_LIBFABRIC_PSM2 - NAMESPACE libfabric) - elseif(OOMPH_LIBFABRIC_PROVIDER MATCHES "shm") - oomph_libfabric_add_config_define_namespace( - DEFINE HAVE_LIBFABRIC_SHM - NAMESPACE libfabric) - endif() - - #------------------------------------------------------------------------------ - # Performance counters - #------------------------------------------------------------------------------ - set(OOMPH_LIBFABRIC_WITH_PERFORMANCE_COUNTERS OFF BOOL - STRING "Enable libfabric parcelport performance counters (default: OFF)") - mark_as_advanced(OOMPH_LIBFABRIC_WITH_PERFORMANCE_COUNTERS) - - if (OOMPH_LIBFABRIC_WITH_PERFORMANCE_COUNTERS) - oomph_libfabric_add_config_define_namespace( - DEFINE OOMPH_LIBFABRIC_HAVE_PERFORMANCE_COUNTERS - NAMESPACE libfabric) + ) + set(TEMP_FILENAME + "${PROJECT_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/${NAMESPACE_UPPER}" + ) + file(WRITE ${TEMP_FILENAME} ${PREAMBLE} ${oomph_config_defines} + "#endif\n" + ) + configure_file("${TEMP_FILENAME}" "${OPTION_FILENAME}" COPYONLY) + file(REMOVE "${TEMP_FILENAME}") + else() + configure_file("${OPTION_TEMPLATE}" "${OPTION_FILENAME}" @ONLY) endif() - - #------------------------------------------------------------------------------ - # used by template expansion for location of print.hpp - #------------------------------------------------------------------------------ - 
set(OOMPH_SRC_LIBFABRIC_DIR "${PROJECT_SOURCE_DIR}/src/libfabric") - - #------------------------------------------------------------------------------ - # Write options to file in build dir - #------------------------------------------------------------------------------ - oomph_libfabric_write_config_defines_file( - NAMESPACE libfabric - FILENAME "${PROJECT_BINARY_DIR}/src/libfabric/oomph_libfabric_defines.hpp" - TEMPLATE "${OOMPH_SRC_LIBFABRIC_DIR}/libfabric_defines_template.hpp" + endfunction() + + include(CMakeParseArguments) + + # ------------------------------------------------------------------------------ + # Hardware device selection + # ------------------------------------------------------------------------------ + set(OOMPH_LIBFABRIC_PROVIDER + "tcp" + CACHE + STRING + "The provider cxi(Cray Slingshot)/efa(Amazon Elastic)/gni(Cray Gemini)/psm2(Intel Omni-Path)/tcp/verbs(Infiniband), shm, lnx" + ) + set_property( + CACHE OOMPH_LIBFABRIC_PROVIDER + PROPERTY STRINGS + "cxi" + "efa" + "gni" + "psm2" + "tcp" + "verbs" + "shm" + "lnx" + ) + + oomph_libfabric_add_config_define_namespace( + DEFINE HAVE_LIBFABRIC_PROVIDER VALUE "\"${OOMPH_LIBFABRIC_PROVIDER}\"" + NAMESPACE libfabric + ) + + option(OOMPH_LIBFABRIC_V1_API "Support older libfabric@1.15" OFF) + if(OOMPH_LIBFABRIC_V1_API) + oomph_libfabric_add_config_define_namespace( + DEFINE OOMPH_LIBFABRIC_V1_API NAMESPACE libfabric + ) + endif() + + # Map provider string to uppercase and create a define + string(TOUPPER "${OOMPH_LIBFABRIC_PROVIDER}" PROVIDER_UPPER) + oomph_libfabric_add_config_define_namespace( + DEFINE "HAVE_LIBFABRIC_${PROVIDER_UPPER}" NAMESPACE libfabric + ) + + # Special handling for deprecated or extra cases + if(OOMPH_LIBFABRIC_PROVIDER STREQUAL "sockets") + message( + WARNING + "The + Sockets + provider + is + deprecated + in + favor + of + the + tcp, + udp, + and + utility + providers" + ) + endif() + + # Special handling for gni provider needing PMIx + if(OOMPH_LIBFABRIC_PROVIDER 
STREQUAL "gni") + set(_libfabric_libraries ${_libfabric_libraries} PMIx::libpmix) + endif() + + # ------------------------------------------------------------------------------ + # Performance counters + # ------------------------------------------------------------------------------ + set(OOMPH_LIBFABRIC_WITH_PERFORMANCE_COUNTERS + OFF + BOOL + STRING + "Enable libfabric performance counters (default: OFF)" + ) + mark_as_advanced(OOMPH_LIBFABRIC_WITH_PERFORMANCE_COUNTERS) + + if(OOMPH_LIBFABRIC_WITH_PERFORMANCE_COUNTERS) + oomph_libfabric_add_config_define_namespace( + DEFINE OOMPH_LIBFABRIC_HAVE_PERFORMANCE_COUNTERS NAMESPACE libfabric ) - target_include_directories(oomph_libfabric PRIVATE "${PROJECT_BINARY_DIR}/src/libfabric") + endif() + + # ------------------------------------------------------------------------------ + # used by template expansion for location of print.hpp + # ------------------------------------------------------------------------------ + set(OOMPH_SRC_LIBFABRIC_DIR "${PROJECT_SOURCE_DIR}/src/libfabric") + + # ------------------------------------------------------------------------------ + # Write options to file in build dir + # ------------------------------------------------------------------------------ + oomph_libfabric_write_config_defines_file( + NAMESPACE libfabric FILENAME + "${PROJECT_BINARY_DIR}/src/libfabric/oomph_libfabric_defines.hpp" TEMPLATE + "${OOMPH_SRC_LIBFABRIC_DIR}/libfabric_defines_template.hpp" + ) + target_include_directories( + oomph_libfabric PRIVATE "${PROJECT_BINARY_DIR}/src/libfabric" + ) endif() diff --git a/src/libfabric/controller.hpp b/src/libfabric/controller.hpp index 8f1d3d30..fbae4a3c 100644 --- a/src/libfabric/controller.hpp +++ b/src/libfabric/controller.hpp @@ -75,15 +75,14 @@ namespace oomph::libfabric { } // -------------------------------------------------------------------- - constexpr uint64_t caps_flags() + uint64_t caps_flags(uint64_t /*available_flags*/) const { -#if OOMPH_ENABLE_DEVICE && 
!defined(HAVE_LIBFABRIC_TCP) - std::int64_t hmem_flags = FI_HMEM; -#else - std::int64_t hmem_flags = 0; + uint64_t flags_required = FI_MSG | FI_TAGGED | FI_RMA | FI_READ | FI_WRITE | FI_RECV | + FI_SEND | FI_REMOTE_READ | FI_REMOTE_WRITE; +#if OOMPH_ENABLE_DEVICE + flags_required |= FI_HMEM; #endif - return hmem_flags | FI_MSG | FI_TAGGED | FI_RMA | FI_READ | FI_WRITE | FI_RECV | - FI_SEND | FI_TRANSMIT | FI_REMOTE_READ | FI_REMOTE_WRITE; + return flags_required; } // -------------------------------------------------------------------- diff --git a/src/libfabric/controller_base.hpp b/src/libfabric/controller_base.hpp index a91f8bc8..10a373b9 100644 --- a/src/libfabric/controller_base.hpp +++ b/src/libfabric/controller_base.hpp @@ -205,7 +205,7 @@ static std::vector> gni_ints = { # define LIBFABRIC_FI_VERSION_MINOR 15 #else # define LIBFABRIC_FI_VERSION_MAJOR 2 -# define LIBFABRIC_FI_VERSION_MINOR 0 +# define LIBFABRIC_FI_VERSION_MINOR 2 #endif namespace NS_DEBUG { @@ -746,7 +746,24 @@ namespace NS_LIBFABRIC { } // -------------------------------------------------------------------- - constexpr uint64_t caps_flags() { return static_cast(this)->caps_flags(); } + uint64_t caps_flags(uint64_t available_flags) const + { + uint64_t required_flags = + static_cast(this)->caps_flags(available_flags); + // + uint64_t final_flags = required_flags; + for (uint64_t bit = 0; bit < 64; ++bit) + { + uint64_t f = (1ULL << bit); + if ((required_flags & f) && ((available_flags & f) == 0)) + { + NS_DEBUG::cnb_err.error( + debug::str<>("caps flags unavailable"), fi_tostr(&f, FI_TYPE_CAPS)); + final_flags &= ~f; + } + } + return final_flags; + } // -------------------------------------------------------------------- constexpr fi_threading threadlevel_flags() @@ -764,7 +781,7 @@ namespace NS_LIBFABRIC { base_flags = base_flags | FI_MR_LOCAL; #if defined(HAVE_LIBFABRIC_CXI) - return base_flags | FI_MR_MMU_NOTIFY | FI_MR_ENDPOINT; + return base_flags | FI_MR_MMU_NOTIFY /*| 
FI_MR_ENDPOINT*/; #elif defined(HAVE_LIBFABRIC_EFA) return base_flags | FI_MR_MMU_NOTIFY | FI_MR_ENDPOINT; @@ -775,6 +792,7 @@ namespace NS_LIBFABRIC { // -------------------------------------------------------------------- uint32_t rendezvous_threshold() { return msg_rendezvous_threshold_; } + // -------------------------------------------------------------------- // initialize the basic fabric/domain/name void open_fabric(std::string const& provider, int threads, bool rootnode) @@ -798,7 +816,35 @@ namespace NS_LIBFABRIC { LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("fabric provider"), fabric_hints_->fabric_attr->prov_name)); + // get an info object to see what might be available before we set any flags + uint64_t flags = 0; + int ret = fi_getinfo(FI_VERSION(LIBFABRIC_FI_VERSION_MAJOR, LIBFABRIC_FI_VERSION_MINOR), + nullptr, nullptr, flags, fabric_hints_, &fabric_info_); + if (ret) throw NS_LIBFABRIC::fabric_error(ret, "Failed to get fabric info"); + if (display_fabric_info_ && fabric_info_) + { + char const* info_str = fi_tostr(fabric_info_, FI_TYPE_INFO); + if (info_str) + { + LF_DEB(NS_DEBUG::cnb_err, + trace(debug::str<>("Fabric info"), "pre-check ->", + fabric_hints_->fabric_attr->prov_name, "\n", + fi_tostr(fabric_info_, FI_TYPE_INFO))); + } + } + + fabric_hints_->caps = caps_flags(fabric_info_->caps); + if ((fabric_info_->mode & FI_CONTEXT) == 0) + { + LF_DEB(NS_DEBUG::cnb_err, + debug(debug::str<>("mode FI_CONTEXT!=0"), + fi_tostr(&fabric_hints_->domain_attr->mode, FI_TYPE_MODE))); + } + fabric_hints_->mode = fabric_info_->mode; + fabric_hints_->domain_attr->name = strdup(fabric_info_->domain_attr->name); fabric_hints_->domain_attr->mr_mode = memory_registration_mode_flags(); + std::cout << fi_tostr(&fabric_hints_->domain_attr->mr_mode, FI_TYPE_MR_MODE) + << std::endl; // Enable/Disable the use of progress threads auto progress = libfabric_progress_type(); @@ -827,13 +873,12 @@ namespace NS_LIBFABRIC { LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("fabric 
endpoint"), "RDM")); fabric_hints_->ep_attr->type = FI_EP_RDM; - uint64_t flags = 0; LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("get fabric info"), "FI_VERSION", debug::dec(LIBFABRIC_FI_VERSION_MAJOR), debug::dec(LIBFABRIC_FI_VERSION_MINOR))); - int ret = fi_getinfo(FI_VERSION(LIBFABRIC_FI_VERSION_MAJOR, LIBFABRIC_FI_VERSION_MINOR), + ret = fi_getinfo(FI_VERSION(LIBFABRIC_FI_VERSION_MAJOR, LIBFABRIC_FI_VERSION_MINOR), nullptr, nullptr, flags, fabric_hints_, &fabric_info_); if (ret) throw NS_LIBFABRIC::fabric_error(ret, "Failed to get fabric info"); From e1b043d73cebaebdab0e8874adac66bbee4f7a09 Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Wed, 9 Jul 2025 08:52:52 +0200 Subject: [PATCH 58/68] Use thread mask (instead of boost::physical_concurrency) for num threads when using GNU --- src/libfabric/context.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/libfabric/context.cpp b/src/libfabric/context.cpp index 68112e9e..2ce3bee1 100644 --- a/src/libfabric/context.cpp +++ b/src/libfabric/context.cpp @@ -44,7 +44,18 @@ namespace oomph { // in the following order: single threaded first, then multi-threaded after // int threads = thread_safe ? 
std::thread::hardware_concurrency() : 1; // int threads = std::thread::hardware_concurrency(); - int threads = boost::thread::physical_concurrency(); + // Determine the number of threads based on the CPU affinity mask + int threads = 1; +#if defined(_GNU_SOURCE) + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + if (sched_getaffinity(0, sizeof(cpuset), &cpuset) == 0) + threads = CPU_COUNT(&cpuset); + else + threads = boost::thread::physical_concurrency(); +#else + threads = boost::thread::physical_concurrency(); +#endif m_controller = init_libfabric_controller(this, comm, rank, size, threads, debug); m_domain = m_controller->get_domain(); } From 52b0cf6214dbdbd9d177690399a422d458904caf Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Wed, 9 Jul 2025 16:00:14 +0200 Subject: [PATCH 59/68] Fix cxi initialization, some hints must be set before fi_info becomes valid --- src/libfabric/controller_base.hpp | 28 +++++++++++++++----- src/libfabric/libfabric_defines_template.hpp | 5 +++- src/libfabric/print.hpp | 9 +++++-- 3 files changed, 33 insertions(+), 9 deletions(-) diff --git a/src/libfabric/controller_base.hpp b/src/libfabric/controller_base.hpp index 10a373b9..d97e5fc3 100644 --- a/src/libfabric/controller_base.hpp +++ b/src/libfabric/controller_base.hpp @@ -517,6 +517,7 @@ namespace NS_LIBFABRIC { endpoint_wrapper create_rx_endpoint( struct fid_domain* domain, struct fi_info* info, struct fid_av* av) { + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); auto ep_rx = new_endpoint_active(domain, info, false); // bind address vector @@ -774,14 +775,14 @@ namespace NS_LIBFABRIC { // -------------------------------------------------------------------- constexpr std::int64_t memory_registration_mode_flags() { - std::int64_t base_flags = FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY; + std::int64_t base_flags = FI_MR_ALLOCATED; // | FI_MR_VIRT_ADDR | FI_MR_PROV_KEY; #if OOMPH_ENABLE_DEVICE base_flags = base_flags | FI_MR_HMEM; #endif 
base_flags = base_flags | FI_MR_LOCAL; #if defined(HAVE_LIBFABRIC_CXI) - return base_flags | FI_MR_MMU_NOTIFY /*| FI_MR_ENDPOINT*/; + return base_flags | FI_MR_ENDPOINT; #elif defined(HAVE_LIBFABRIC_EFA) return base_flags | FI_MR_MMU_NOTIFY | FI_MR_ENDPOINT; @@ -816,6 +817,20 @@ namespace NS_LIBFABRIC { LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("fabric provider"), fabric_hints_->fabric_attr->prov_name)); +#if defined(HAVE_LIBFABRIC_CXI) + // libfabric domain for multi-nic CXI provider + char const* cxi_domain = std::getenv("FI_CXI_DEVICE_NAME"); + if (cxi_domain == nullptr) + { + LF_DEB(NS_DEBUG::cnb_err, error(str<>("Domain"), "FI_CXI_DEVICE_NAME not set")); + } + else { fabric_hints_->domain_attr->name = strdup(cxi_domain); } + LF_DEB(NS_DEBUG::cnb_deb, + debug(debug::str<>("fabric domain"), fabric_hints_->domain_attr->name)); +#endif + + fabric_hints_->domain_attr->mr_mode = memory_registration_mode_flags(); + // get an info object to see what might be available before we set any flags uint64_t flags = 0; int ret = fi_getinfo(FI_VERSION(LIBFABRIC_FI_VERSION_MAJOR, LIBFABRIC_FI_VERSION_MINOR), @@ -842,7 +857,6 @@ namespace NS_LIBFABRIC { } fabric_hints_->mode = fabric_info_->mode; fabric_hints_->domain_attr->name = strdup(fabric_info_->domain_attr->name); - fabric_hints_->domain_attr->mr_mode = memory_registration_mode_flags(); std::cout << fi_tostr(&fabric_hints_->domain_attr->mr_mode, FI_TYPE_MR_MODE) << std::endl; @@ -888,6 +902,9 @@ namespace NS_LIBFABRIC { trace(debug::str<>("Fabric info"), "\n", fi_tostr(fabric_info_, FI_TYPE_INFO))); } + int mrkey = (fabric_hints_->domain_attr->mr_mode & FI_MR_PROV_KEY) != 0; + LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Requires FI_MR_PROV_KEY"), mrkey)); + bool context = (fabric_hints_->mode & FI_CONTEXT) != 0; LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Requires FI_CONTEXT"), context)); @@ -1057,9 +1074,8 @@ namespace NS_LIBFABRIC { int ret = fi_endpoint(domain, hints, &ep, nullptr); if (ret) { - throw 
NS_LIBFABRIC::fabric_error(ret, - "fi_endpoint (too many threadlocal " - "endpoints?)"); + throw NS_LIBFABRIC::fabric_error( + ret, "fi_endpoint (too many threadlocal endpoints?)"); } fi_freeinfo(hints); LF_DEB( diff --git a/src/libfabric/libfabric_defines_template.hpp b/src/libfabric/libfabric_defines_template.hpp index efd2bb67..ea2a105b 100644 --- a/src/libfabric/libfabric_defines_template.hpp +++ b/src/libfabric/libfabric_defines_template.hpp @@ -19,7 +19,10 @@ #ifndef LF_DEB # define LF_DEB(printer, Expr) \ - if constexpr (printer.is_enabled()) { printer.Expr; }; + { \ + using namespace NS_DEBUG; \ + if constexpr (printer.is_enabled()) { printer.Expr; }; \ + } #endif #define LFSOURCE_DIR "@OOMPH_SRC_LIBFABRIC_DIR@" diff --git a/src/libfabric/print.hpp b/src/libfabric/print.hpp index 73c37c41..301f8e12 100644 --- a/src/libfabric/print.hpp +++ b/src/libfabric/print.hpp @@ -73,8 +73,13 @@ extern char** environ; // ------------------------------------------------------------ #define NS_DEBUG oomph::debug -#define LF_DEB(printer, Expr) \ - if constexpr (printer.is_enabled()) { printer.Expr; }; +#ifndef LF_DEB +# define LF_DEB(printer, Expr) \ + { \ + using namespace NS_DEBUG; \ + if constexpr (printer.is_enabled()) { printer.Expr; }; \ + } +#endif // ------------------------------------------------------------ /// \cond NODETAIL From 5a9c3ad6c9ffb5e6849395d98deab6a0cf2825a9 Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Wed, 9 Jul 2025 08:52:22 +0200 Subject: [PATCH 60/68] Disable debug messages --- src/libfabric/communicator.hpp | 2 +- src/libfabric/controller.hpp | 2 +- src/libfabric/fabric_error.hpp | 2 +- src/libfabric/locality.hpp | 2 +- src/libfabric/operation_context.hpp | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/libfabric/communicator.hpp b/src/libfabric/communicator.hpp index a38419dc..850f3e98 100644 --- a/src/libfabric/communicator.hpp +++ b/src/libfabric/communicator.hpp @@ -32,7 +32,7 @@ namespace oomph { using 
tag_disp = NS_DEBUG::detail::hex<12, uintptr_t>; template - inline /*constexpr*/ NS_DEBUG::print_threshold com_deb("COMMUNI"); + inline NS_DEBUG::print_threshold com_deb("COMMUNI"); static NS_DEBUG::enable_print com_err("COMMUNI"); diff --git a/src/libfabric/controller.hpp b/src/libfabric/controller.hpp index fbae4a3c..b8df1b70 100644 --- a/src/libfabric/controller.hpp +++ b/src/libfabric/controller.hpp @@ -40,7 +40,7 @@ namespace NS_DEBUG { using namespace oomph::debug; template - inline /*constexpr*/ NS_DEBUG::print_threshold cnt_deb("CONTROL"); + inline NS_DEBUG::print_threshold cnt_deb("CONTROL"); // static NS_DEBUG::enable_print cnt_err("CONTROL"); } // namespace NS_DEBUG diff --git a/src/libfabric/fabric_error.hpp b/src/libfabric/fabric_error.hpp index 2ec59997..84e43dd5 100644 --- a/src/libfabric/fabric_error.hpp +++ b/src/libfabric/fabric_error.hpp @@ -18,7 +18,7 @@ namespace NS_DEBUG { // cppcheck-suppress ConfigurationNotChecked - static NS_DEBUG::enable_print err_deb("ERROR__"); + static NS_DEBUG::enable_print err_deb("ERROR__"); } // namespace NS_DEBUG namespace NS_LIBFABRIC { diff --git a/src/libfabric/locality.hpp b/src/libfabric/locality.hpp index 24cdef24..67c753e7 100644 --- a/src/libfabric/locality.hpp +++ b/src/libfabric/locality.hpp @@ -55,7 +55,7 @@ namespace oomph { // cppcheck-suppress ConfigurationNotChecked - static NS_DEBUG::enable_print loc_deb("LOCALTY"); + static NS_DEBUG::enable_print loc_deb("LOCALTY"); } // namespace oomph namespace oomph { namespace libfabric { diff --git a/src/libfabric/operation_context.hpp b/src/libfabric/operation_context.hpp index 0f6b5103..74d6ba09 100644 --- a/src/libfabric/operation_context.hpp +++ b/src/libfabric/operation_context.hpp @@ -18,7 +18,7 @@ namespace oomph::libfabric { template - inline /*constexpr*/ NS_DEBUG::print_threshold opctx_deb("OP__CXT"); + inline NS_DEBUG::print_threshold opctx_deb("OP__CXT"); // This struct holds the ready state of a future // we must also store the context used in 
libfabric, in case From 21fdbbda47cb52ab45377f4f95eab742606af3b0 Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Wed, 9 Jul 2025 20:28:24 +0200 Subject: [PATCH 61/68] Clean up debug: namespace usage and rename ptr to (hex) hptr --- src/libfabric/communicator.hpp | 113 ++++----- src/libfabric/context.cpp | 7 +- src/libfabric/context.hpp | 3 +- src/libfabric/controller.hpp | 132 +++++----- src/libfabric/controller_base.hpp | 303 +++++++++++------------ src/libfabric/memory_region.hpp | 32 ++- src/libfabric/operation_context.cpp | 6 +- src/libfabric/operation_context.hpp | 2 +- src/libfabric/operation_context_base.hpp | 2 +- src/libfabric/print.hpp | 37 +-- src/libfabric/request_state.hpp | 4 +- 11 files changed, 291 insertions(+), 350 deletions(-) diff --git a/src/libfabric/communicator.hpp b/src/libfabric/communicator.hpp index 850f3e98..6bec497b 100644 --- a/src/libfabric/communicator.hpp +++ b/src/libfabric/communicator.hpp @@ -63,7 +63,7 @@ namespace oomph { , m_recv_cb_queue(128) , m_recv_cb_cancel(8) { - LF_DEB(com_deb<9>, debug(NS_DEBUG::str<>("MPI_comm"), NS_DEBUG::ptr(mpi_comm()))); + LF_DEB(com_deb<9>, debug(str<>("MPI_comm"), hptr(mpi_comm()))); m_tx_endpoint = m_context->get_controller()->get_tx_endpoint(); m_rx_endpoint = m_context->get_controller()->get_rx_endpoint(); } @@ -115,15 +115,15 @@ namespace oomph { void send_tagged_region(region_type const& send_region, std::size_t size, fi_addr_t dst_addr_, uint64_t tag_, operation_context* ctxt) { - [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::hptr(this), __func__); // clang-format off LF_DEB(com_deb<9>, - debug(NS_DEBUG::str<>("send_tagged_region"), - "->", NS_DEBUG::dec<2>(dst_addr_), + debug(str<>("send_tagged_region"), + "->", dec<2>(dst_addr_), send_region, "tag", tag_disp(tag_), - "context", NS_DEBUG::ptr(ctxt), - "tx endpoint", NS_DEBUG::ptr(m_tx_endpoint.get_ep()))); + "context", hptr(ctxt), + "tx 
endpoint", hptr(m_tx_endpoint.get_ep()))); // clang-format on execute_fi_function(fi_tsend, "fi_tsend", m_tx_endpoint.get_ep(), send_region.get_address(), size, send_region.get_local_key(), dst_addr_, tag_, @@ -135,12 +135,11 @@ namespace oomph { void inject_tagged_region( region_type const& send_region, std::size_t size, fi_addr_t dst_addr_, uint64_t tag_) { - [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::hptr(this), __func__); // clang-format on LF_DEB(com_deb<9>, - debug(NS_DEBUG::str<>("inject tagged"), "->", NS_DEBUG::dec<2>(dst_addr_), - send_region, "tag", tag_disp(tag_), "tx endpoint", - NS_DEBUG::ptr(m_tx_endpoint.get_ep()))); + debug(str<>("inject tagged"), "->", dec<2>(dst_addr_), send_region, "tag", + tag_disp(tag_), "tx endpoint", hptr(m_tx_endpoint.get_ep()))); // clang-format off execute_fi_function(fi_tinject, "fi_tinject", m_tx_endpoint.get_ep(), send_region.get_address(), size, dst_addr_, tag_); @@ -153,15 +152,15 @@ namespace oomph { void recv_tagged_region(region_type const& recv_region, std::size_t size, fi_addr_t src_addr_, uint64_t tag_, operation_context* ctxt) { - [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::hptr(this), __func__); // clang-format off LF_DEB(com_deb<1>, - debug(NS_DEBUG::str<>("recv_tagged_region"), - "<-", NS_DEBUG::dec<2>(src_addr_), + debug(str<>("recv_tagged_region"), + "<-", dec<2>(src_addr_), recv_region, "tag", tag_disp(tag_), - "context", NS_DEBUG::ptr(ctxt), - "rx endpoint", NS_DEBUG::ptr(m_rx_endpoint.get_ep()))); + "context", hptr(ctxt), + "rx endpoint", hptr(m_rx_endpoint.get_ep()))); // clang-format on constexpr uint64_t ignore = 0; execute_fi_function(fi_trecv, "fi_trecv", m_rx_endpoint.get_ep(), @@ -175,7 +174,7 @@ namespace oomph { rank_type dst, oomph::tag_type tag, util::unique_function&& cb, std::size_t* scheduled) { - 
[[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::hptr(this), __func__); std::uint64_t stag = make_tag64(tag, /*this->rank(), */ this->m_context->get_context_tag()); @@ -189,8 +188,8 @@ namespace oomph { if (size != reg.get_size()) { LF_DEB(com_err, - error(NS_DEBUG::str<>("send mismatch"), "size", NS_DEBUG::hex<6>(size), - "reg size", NS_DEBUG::hex<6>(reg.get_size()))); + error(str<>("send mismatch"), "size", hex<6>(size), "reg size", + hex<6>(reg.get_size()))); } #endif m_context->get_controller()->sends_posted_++; @@ -222,22 +221,22 @@ namespace oomph { // clang-format off LF_DEB(com_deb<9>, - debug(NS_DEBUG::str<>("Send"), - "thisrank", NS_DEBUG::dec<>(rank()), - "rank", NS_DEBUG::dec<>(dst), + debug(str<>("Send"), + "thisrank", dec<>(rank()), + "rank", dec<>(dst), "tag", tag_disp(std::uint64_t(tag)), //"wrapped tag", tag_disp(std::uint64_t(tag.get())), "stag", tag_disp(stag), - "addr", NS_DEBUG::ptr(reg.get_address()), - "size", NS_DEBUG::hex<6>(size), - "reg size", NS_DEBUG::hex<6>(reg.get_size()), - "op_ctx", NS_DEBUG::ptr(&(s->m_operation_context)), - "req", NS_DEBUG::ptr(s.get()))); + "addr", hptr(reg.get_address()), + "size", hex<6>(size), + "reg size", hex<6>(reg.get_size()), + "op_ctx", hptr(&(s->m_operation_context)), + "req", hptr(s.get()))); #if OOMPH_ENABLE_DEVICE if (!ptr.on_device()) { LF_DEB(com_deb<9>, - debug(NS_DEBUG::str<>("send region CRC32"), - NS_DEBUG::mem_crc32(reg.get_address(), size, "CRC32"))); + debug(str<>("send region CRC32"), + mem_crc32(reg.get_address(), size, "CRC32"))); } #endif // clang-format on @@ -250,7 +249,7 @@ namespace oomph { oomph::tag_type tag, util::unique_function&& cb, std::size_t* scheduled) { - [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::hptr(this), __func__); std::uint64_t stag = make_tag64(tag, /*src, */ 
this->m_context->get_context_tag()); #if OOMPH_ENABLE_DEVICE @@ -263,8 +262,8 @@ namespace oomph { if (size != reg.get_size()) { LF_DEB(com_err, - error(NS_DEBUG::str<>("recv mismatch"), "size", NS_DEBUG::hex<6>(size), - "reg size", NS_DEBUG::hex<6>(reg.get_size()))); + error(str<>("recv mismatch"), "size", hex<6>(size), "reg size", + hex<6>(reg.get_size()))); } #endif m_context->get_controller()->recvs_posted_++; @@ -275,22 +274,22 @@ namespace oomph { // clang-format off LF_DEB(com_deb<9>, - debug(NS_DEBUG::str<>("recv"), - "thisrank", NS_DEBUG::dec<>(rank()), - "rank", NS_DEBUG::dec<>(src), + debug(str<>("recv"), + "thisrank", dec<>(rank()), + "rank", dec<>(src), "tag", tag_disp(std::uint64_t(tag)), //"wrapped tag", tag_disp(std::uint64_t(tag.get())), "stag", tag_disp(stag), - "addr", NS_DEBUG::ptr(reg.get_address()), - "size", NS_DEBUG::hex<6>(size), - "reg size", NS_DEBUG::hex<6>(reg.get_size()), - "op_ctx", NS_DEBUG::ptr(&(s->m_operation_context)), - "req", NS_DEBUG::ptr(s.get()))); + "addr", hptr(reg.get_address()), + "size", hex<6>(size), + "reg size", hex<6>(reg.get_size()), + "op_ctx", hptr(&(s->m_operation_context)), + "req", hptr(s.get()))); #if OOMPH_ENABLE_DEVICE if (!ptr.on_device()) { LF_DEB(com_deb<9>, - debug(NS_DEBUG::str<>("recv region CRC32"), - NS_DEBUG::mem_crc32(reg.get_address(), size, "CRC32"))); + debug(str<>("recv region CRC32"), + mem_crc32(reg.get_address(), size, "CRC32"))); } #endif // clang-format on @@ -304,7 +303,7 @@ namespace oomph { util::unique_function&& cb, std::atomic* scheduled) { - [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = com_deb<9>.scope(NS_DEBUG::hptr(this), __func__); std::uint64_t stag = make_tag64(tag, /*src, */ this->m_context->get_context_tag()); #if OOMPH_ENABLE_DEVICE @@ -317,8 +316,8 @@ namespace oomph { if (size != reg.get_size()) { LF_DEB(com_err, - error(NS_DEBUG::str<>("recv mismatch"), "size", NS_DEBUG::hex<6>(size), - "reg size", 
NS_DEBUG::hex<6>(reg.get_size()))); + error(str<>("recv mismatch"), "size", hex<6>(size), "reg size", + hex<6>(reg.get_size()))); } #endif m_context->get_controller()->recvs_posted_++; @@ -330,17 +329,17 @@ namespace oomph { // clang-format off LF_DEB(com_deb<9>, - debug(NS_DEBUG::str<>("shared_recv"), - "thisrank", NS_DEBUG::dec<>(rank()), - "rank", NS_DEBUG::dec<>(src), + debug(str<>("shared_recv"), + "thisrank", dec<>(rank()), + "rank", dec<>(src), "tag", tag_disp(std::uint64_t(tag)), //"wrapped tag", tag_disp(std::uint64_t(tag.get())), "stag", tag_disp(stag), - "addr", NS_DEBUG::ptr(reg.get_address()), - "size", NS_DEBUG::hex<6>(size), - "reg size", NS_DEBUG::hex<6>(reg.get_size()), - "op_ctx", NS_DEBUG::ptr(&(s->m_operation_context)), - "req", NS_DEBUG::ptr(s.get()))); + "addr", hptr(reg.get_address()), + "size", hex<6>(size), + "reg size", hex<6>(reg.get_size()), + "op_ctx", hptr(&(s->m_operation_context)), + "req", hptr(s.get()))); // clang-format on recv_tagged_region(reg, size, fi_addr_t(src), stag, &(s->m_operation_context)); @@ -360,14 +359,14 @@ namespace oomph { // (by other threads) m_send_cb_queue.consume_all([](oomph::detail::request_state* req) { [[maybe_unused]] auto scp = - com_deb<9>.scope("m_send_cb_queue.consume_all", NS_DEBUG::ptr(req)); + com_deb<9>.scope("m_send_cb_queue.consume_all", NS_DEBUG::hptr(req)); auto ptr = req->release_self_ref(); req->invoke_cb(); }); m_recv_cb_queue.consume_all([](oomph::detail::request_state* req) { [[maybe_unused]] auto scp = - com_deb<9>.scope("m_recv_cb_queue.consume_all", NS_DEBUG::ptr(req)); + com_deb<9>.scope("m_recv_cb_queue.consume_all", NS_DEBUG::hptr(req)); auto ptr = req->release_self_ref(); req->invoke_cb(); }); @@ -391,8 +390,7 @@ namespace oomph { // submit the cancellation request bool ok = (fi_cancel(&m_rx_endpoint.get_ep()->fid, op_ctx) == 0); - LF_DEB(com_deb<9>, - debug(NS_DEBUG::str<>("Cancel"), "ok", ok, "op_ctx", NS_DEBUG::ptr(op_ctx))); + LF_DEB(com_deb<9>, debug(str<>("Cancel"), "ok", 
ok, "op_ctx", hptr(op_ctx))); // if the cancel operation failed completely, return if (!ok) return false; @@ -411,8 +409,7 @@ namespace oomph { // our recv was cancelled correctly found = true; LF_DEB(com_deb<9>, - debug(NS_DEBUG::str<>("Cancel"), "succeeded", "op_ctx", - NS_DEBUG::ptr(op_ctx))); + debug(str<>("Cancel"), "succeeded", "op_ctx", hptr(op_ctx))); auto ptr = s->release_self_ref(); s->set_canceled(); } diff --git a/src/libfabric/context.cpp b/src/libfabric/context.cpp index 2ce3bee1..6b49098a 100644 --- a/src/libfabric/context.cpp +++ b/src/libfabric/context.cpp @@ -36,8 +36,7 @@ namespace oomph { m_ctxt_tag = reinterpret_cast(this); OOMPH_CHECK_MPI_RESULT(MPI_Bcast(&m_ctxt_tag, 1, MPI_UINT64_T, 0, comm)); LF_DEB(src_deb, - debug(NS_DEBUG::str<>("Broadcast"), "rank", debug::dec<3>(rank), "context", - debug::ptr(m_ctxt_tag))); + debug(str<>("Broadcast"), "rank", dec<3>(rank), "context", hptr(m_ctxt_tag))); // TODO fix the thread safety // problem: controller is a singleton and has problems when 2 contexts are created @@ -93,8 +92,8 @@ namespace oomph { if (!instance.get()) { LF_DEB(src_deb, - debug(NS_DEBUG::str<>("New Controller"), "rank", debug::dec<3>(rank), "size", - debug::dec<3>(size), "threads", debug::dec<3>(threads))); + debug(NS_DEBUG::str<>("New Controller"), "rank", dec<3>(rank), "size", dec<3>(size), + "threads", dec<3>(threads))); instance.reset(new controller_type()); if (debug) instance->enable_debug(); instance->initialize(HAVE_LIBFABRIC_PROVIDER, rank == 0, size, threads, comm); diff --git a/src/libfabric/context.hpp b/src/libfabric/context.hpp index cf02c850..e7f0308f 100644 --- a/src/libfabric/context.hpp +++ b/src/libfabric/context.hpp @@ -114,8 +114,7 @@ namespace oomph { // our recv was cancelled correctly found = true; LF_DEB(oomph::ctx_deb, - debug(NS_DEBUG::str<>("Cancel shared"), "succeeded", "op_ctx", - NS_DEBUG::ptr(op_ctx))); + debug(str<>("Cancel shared"), "succeeded", "op_ctx", hptr(op_ctx))); auto ptr = 
s->release_self_ref(); s->set_canceled(); } diff --git a/src/libfabric/controller.hpp b/src/libfabric/controller.hpp index b8df1b70..39c88fd9 100644 --- a/src/libfabric/controller.hpp +++ b/src/libfabric/controller.hpp @@ -93,55 +93,52 @@ namespace oomph::libfabric { // send address to rank 0 and receive array of all localities void MPI_exchange_localities(fid_av* av, MPI_Comm comm, int rank, int size) { - [[maybe_unused]] auto scp = NS_DEBUG::cnt_deb<9>.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = NS_DEBUG::cnt_deb<9>.scope(NS_DEBUG::hptr(this), __func__); // array of empty locality objects std::vector localities(size); // if (rank > 0) { - LF_DEB(NS_DEBUG::cnt_deb<9>, - debug(debug::str<>("sending here"), here_.to_str(), "size", - locality_defs::array_size)); + LF_DEB(cnt_deb<9>, + debug( + str<>("sending here"), here_.to_str(), "size", locality_defs::array_size)); /*int err = */ MPI_Send(here_.fabric_data().data(), locality_defs::array_size, MPI_CHAR, 0, // dst rank 0, // tag comm); - LF_DEB(NS_DEBUG::cnt_deb<9>, - debug(debug::str<>("receiving all"), "size", locality_defs::array_size)); + LF_DEB( + cnt_deb<9>, debug(str<>("receiving all"), "size", locality_defs::array_size)); MPI_Status status; /*err = */ MPI_Recv(localities.data(), size * locality_defs::array_size, MPI_CHAR, 0, // src rank 0, // tag comm, &status); - LF_DEB(NS_DEBUG::cnt_deb<9>, debug(debug::str<>("received addresses"))); + LF_DEB(cnt_deb<9>, debug(str<>("received addresses"))); } else { - LF_DEB(NS_DEBUG::cnt_deb<9>, debug(debug::str<>("receiving addresses"))); + LF_DEB(cnt_deb<9>, debug(str<>("receiving addresses"))); memcpy(&localities[0], here_.fabric_data().data(), locality_defs::array_size); for (int i = 1; i < size; ++i) { - LF_DEB(NS_DEBUG::cnt_deb<9>, - debug(debug::str<>("receiving address"), debug::dec<>(i))); + LF_DEB(cnt_deb<9>, debug(str<>("receiving address"), dec<>(i))); MPI_Status status; /*int err = */ MPI_Recv(&localities[i], size * 
locality_defs::array_size, MPI_CHAR, i, // src rank 0, // tag comm, &status); - LF_DEB(NS_DEBUG::cnt_deb<9>, - debug(debug::str<>("received address"), debug::dec<>(i))); + LF_DEB(cnt_deb<9>, debug(str<>("received address"), dec<>(i))); } - LF_DEB(NS_DEBUG::cnt_deb<9>, debug(debug::str<>("sending all"))); + LF_DEB(cnt_deb<9>, debug(str<>("sending all"))); for (int i = 1; i < size; ++i) { - LF_DEB( - NS_DEBUG::cnt_deb<9>, debug(debug::str<>("sending to"), debug::dec<>(i))); + LF_DEB(cnt_deb<9>, debug(str<>("sending to"), dec<>(i))); /*int err = */ MPI_Send(&localities[0], size * locality_defs::array_size, MPI_CHAR, i, // dst rank @@ -151,7 +148,7 @@ namespace oomph::libfabric { } // all ranks should now have a full localities vector - LF_DEB(NS_DEBUG::cnt_deb<9>, debug(debug::str<>("populating vector"))); + LF_DEB(cnt_deb<9>, debug(str<>("populating vector"))); for (int i = 0; i < size; ++i) { locality temp(localities[i], av); @@ -164,18 +161,17 @@ namespace oomph::libfabric { // and insert each one into the address vector void exchange_addresses(fid_av* av, MPI_Comm mpi_comm) { - [[maybe_unused]] auto scp = NS_DEBUG::cnt_deb<9>.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = NS_DEBUG::cnt_deb<9>.scope(NS_DEBUG::hptr(this), __func__); int rank, size; MPI_Comm_rank(mpi_comm, &rank); MPI_Comm_size(mpi_comm, &size); - LF_DEB(NS_DEBUG::cnt_deb<9>, - debug(debug::str<>("initialize_localities"), size, "localities")); + LF_DEB(cnt_deb<9>, debug(str<>("initialize_localities"), size, "localities")); MPI_exchange_localities(av, mpi_comm, rank, size); debug_print_av_vector(size); - LF_DEB(NS_DEBUG::cnt_deb<9>, debug(debug::str<>("Done localities"))); + LF_DEB(cnt_deb<9>, debug(str<>("Done localities"))); } // -------------------------------------------------------------------- @@ -252,8 +248,8 @@ namespace oomph::libfabric { if (!bypass_tx_lock() && !lock.owns_lock()) { return -1; } static auto polling = - NS_DEBUG::cnt_deb<9>.make_timer(1, 
debug::str<>("poll send queue")); - LF_DEB(NS_DEBUG::cnt_deb<9>, timed(polling, NS_DEBUG::ptr(send_cq))); + NS_DEBUG::cnt_deb<9>.make_timer(1, NS_DEBUG::str<>("poll send queue")); + LF_DEB(cnt_deb<9>, timed(polling, hptr(send_cq))); // poll for completions { @@ -269,21 +265,21 @@ namespace oomph::libfabric { // flags might not be set correctly if ((e.flags & (FI_MSG | FI_SEND | FI_TAGGED)) != 0) { - NS_DEBUG::cnt_err.error("txcq Error FI_EAVAIL for " - "FI_SEND with len", - debug::hex<6>(e.len), "context", NS_DEBUG::ptr(e.op_context), "code", - NS_DEBUG::dec<3>(e.err), "flags", debug::bin<16>(e.flags), "error", - fi_cq_strerror( - send_cq, e.prov_errno, e.err_data, (char*) e.buf, e.len)); + LF_DEB(cnt_err, + error("txcq Error FI_EAVAIL for FI_SEND with len", hex<6>(e.len), + "context", hptr(e.op_context), "code", dec<3>(e.err), "flags", + bin<16>(e.flags), "error", + fi_cq_strerror( + send_cq, e.prov_errno, e.err_data, (char*) e.buf, e.len))); } else if ((e.flags & FI_RMA) != 0) { - NS_DEBUG::cnt_err.error("txcq Error FI_EAVAIL for " - "FI_RMA with len", - debug::hex<6>(e.len), "context", NS_DEBUG::ptr(e.op_context), "code", - NS_DEBUG::dec<3>(e.err), "flags", debug::bin<16>(e.flags), "error", - fi_cq_strerror( - send_cq, e.prov_errno, e.err_data, (char*) e.buf, e.len)); + LF_DEB(cnt_err, + error("txcq Error FI_EAVAIL for FI_RMA with len", hex<6>(e.len), + "context", hptr(e.op_context), "code", dec<3>(e.err), "flags", + bin<16>(e.flags), "error", + fi_cq_strerror( + send_cq, e.prov_errno, e.err_data, (char*) e.buf, e.len))); } operation_context* handler = reinterpret_cast(e.op_context); handler->handle_error(e); @@ -299,17 +295,16 @@ namespace oomph::libfabric { for (int i = 0; i < ret; ++i) { ++sends_complete; - LF_DEB(NS_DEBUG::cnt_deb<9>, - debug(debug::str<>("Completion"), i, debug::dec<2>(i), "txcq flags", + LF_DEB(cnt_deb<9>, + debug(str<>("Completion"), i, dec<2>(i), "txcq flags", fi_tostr(&entry[i].flags, FI_TYPE_CQ_EVENT_FLAGS), "(", - 
debug::dec<>(entry[i].flags), ")", "context", - NS_DEBUG::ptr(entry[i].op_context), "length", - debug::hex<6>(entry[i].len))); + dec<>(entry[i].flags), ")", "context", hptr(entry[i].op_context), + "length", hex<6>(entry[i].len))); if ((entry[i].flags & (FI_TAGGED | FI_SEND | FI_MSG)) != 0) { - LF_DEB(NS_DEBUG::cnt_deb<9>, - debug(debug::str<>("Completion"), "txcq tagged send completion", - NS_DEBUG::ptr(entry[i].op_context))); + LF_DEB(cnt_deb<9>, + debug(str<>("Completion"), "txcq tagged send completion", + hptr(entry[i].op_context))); operation_context* handler = reinterpret_cast(entry[i].op_context); @@ -317,8 +312,9 @@ namespace oomph::libfabric { } else { - NS_DEBUG::cnt_err.error("Received an unknown txcq completion", - debug::dec<>(entry[i].flags), debug::bin<64>(entry[i].flags)); + LF_DEB(cnt_err, + error("Received an unknown txcq completion", dec<>(entry[i].flags), + bin<64>(entry[i].flags))); std::terminate(); } } @@ -328,7 +324,7 @@ namespace oomph::libfabric { { // do nothing, we will try again on the next check } - else { NS_DEBUG::cnt_err.error("unknown error in completion txcq read"); } + else { LF_DEB(cnt_err, error("unknown error in completion txcq read")); } return 0; } @@ -353,8 +349,8 @@ namespace oomph::libfabric { if (!bypass_rx_lock() && !lock.owns_lock()) { return -1; } static auto polling = - NS_DEBUG::cnt_deb<2>.make_timer(1, debug::str<>("poll recv queue")); - LF_DEB(NS_DEBUG::cnt_deb<2>, timed(polling, NS_DEBUG::ptr(rx_cq))); + NS_DEBUG::cnt_deb<2>.make_timer(1, NS_DEBUG::str<>("poll recv queue")); + LF_DEB(cnt_deb<2>, timed(polling, hptr(rx_cq))); // poll for completions { @@ -370,10 +366,9 @@ namespace oomph::libfabric { // from the manpage 'man 3 fi_cq_readerr' if (e.err == FI_ECANCELED) { - LF_DEB(NS_DEBUG::cnt_deb<1>, - debug(debug::str<>("rxcq Cancelled"), "flags", debug::hex<6>(e.flags), - "len", debug::hex<6>(e.len), "context", - NS_DEBUG::ptr(e.op_context))); + LF_DEB(cnt_deb<1>, + debug(str<>("rxcq Cancelled"), "flags", 
hex<6>(e.flags), "len", + hex<6>(e.len), "context", hptr(e.op_context))); // the request was cancelled, we can simply exit // as the canceller will have doone any cleanup needed operation_context* handler = @@ -383,11 +378,12 @@ namespace oomph::libfabric { } else if (e.err != FI_SUCCESS) { - NS_DEBUG::cnt_err.error(debug::str<>("poll_recv_queue"), "error code", - debug::dec<>(-e.err), "flags", debug::hex<6>(e.flags), "len", - debug::hex<6>(e.len), "context", NS_DEBUG::ptr(e.op_context), - "error msg", - fi_cq_strerror(rx_cq, e.prov_errno, e.err_data, (char*) e.buf, e.len)); + LF_DEB(cnt_err, + error(str<>("poll_recv_queue"), "error code", dec<>(-e.err), "flags", + hex<6>(e.flags), "len", hex<6>(e.len), "context", + hptr(e.op_context), "error msg", + fi_cq_strerror( + rx_cq, e.prov_errno, e.err_data, (char*) e.buf, e.len))); } operation_context* handler = reinterpret_cast(e.op_context); if (handler) handler->handle_error(e); @@ -403,17 +399,16 @@ namespace oomph::libfabric { for (int i = 0; i < ret; ++i) { ++recvs_complete; - LF_DEB(NS_DEBUG::cnt_deb<2>, - debug(debug::str<>("Completion"), i, "rxcq flags", + LF_DEB(cnt_deb<2>, + debug(str<>("Completion"), i, "rxcq flags", fi_tostr(&entry[i].flags, FI_TYPE_CQ_EVENT_FLAGS), "(", - debug::dec<>(entry[i].flags), ")", "context", - NS_DEBUG::ptr(entry[i].op_context), "length", - debug::hex<6>(entry[i].len))); + dec<>(entry[i].flags), ")", "context", hptr(entry[i].op_context), + "length", hex<6>(entry[i].len))); if ((entry[i].flags & (FI_TAGGED | FI_RECV)) != 0) { - LF_DEB(NS_DEBUG::cnt_deb<2>, - debug(debug::str<>("Completion"), "rxcq tagged recv completion", - NS_DEBUG::ptr(entry[i].op_context))); + LF_DEB(cnt_deb<2>, + debug(str<>("Completion"), "rxcq tagged recv completion", + hptr(entry[i].op_context))); operation_context* handler = reinterpret_cast(entry[i].op_context); @@ -421,8 +416,9 @@ namespace oomph::libfabric { } else { - NS_DEBUG::cnt_err.error("Received an unknown rxcq completion", - 
debug::dec<>(entry[i].flags), debug::bin<64>(entry[i].flags)); + LF_DEB(cnt_err, + error("Received an unknown rxcq completion", dec<>(entry[i].flags), + bin<64>(entry[i].flags))); std::terminate(); } } @@ -432,7 +428,7 @@ namespace oomph::libfabric { { // do nothing, we will try again on the next check } - else { NS_DEBUG::cnt_err.error("unknown error in completion rxcq read"); } + else { LF_DEB(cnt_err, error("unknown error in completion rxcq read")); } return 0; } @@ -442,7 +438,7 @@ namespace oomph::libfabric { (void) info; // unused variable warning (void) tx; // unused variable warning - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("fi_dupinfo"))); + LF_DEB(cnb_deb, debug(str<>("fi_dupinfo"))); struct fi_info* hints = fi_dupinfo(info); if (!hints) throw NS_LIBFABRIC::fabric_error(0, "fi_dupinfo"); // clear any Rx address data that might be set diff --git a/src/libfabric/controller_base.hpp b/src/libfabric/controller_base.hpp index d97e5fc3..205c40ab 100644 --- a/src/libfabric/controller_base.hpp +++ b/src/libfabric/controller_base.hpp @@ -239,7 +239,7 @@ namespace NS_LIBFABRIC { template void fidclose(Handle fid, char const* msg) { - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("closing"), msg)); + LF_DEB(cnb_deb, debug(str<>("closing"), msg)); int ret = fi_close(fid); if (ret == -FI_EBUSY) { throw NS_LIBFABRIC::fabric_error(ret, "fi_close EBUSY"); } else if (ret == FI_SUCCESS) { return; } @@ -267,7 +267,7 @@ namespace NS_LIBFABRIC { , name_(name) { [[maybe_unused]] auto scp = - NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__, name_); + NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__, name_); } // to keep boost::lockfree happy, we need these copy operators @@ -277,7 +277,7 @@ namespace NS_LIBFABRIC { void cleanup() { [[maybe_unused]] auto scp = - NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__, name_); + NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__, name_); if (ep_) { fidclose(&ep_->fid, "endpoint"); @@ -334,9 +334,9 @@ namespace 
NS_LIBFABRIC { ~stack_endpoint() { if (!pool_) return; - LF_DEB(NS_DEBUG::cnb_deb, - trace(debug::str<>("Scalable Ep"), "used push", "ep", NS_DEBUG::ptr(get_ep()), - "tx cq", NS_DEBUG::ptr(get_tx_cq()), "rx cq", NS_DEBUG::ptr(get_rx_cq()))); + LF_DEB(cnb_deb, + trace(str<>("Scalable Ep"), "used push", "ep", hptr(get_ep()), "tx cq", + hptr(get_tx_cq()), "rx cq", hptr(get_rx_cq()))); pool_->push(endpoint_); } @@ -426,7 +426,7 @@ namespace NS_LIBFABRIC { void finvoke(char const* msg, char const* err, int ret) { - LF_DEB(NS_DEBUG::cnb_deb, trace(debug::str<>(msg))); + LF_DEB(cnb_deb, trace(str<>(msg))); if (ret) throw NS_LIBFABRIC::fabric_error(ret, err); } @@ -460,16 +460,15 @@ namespace NS_LIBFABRIC { // clean up all resources ~controller_base() { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__); unsigned int messages_handled_ = 0; unsigned int rma_reads_ = 0; unsigned int recv_deletes_ = 0; - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("counters"), "Received messages", - debug::dec<>(messages_handled_), "Total reads", debug::dec<>(rma_reads_), - "Total deletes", debug::dec<>(recv_deletes_), "deletes error", - debug::dec<>(messages_handled_ - recv_deletes_))); + LF_DEB(cnb_deb, + debug(str<>("counters"), "Received messages", dec<>(messages_handled_), + "Total reads", dec<>(rma_reads_), "Total deletes", dec<>(recv_deletes_), + "deletes error", dec<>(messages_handled_ - recv_deletes_))); tx_endpoints_.consume_all([](auto&& ep) { ep.cleanup(); }); rx_endpoints_.consume_all([](auto&& ep) { ep.cleanup(); }); @@ -502,7 +501,7 @@ namespace NS_LIBFABRIC { fidclose(&fabric_->fid, "Fabric"); // clean up - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("freeing fabric_info"))); + LF_DEB(cnb_deb, debug(str<>("freeing fabric_info"))); fi_freeinfo(fabric_info_); } @@ -517,7 +516,7 @@ namespace NS_LIBFABRIC { endpoint_wrapper create_rx_endpoint( struct 
fid_domain* domain, struct fi_info* info, struct fid_av* av) { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__); auto ep_rx = new_endpoint_active(domain, info, false); // bind address vector @@ -538,26 +537,23 @@ namespace NS_LIBFABRIC { void initialize( std::string const& provider, bool rootnode, int size, size_t threads, Args&&... args) { - LF_DEB(NS_DEBUG::cnb_deb, eval([]() { std::cout.setf(std::ios::unitbuf); })); - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + LF_DEB(cnb_deb, eval([]() { std::cout.setf(std::ios::unitbuf); })); + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__); max_completions_per_poll_ = libfabric_completions_per_poll(); - LF_DEB(NS_DEBUG::cnb_err, - debug(debug::str<>("Poll completions"), debug::dec<3>(max_completions_per_poll_))); + LF_DEB(cnb_err, debug(str<>("Poll completions"), dec<3>(max_completions_per_poll_))); uint32_t default_val = (threads == 1) ? 
0x400 : 0x4000; msg_rendezvous_threshold_ = libfabric_rendezvous_threshold(default_val); - LF_DEB(NS_DEBUG::cnb_err, - debug(debug::str<>("Rendezvous threshold"), - debug::hex<4>(msg_rendezvous_threshold_))); + LF_DEB( + cnb_err, debug(str<>("Rendezvous threshold"), hex<4>(msg_rendezvous_threshold_))); endpoint_type_ = static_cast(libfabric_endpoint_type()); - LF_DEB( - NS_DEBUG::cnb_err, debug(debug::str<>("Endpoints"), libfabric_endpoint_string())); + LF_DEB(cnb_err, debug(str<>("Endpoints"), libfabric_endpoint_string())); eps_ = std::make_unique(); - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Threads"), debug::dec<3>(threads))); + LF_DEB(cnb_deb, debug(str<>("Threads"), dec<3>(threads))); open_fabric(provider, threads, rootnode); @@ -625,9 +621,9 @@ namespace NS_LIBFABRIC { auto ep_sx = new_endpoint_scalable( fabric_domain_, fabric_info_, true /*Tx*/, threads, threads_allocated); - LF_DEB(NS_DEBUG::cnb_deb, - trace(debug::str<>("scalable endpoint ok"), "Contexts allocated", - debug::dec<4>(threads_allocated))); + LF_DEB(cnb_deb, + trace(str<>("scalable endpoint ok"), "Contexts allocated", + dec<4>(threads_allocated))); finvoke("fi_scalable_ep_bind AV", "fi_scalable_ep_bind", fi_scalable_ep_bind(ep_sx, &av_->fid, 0)); @@ -637,8 +633,8 @@ namespace NS_LIBFABRIC { // for (unsigned int i = 0; i < threads_allocated; i++) { - [[maybe_unused]] auto scp = - NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), "scalable", debug::dec<4>(i)); + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope( + NS_DEBUG::hptr(this), "scalable", NS_DEBUG::dec<4>(i)); // For threadlocal/scalable endpoints, tx/rx resources fid_ep* scalable_ep_tx; @@ -654,10 +650,9 @@ namespace NS_LIBFABRIC { enable_endpoint(scalable_ep_tx, "tx scalable"); endpoint_wrapper tx(scalable_ep_tx, nullptr, scalable_cq_tx, "tx scalable"); - LF_DEB(NS_DEBUG::cnb_deb, - trace(debug::str<>("Scalable Ep"), "initial tx push", "ep", - NS_DEBUG::ptr(tx.get_ep()), "tx cq", NS_DEBUG::ptr(tx.get_tx_cq()), - "rx cq", 
NS_DEBUG::ptr(tx.get_rx_cq()))); + LF_DEB(cnb_deb, + trace(str<>("Scalable Ep"), "initial tx push", "ep", hptr(tx.get_ep()), + "tx cq", hptr(tx.get_tx_cq()), "rx cq", hptr(tx.get_rx_cq()))); tx_endpoints_.push(tx); } @@ -667,7 +662,7 @@ namespace NS_LIBFABRIC { // once enabled we can get the address enable_endpoint(eps_->ep_rx_.get_ep(), "rx here"); here_ = get_endpoint_address(&eps_->ep_rx_.get_ep()->fid); - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("setting 'here'"), here_.to_str())); + LF_DEB(cnb_deb, debug(str<>("setting 'here'"), here_.to_str())); // // if we are using scalable endpoints, then setup tx/rx contexts // // we will us a single endpoint for all Tx/Rx contexts @@ -682,8 +677,8 @@ namespace NS_LIBFABRIC { // if (!ep_sx) // throw NS_LIBFABRIC::fabric_error(FI_EOTHER, "fi_scalable endpoint creation failed"); - // LF_DEB(NS_DEBUG::cnb_deb, trace(debug::str<>("scalable endpoint ok"), - // "Contexts allocated", debug::dec<4>(threads_allocated))); + // LF_DEB(cnb_deb, trace(str<>("scalable endpoint ok"), + // "Contexts allocated", dec<4>(threads_allocated))); // // prepare the stack for insertions // tx_endpoints_.reserve(threads_allocated); @@ -692,7 +687,7 @@ namespace NS_LIBFABRIC { // for (unsigned int i = 0; i < threads_allocated; i++) // { // [[maybe_unused]] auto scp = - // NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), "scalable", debug::dec<4>(i)); + // NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), "scalable", dec<4>(i)); // // For threadlocal/scalable endpoints, tx/rx resources // fid_ep* scalable_ep_tx; @@ -712,8 +707,8 @@ namespace NS_LIBFABRIC { // enable_endpoint(scalable_ep_tx, "tx scalable"); // endpoint_wrapper tx(scalable_ep_tx, nullptr, scalable_cq_tx, "tx scalable"); - // LF_DEB(NS_DEBUG::cnb_deb, - // trace(debug::str<>("Scalable Ep"), "initial tx push", "ep", + // LF_DEB(cnb_deb, + // trace(str<>("Scalable Ep"), "initial tx push", "ep", // NS_DEBUG::ptr(tx.get_ep()), "tx cq", NS_DEBUG::ptr(tx.get_tx_cq()), "rx cq", // 
NS_DEBUG::ptr(tx.get_rx_cq()))); // tx_endpoints_.push(tx); @@ -730,8 +725,8 @@ namespace NS_LIBFABRIC { //// enable_endpoint(scalable_ep_rx, "rx scalable"); //// endpoint_wrapper rx(scalable_ep_rx, scalable_cq_rx, nullptr, "rx scalable"); - //// LF_DEB(NS_DEBUG::cnb_deb, - //// trace(debug::str<>("Scalable Ep"), "initial rx push", "ep", + //// LF_DEB(cnb_deb, + //// trace(str<>("Scalable Ep"), "initial rx push", "ep", //// NS_DEBUG::ptr(rx.get_ep()), "tx cq", NS_DEBUG::ptr(rx.get_tx_cq()), "rx cq", //// NS_DEBUG::ptr(rx.get_rx_cq()))); //// rx_endpoints_.push(rx); @@ -758,8 +753,8 @@ namespace NS_LIBFABRIC { uint64_t f = (1ULL << bit); if ((required_flags & f) && ((available_flags & f) == 0)) { - NS_DEBUG::cnb_err.error( - debug::str<>("caps flags unavailable"), fi_tostr(&f, FI_TYPE_CAPS)); + LF_DEB(cnb_err, + error(str<>("caps flags unavailable"), fi_tostr(&f, FI_TYPE_CAPS))); final_flags &= ~f; } } @@ -798,7 +793,7 @@ namespace NS_LIBFABRIC { // initialize the basic fabric/domain/name void open_fabric(std::string const& provider, int threads, bool rootnode) { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__); struct fi_info* fabric_hints_ = fi_allocinfo(); if (!fabric_hints_) @@ -814,19 +809,18 @@ namespace NS_LIBFABRIC { strdup(std::string(provider + ";ofi_rxm").c_str()); } else { fabric_hints_->fabric_attr->prov_name = strdup(provider.c_str()); } - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("fabric provider"), fabric_hints_->fabric_attr->prov_name)); + LF_DEB(cnb_deb, debug(str<>("fabric provider"), fabric_hints_->fabric_attr->prov_name)); #if defined(HAVE_LIBFABRIC_CXI) // libfabric domain for multi-nic CXI provider char const* cxi_domain = std::getenv("FI_CXI_DEVICE_NAME"); if (cxi_domain == nullptr) { - LF_DEB(NS_DEBUG::cnb_err, error(str<>("Domain"), "FI_CXI_DEVICE_NAME not set")); + LF_DEB(cnb_err, error(str<>("Domain"), 
"FI_CXI_DEVICE_NAME not set")); } else { fabric_hints_->domain_attr->name = strdup(cxi_domain); } - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("fabric domain"), fabric_hints_->domain_attr->name)); + LF_DEB( + NS_DEBUG::cnb_deb, debug(str<>("fabric domain"), fabric_hints_->domain_attr->name)); #endif fabric_hints_->domain_attr->mr_mode = memory_registration_mode_flags(); @@ -841,8 +835,8 @@ namespace NS_LIBFABRIC { char const* info_str = fi_tostr(fabric_info_, FI_TYPE_INFO); if (info_str) { - LF_DEB(NS_DEBUG::cnb_err, - trace(debug::str<>("Fabric info"), "pre-check ->", + LF_DEB(cnb_err, + trace(str<>("Fabric info"), "pre-check ->", fabric_hints_->fabric_attr->prov_name, "\n", fi_tostr(fabric_info_, FI_TYPE_INFO))); } @@ -851,8 +845,8 @@ namespace NS_LIBFABRIC { fabric_hints_->caps = caps_flags(fabric_info_->caps); if ((fabric_info_->mode & FI_CONTEXT) == 0) { - LF_DEB(NS_DEBUG::cnb_err, - debug(debug::str<>("mode FI_CONTEXT!=0"), + LF_DEB(cnb_err, + debug(str<>("mode FI_CONTEXT!=0"), fi_tostr(&fabric_hints_->domain_attr->mode, FI_TYPE_MODE))); } fabric_hints_->mode = fabric_info_->mode; @@ -864,11 +858,11 @@ namespace NS_LIBFABRIC { auto progress = libfabric_progress_type(); fabric_hints_->domain_attr->control_progress = progress; fabric_hints_->domain_attr->data_progress = progress; - LF_DEB(NS_DEBUG::cnb_err, debug(debug::str<>("progress"), libfabric_progress_string())); + LF_DEB(cnb_err, debug(str<>("progress"), libfabric_progress_string())); if (threads > 1) { - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("FI_THREAD_FID"))); + LF_DEB(cnb_deb, debug(str<>("FI_THREAD_FID"))); // Enable thread safe mode (Does not work with psm2 provider) // fabric_hints_->domain_attr->threading = FI_THREAD_SAFE; // fabric_hints_->domain_attr->threading = FI_THREAD_FID; @@ -876,7 +870,7 @@ namespace NS_LIBFABRIC { } else { - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("FI_THREAD_DOMAIN"))); + LF_DEB(cnb_deb, debug(str<>("FI_THREAD_DOMAIN"))); // we serialize everything 
fabric_hints_->domain_attr->threading = FI_THREAD_DOMAIN; } @@ -884,13 +878,12 @@ namespace NS_LIBFABRIC { // Enable resource management fabric_hints_->domain_attr->resource_mgmt = FI_RM_ENABLED; - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("fabric endpoint"), "RDM")); + LF_DEB(cnb_deb, debug(str<>("fabric endpoint"), "RDM")); fabric_hints_->ep_attr->type = FI_EP_RDM; - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("get fabric info"), "FI_VERSION", - debug::dec(LIBFABRIC_FI_VERSION_MAJOR), - debug::dec(LIBFABRIC_FI_VERSION_MINOR))); + LF_DEB(cnb_deb, + debug(str<>("get fabric info"), "FI_VERSION", dec(LIBFABRIC_FI_VERSION_MAJOR), + dec(LIBFABRIC_FI_VERSION_MINOR))); ret = fi_getinfo(FI_VERSION(LIBFABRIC_FI_VERSION_MAJOR, LIBFABRIC_FI_VERSION_MINOR), nullptr, nullptr, flags, fabric_hints_, &fabric_info_); @@ -898,57 +891,57 @@ namespace NS_LIBFABRIC { if (rootnode) { - LF_DEB(NS_DEBUG::cnb_err, - trace(debug::str<>("Fabric info"), "\n", fi_tostr(fabric_info_, FI_TYPE_INFO))); + LF_DEB(cnb_err, + trace(str<>("Fabric info"), "\n", fi_tostr(fabric_info_, FI_TYPE_INFO))); } int mrkey = (fabric_hints_->domain_attr->mr_mode & FI_MR_PROV_KEY) != 0; - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Requires FI_MR_PROV_KEY"), mrkey)); + LF_DEB(cnb_deb, debug(str<>("Requires FI_MR_PROV_KEY"), mrkey)); bool context = (fabric_hints_->mode & FI_CONTEXT) != 0; - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Requires FI_CONTEXT"), context)); + LF_DEB(cnb_deb, debug(str<>("Requires FI_CONTEXT"), context)); mrlocal = (fabric_hints_->domain_attr->mr_mode & FI_MR_LOCAL) != 0; - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Requires FI_MR_LOCAL"), mrlocal)); + LF_DEB(cnb_deb, debug(str<>("Requires FI_MR_LOCAL"), mrlocal)); mrbind = (fabric_hints_->domain_attr->mr_mode & FI_MR_ENDPOINT) != 0; - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Requires FI_MR_ENDPOINT"), mrbind)); + LF_DEB(cnb_deb, debug(str<>("Requires FI_MR_ENDPOINT"), mrbind)); /* Check if provider requires heterogeneous 
memory registration */ mrhmem = (fabric_hints_->domain_attr->mr_mode & FI_MR_HMEM) != 0; - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Requires FI_MR_HMEM"), mrhmem)); + LF_DEB(cnb_deb, debug(str<>("Requires FI_MR_HMEM"), mrhmem)); bool mrhalloc = (fabric_hints_->domain_attr->mr_mode & FI_MR_ALLOCATED) != 0; - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Requires FI_MR_ALLOCATED"), mrhalloc)); + LF_DEB(cnb_deb, debug(str<>("Requires FI_MR_ALLOCATED"), mrhalloc)); - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Creating fi_fabric"))); + LF_DEB(cnb_deb, debug(str<>("Creating fi_fabric"))); ret = fi_fabric(fabric_info_->fabric_attr, &fabric_, nullptr); if (ret) throw NS_LIBFABRIC::fabric_error(ret, "Failed to get fi_fabric"); // Allocate a domain. - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Allocating domain"))); + LF_DEB(cnb_deb, debug(str<>("Allocating domain"))); ret = fi_domain(fabric_, fabric_info_, &fabric_domain_, nullptr); if (ret) throw NS_LIBFABRIC::fabric_error(ret, "fi_domain"); #if defined(HAVE_LIBFABRIC_GNI) { [[maybe_unused]] auto scp = - NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), "GNI memory registration block"); + NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), "GNI memory registration block"); - LF_DEB(NS_DEBUG::cnb_err, debug(debug::str<>("-------"), "GNI String values")); + LF_DEB(cnb_err, debug(str<>("-------"), "GNI String values")); // Dump out all vars for debug purposes for (auto& gni_data : gni_strs) { _set_check_domain_op_value( gni_data.first, 0, gni_data.second.c_str(), false); } - LF_DEB(NS_DEBUG::cnb_err, debug(debug::str<>("-------"), "GNI Int values")); + LF_DEB(cnb_err, debug(str<>("-------"), "GNI Int values")); for (auto& gni_data : gni_ints) { _set_check_domain_op_value( gni_data.first, 0, gni_data.second.c_str(), false); } - LF_DEB(NS_DEBUG::cnb_err, debug(debug::str<>("-------"))); + LF_DEB(cnb_err, debug(str<>("-------"))); // -------------------------- // GNI_MR_CACHE @@ -971,7 +964,7 @@ namespace NS_LIBFABRIC { // 
Enable lazy deregistration in MR cache // int32_t enable = 1; - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("setting GNI_MR_CACHE_LAZY_DEREG"))); + LF_DEB(cnb_deb, debug(str<>("setting GNI_MR_CACHE_LAZY_DEREG"))); _set_check_domain_op_value( GNI_MR_CACHE_LAZY_DEREG, enable, "GNI_MR_CACHE_LAZY_DEREG"); @@ -1012,7 +1005,7 @@ namespace NS_LIBFABRIC { template int _set_check_domain_op_value(int op, T value, char const* info, bool set = true) { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__); static struct fi_gni_ops_domain* gni_domain_ops = nullptr; int ret = 0; @@ -1020,8 +1013,8 @@ namespace NS_LIBFABRIC { { ret = fi_open_ops(&fabric_domain_->fid, FI_GNI_DOMAIN_OPS_1, 0, (void**) &gni_domain_ops, nullptr); - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("gni open ops"), (ret == 0 ? "OK" : "FAIL"), + LF_DEB(cnb_deb, + debug(str<>("gni open ops"), (ret == 0 ? "OK" : "FAIL"), NS_DEBUG::ptr(gni_domain_ops))); } @@ -1031,8 +1024,7 @@ namespace NS_LIBFABRIC { ret = gni_domain_ops->set_val( &fabric_domain_->fid, (dom_ops_val_t) (op), reinterpret_cast(&value)); - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("gni set ops val"), value, (ret == 0 ? "OK" : "FAIL"))); + LF_DEB(cnb_deb, debug(str<>("gni set ops val"), value, (ret == 0 ? "OK" : "FAIL"))); } // Get the value (so we can check that the value we set is now returned) @@ -1040,14 +1032,14 @@ namespace NS_LIBFABRIC { ret = gni_domain_ops->get_val(&fabric_domain_->fid, (dom_ops_val_t) (op), &new_value); if constexpr (std::is_integral::value) { - LF_DEB(NS_DEBUG::cnb_err, - debug(debug::str<>("gni op val"), (ret == 0 ? "OK" : "FAIL"), info, - debug::hex<8>(new_value))); + LF_DEB(cnb_err, + debug( + str<>("gni op val"), (ret == 0 ? "OK" : "FAIL"), info, hex<8>(new_value))); } else { - LF_DEB(NS_DEBUG::cnb_err, - debug(debug::str<>("gni op val"), (ret == 0 ? 
"OK" : "FAIL"), info, new_value)); + LF_DEB(cnb_err, + debug(str<>("gni op val"), (ret == 0 ? "OK" : "FAIL"), info, new_value)); } // if (ret) throw NS_LIBFABRIC::fabric_error(ret, std::string("setting ") + info); @@ -1066,9 +1058,8 @@ namespace NS_LIBFABRIC { // and we do not create two endpoint with the same src address struct fi_info* hints = set_src_dst_addresses(info, tx); - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("Got info mode"), (info->mode & FI_NOTIFY_FLAGS_ONLY))); + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__); + LF_DEB(cnb_deb, debug(str<>("Got info mode"), (info->mode & FI_NOTIFY_FLAGS_ONLY))); struct fid_ep* ep; int ret = fi_endpoint(domain, hints, &ep, nullptr); @@ -1078,8 +1069,7 @@ namespace NS_LIBFABRIC { ret, "fi_endpoint (too many threadlocal endpoints?)"); } fi_freeinfo(hints); - LF_DEB( - NS_DEBUG::cnb_deb, debug(debug::str<>("new_endpoint_active"), NS_DEBUG::ptr(ep))); + LF_DEB(cnb_deb, debug(str<>("new_endpoint_active"), hptr(ep))); return ep; } @@ -1090,9 +1080,9 @@ namespace NS_LIBFABRIC { // don't allow multiple threads to call endpoint create at the same time scoped_lock lock(controller_mutex_); - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__); - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("fi_dupinfo"))); + LF_DEB(cnb_deb, debug(str<>("fi_dupinfo"))); struct fi_info* hints = fi_dupinfo(info); if (!hints) throw NS_LIBFABRIC::fabric_error(0, "fi_dupinfo"); @@ -1108,13 +1098,13 @@ namespace NS_LIBFABRIC { else { context_count = std::min(new_hints->domain_attr->rx_ctx_cnt, threads); } // clang-format off - LF_DEB(NS_DEBUG::cnb_deb, - trace(debug::str<>("scalable endpoint"), + LF_DEB(cnb_deb, + trace(str<>("scalable endpoint"), "Tx", tx, - "Threads", debug::dec<3>(threads), - "tx_ctx_cnt", 
debug::dec<3>(new_hints->domain_attr->tx_ctx_cnt), - "rx_ctx_cnt", debug::dec<3>(new_hints->domain_attr->rx_ctx_cnt), - "context_count", debug::dec<3>(context_count))); + "Threads", dec<3>(threads), + "tx_ctx_cnt", dec<3>(new_hints->domain_attr->tx_ctx_cnt), + "rx_ctx_cnt", dec<3>(new_hints->domain_attr->rx_ctx_cnt), + "context_count", dec<3>(context_count))); // clang-format on threads_allocated = context_count; @@ -1124,8 +1114,7 @@ namespace NS_LIBFABRIC { struct fid_ep* ep; ret = fi_scalable_ep(domain, new_hints, &ep, nullptr); if (ret) throw NS_LIBFABRIC::fabric_error(ret, "fi_scalable_ep"); - LF_DEB( - NS_DEBUG::cnb_deb, debug(debug::str<>("new_endpoint_scalable"), NS_DEBUG::ptr(ep))); + LF_DEB(cnb_deb, debug(str<>("new_endpoint_scalable"), hptr(ep))); fi_freeinfo(hints); return ep; } @@ -1133,8 +1122,8 @@ namespace NS_LIBFABRIC { // -------------------------------------------------------------------- endpoint_wrapper& get_rx_endpoint() { - static auto rx = NS_DEBUG::cnb_deb.make_timer(1, debug::str<>("get_rx_endpoint")); - LF_DEB(NS_DEBUG::cnb_deb, timed(rx)); + static auto rx = NS_DEBUG::cnb_deb.make_timer(1, NS_DEBUG::str<>("get_rx_endpoint")); + LF_DEB(cnb_deb, timed(rx)); if (endpoint_type_ == endpoint_type::scalableTxRx) { @@ -1145,20 +1134,19 @@ namespace NS_LIBFABRIC { if (!ok) { // clang-format off - LF_DEB(NS_DEBUG::cnb_deb, error(debug::str<>("Scalable Ep"), "pop rx", - "ep", NS_DEBUG::ptr(ep.get_ep()), - "tx cq", NS_DEBUG::ptr(ep.get_tx_cq()), - "rx cq", NS_DEBUG::ptr(ep.get_rx_cq()))); + LF_DEB(cnb_deb, error(str<>("Scalable Ep"), "pop rx", + "ep", hptr(ep.get_ep()), + "tx cq", hptr(ep.get_tx_cq()), + "rx cq", hptr(ep.get_rx_cq()))); // clang-format on throw std::runtime_error("rx endpoint wrapper pop fail"); } eps_->tl_srx_ = stack_endpoint( ep.get_ep(), ep.get_rx_cq(), ep.get_tx_cq(), ep.get_name(), &rx_endpoints_); - LF_DEB(NS_DEBUG::cnb_deb, - trace(debug::str<>("Scalable Ep"), "pop rx", "ep", - NS_DEBUG::ptr(eps_->tl_srx_.get_ep()), "tx cq", 
- NS_DEBUG::ptr(eps_->tl_srx_.get_tx_cq()), "rx cq", - NS_DEBUG::ptr(eps_->tl_srx_.get_rx_cq()))); + LF_DEB(cnb_deb, + trace(str<>("Scalable Ep"), "pop rx", "ep", hptr(eps_->tl_srx_.get_ep()), + "tx cq", hptr(eps_->tl_srx_.get_tx_cq()), "rx cq", + hptr(eps_->tl_srx_.get_rx_cq()))); } return eps_->tl_srx_.endpoint_; } @@ -1174,7 +1162,7 @@ namespace NS_LIBFABRIC { if (eps_->tl_tx_.get_ep() == nullptr) { [[maybe_unused]] auto scp = - NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__, "threadlocal"); + NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__, "threadlocal"); // create a completion queue for tx endpoint fabric_info_->tx_attr->op_flags |= (FI_INJECT_COMPLETE | FI_COMPLETION); @@ -1191,10 +1179,9 @@ namespace NS_LIBFABRIC { enable_endpoint(ep_tx, "tx threadlocal"); // set threadlocal endpoint wrapper - LF_DEB(NS_DEBUG::cnb_deb, - trace(debug::str<>("Threadlocal Ep"), "create Tx", "ep", - NS_DEBUG::ptr(ep_tx), "tx cq", NS_DEBUG::ptr(tx_cq), "rx cq", - NS_DEBUG::ptr(nullptr))); + LF_DEB(cnb_deb, + trace(str<>("Threadlocal Ep"), "create Tx", "ep", hptr(ep_tx), "tx cq", + hptr(tx_cq), "rx cq", hptr(nullptr))); // for cleaning up at termination endpoint_wrapper ep(ep_tx, nullptr, tx_cq, "tx threadlocal"); tx_endpoints_.push(ep); @@ -1211,19 +1198,17 @@ namespace NS_LIBFABRIC { bool ok = tx_endpoints_.pop(ep); if (!ok) { - LF_DEB(NS_DEBUG::cnb_deb, - error(debug::str<>("Scalable Ep"), "pop tx", "ep", - NS_DEBUG::ptr(ep.get_ep()), "tx cq", NS_DEBUG::ptr(ep.get_tx_cq()), - "rx cq", NS_DEBUG::ptr(ep.get_rx_cq()))); + LF_DEB(cnb_deb, + error(str<>("Scalable Ep"), "pop tx", "ep", hptr(ep.get_ep()), "tx cq", + hptr(ep.get_tx_cq()), "rx cq", hptr(ep.get_rx_cq()))); throw std::runtime_error("tx endpoint wrapper pop fail"); } eps_->tl_stx_ = stack_endpoint( ep.get_ep(), ep.get_rx_cq(), ep.get_tx_cq(), ep.get_name(), &tx_endpoints_); - LF_DEB(NS_DEBUG::cnb_deb, - trace(debug::str<>("Scalable Ep"), "pop tx", "ep", - NS_DEBUG::ptr(eps_->tl_stx_.get_ep()), "tx cq", - 
NS_DEBUG::ptr(eps_->tl_stx_.get_tx_cq()), "rx cq", - NS_DEBUG::ptr(eps_->tl_stx_.get_rx_cq()))); + LF_DEB(cnb_deb, + trace(str<>("Scalable Ep"), "pop tx", "ep", hptr(eps_->tl_stx_.get_ep()), + "tx cq", hptr(eps_->tl_stx_.get_tx_cq()), "rx cq", + hptr(eps_->tl_stx_.get_rx_cq()))); } return eps_->tl_stx_.endpoint_; } @@ -1235,10 +1220,9 @@ namespace NS_LIBFABRIC { // -------------------------------------------------------------------- void bind_address_vector_to_endpoint(struct fid_ep* endpoint, struct fid_av* av) { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__); - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("Binding AV"), "to", NS_DEBUG::ptr(endpoint))); + LF_DEB(cnb_deb, debug(str<>("Binding AV"), "to", hptr(endpoint))); int ret = fi_ep_bind(endpoint, &av->fid, 0); if (ret) throw NS_LIBFABRIC::fabric_error(ret, "bind address_vector"); } @@ -1248,10 +1232,9 @@ namespace NS_LIBFABRIC { struct fid_ep* endpoint, struct fid_cq*& cq, uint32_t cqtype, char const* type) { [[maybe_unused]] auto scp = - NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__, type); + NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__, type); - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("Binding CQ"), "to", NS_DEBUG::ptr(endpoint), type)); + LF_DEB(cnb_deb, debug(str<>("Binding CQ"), "to", hptr(endpoint), type)); int ret = fi_ep_bind(endpoint, &cq->fid, cqtype); if (ret) throw NS_LIBFABRIC::fabric_error(ret, "bind cq"); } @@ -1259,7 +1242,7 @@ namespace NS_LIBFABRIC { // -------------------------------------------------------------------- fid_cq* bind_tx_queue_to_rx_endpoint(struct fi_info* info, struct fid_ep* ep) { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__); info->tx_attr->op_flags |= (FI_INJECT_COMPLETE | FI_COMPLETION); fid_cq* 
tx_cq = create_completion_queue(fabric_domain_, info->tx_attr->size, "tx->rx"); // shared send/recv endpoint - bind send cq to the recv endpoint @@ -1271,10 +1254,9 @@ namespace NS_LIBFABRIC { void enable_endpoint(struct fid_ep* endpoint, char const* type) { [[maybe_unused]] auto scp = - NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__, type); + NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__, type); - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("Enabling endpoint"), NS_DEBUG::ptr(endpoint))); + LF_DEB(cnb_deb, debug(str<>("Enabling endpoint"), hptr(endpoint))); int ret = fi_enable(endpoint); if (ret) throw NS_LIBFABRIC::fabric_error(ret, "fi_enable"); } @@ -1282,7 +1264,7 @@ namespace NS_LIBFABRIC { // -------------------------------------------------------------------- locality get_endpoint_address(struct fid* id) { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__); locality::locality_data local_addr; std::size_t addrlen = locality_defs::array_size; @@ -1297,17 +1279,16 @@ namespace NS_LIBFABRIC { // optimized out when debug logging is false if constexpr (NS_DEBUG::cnb_deb.is_enabled()) { - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("raw address data"), "size", debug::dec<4>(addrlen), " : ", + LF_DEB(cnb_deb, + debug(str<>("raw address data"), "size", dec<4>(addrlen), " : ", locality(local_addr, av_).to_str())); std::stringstream temp2; for (std::size_t i = 0; i < locality_defs::array_length; ++i) { - temp2 << debug::hex<8>(local_addr[i]) << " - "; + temp2 << NS_DEBUG::hex<8>(local_addr[i]) << " - "; } - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("raw address data"), temp2.str().c_str())); + LF_DEB(cnb_deb, debug(str<>("raw address data"), temp2.str().c_str())); } return locality(local_addr, av_); } @@ -1315,7 +1296,7 @@ namespace NS_LIBFABRIC { // -------------------------------------------------------------------- 
fid_pep* create_passive_endpoint(struct fid_fabric* fabric, struct fi_info* info) { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__); struct fid_pep* ep; int ret = fi_passive_ep(fabric, info, &ep, nullptr); @@ -1373,14 +1354,13 @@ namespace NS_LIBFABRIC { addr.set_fi_address(fi_addr_t(i)); if ((ret == 0) && (addrlen <= locality_defs::array_size)) { - LF_DEB(NS_DEBUG::cnb_deb, - debug(debug::str<>("address vector"), debug::dec<3>(i), addr.to_str())); + LF_DEB(cnb_deb, debug(str<>("address vector"), dec<3>(i), addr.to_str())); } else { - LF_DEB(NS_DEBUG::cnb_err, - error(debug::str<>("address length"), debug::dec<3>(addrlen), - debug::dec<3>(locality_defs::array_size))); + LF_DEB(cnb_err, + error(str<>("address length"), dec<3>(addrlen), + dec<3>(locality_defs::array_size))); throw std::runtime_error("debug_print_av_vector : address vector " "traversal failure"); } @@ -1477,7 +1457,7 @@ namespace NS_LIBFABRIC { struct fid_domain* domain, size_t size, char const* type) { [[maybe_unused]] auto scp = - NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__, type); + NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__, type); struct fid_cq* cq; fi_cq_attr cq_attr = {}; @@ -1486,7 +1466,7 @@ namespace NS_LIBFABRIC { cq_attr.wait_cond = FI_CQ_COND_NONE; cq_attr.size = size; cq_attr.flags = 0 /*FI_COMPLETION*/; - LF_DEB(NS_DEBUG::cnb_deb, trace(debug::str<>("CQ size"), debug::dec<4>(size))); + LF_DEB(cnb_deb, trace(str<>("CQ size"), dec<4>(size))); // open completion queue on fabric domain and set context to null int ret = fi_cq_open(domain, &cq_attr, &cq, nullptr); if (ret) throw NS_LIBFABRIC::fabric_error(ret, "fi_cq_open"); @@ -1496,7 +1476,7 @@ namespace NS_LIBFABRIC { // -------------------------------------------------------------------- fid_av* create_address_vector(struct fi_info* info, int N, int num_rx_contexts) { - [[maybe_unused]] auto scp 
= NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__); fid_av* av; fi_av_attr av_attr = {fi_av_type(0), 0, 0, 0, nullptr, nullptr, 0}; @@ -1508,7 +1488,7 @@ namespace NS_LIBFABRIC { int rx_ctx_bits = 0; #ifdef RX_CONTEXTS_SUPPORT while (num_rx_contexts >> ++rx_ctx_bits); - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("rx_ctx_bits"), rx_ctx_bits)); + LF_DEB(cnb_deb, debug(str<>("rx_ctx_bits"), rx_ctx_bits)); #endif av_attr.rx_ctx_bits = rx_ctx_bits; // if contexts is nonzero, then we are using a single scalable endpoint @@ -1520,11 +1500,11 @@ namespace NS_LIBFABRIC { } else { - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("map FI_AV_TABLE"))); + LF_DEB(cnb_deb, debug(str<>("map FI_AV_TABLE"))); av_attr.type = FI_AV_TABLE; } - LF_DEB(NS_DEBUG::cnb_deb, debug(debug::str<>("Creating AV"))); + LF_DEB(cnb_deb, debug(str<>("Creating AV"))); int ret = fi_av_open(fabric_domain_, &av_attr, &av, nullptr); if (ret) throw NS_LIBFABRIC::fabric_error(ret, "fi_av_open"); return av; @@ -1536,23 +1516,22 @@ namespace NS_LIBFABRIC { // -------------------------------------------------------------------- locality insert_address(fid_av* av, locality const& address) { - [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = NS_DEBUG::cnb_deb.scope(NS_DEBUG::hptr(this), __func__); - LF_DEB(NS_DEBUG::cnb_deb, - trace(debug::str<>("inserting AV"), address.to_str(), NS_DEBUG::ptr(av))); + LF_DEB(cnb_deb, trace(str<>("inserting AV"), address.to_str(), hptr(av))); fi_addr_t fi_addr = 0xffff'ffff; int ret = fi_av_insert(av, address.fabric_data().data(), 1, &fi_addr, 0, nullptr); if (ret < 0) { throw NS_LIBFABRIC::fabric_error(ret, "fi_av_insert"); } else if (ret == 0) { - NS_DEBUG::cnb_deb.error("fi_av_insert called with existing address"); + LF_DEB(cnb_deb, error("fi_av_insert called with existing address")); NS_LIBFABRIC::fabric_error(ret, 
"fi_av_insert did not return 1"); } // address was generated correctly, now update the locality with the fi_addr locality new_locality(address, fi_addr, av); - LF_DEB(NS_DEBUG::cnb_deb, - trace(debug::str<>("AV add"), "rank", debug::dec<>(fi_addr), new_locality.to_str(), - "fi_addr", debug::hex<4>(fi_addr))); + LF_DEB(cnb_deb, + trace(str<>("AV add"), "rank", dec<>(fi_addr), new_locality.to_str(), "fi_addr", + hex<4>(fi_addr))); return new_locality; } }; diff --git a/src/libfabric/memory_region.hpp b/src/libfabric/memory_region.hpp index f2cd5d45..2028fc41 100644 --- a/src/libfabric/memory_region.hpp +++ b/src/libfabric/memory_region.hpp @@ -72,7 +72,7 @@ struct fi_mr_attr { struct fid_mr** mr) { [[maybe_unused]] auto scp = NS_MEMORY::mrn_deb.scope( - __func__, NS_DEBUG::ptr(buf), NS_DEBUG::dec<>(len), device_id); + __func__, NS_DEBUG::hptr(buf), NS_DEBUG::dec<>(len), device_id); // struct iovec addresses = {/*.iov_base = */ const_cast(buf), /*.iov_len = */ len}; fi_mr_attr attr = { @@ -263,18 +263,17 @@ struct fi_mr_attr { { (void) region; #if 1 || has_debug + using namespace NS_DEBUG; os << "region " - << NS_DEBUG::ptr(®ion) - //<< " fi_region " << NS_DEBUG::ptr(region.region_) - << " address " << NS_DEBUG::ptr(region.address_) << " size " - << NS_DEBUG::hex<6>(region.size_) - //<< " used_space " << NS_DEBUG::hex<6>(region.used_space_/*size_*/) + << hptr(®ion) + //<< " fi_region " << hptr(region.region_) + << " address " << hptr(region.address_) << " size " + << hex<6>(region.size_) + //<< " used_space " << hex<6>(region.used_space_/*size_*/) << " loc key " - << NS_DEBUG::ptr( - region.region_ ? region_provider::get_local_key(region.region_) : nullptr) + << hptr(region.region_ ? region_provider::get_local_key(region.region_) : nullptr) << " rem key " - << NS_DEBUG::ptr( - region.region_ ? region_provider::get_remote_key(region.region_) : 0); + << hptr(region.region_ ? 
region_provider::get_remote_key(region.region_) : 0); ///// clang-format off ///// clang-format on #endif @@ -352,28 +351,25 @@ struct fi_mr_attr { region_ = nullptr; // base_addr_ = memory_handle::address_; - LF_DEB(NS_MEMORY::mrn_deb, trace(NS_DEBUG::str<>("memory_segment"), *this, device_id)); + LF_DEB(NS_MEMORY::mrn_deb, trace(str<>("memory_segment"), *this, device_id)); int ret = region_provider::fi_register_memory(pd, device_id, buffer, length, region_provider::access_flags(), 0, key++, &(region_)); if (!ret) { LF_DEB(NS_MEMORY::mrn_deb, - trace(NS_DEBUG::str<>("Registered region"), "device", device_id, *this)); + trace(str<>("Registered region"), "device", device_id, *this)); } if (bind_mr) { ret = fi_mr_bind(region_, (struct fid*) ep, 0); if (ret) { throw NS_LIBFABRIC::fabric_error(int(ret), "fi_mr_bind"); } - else { LF_DEB(NS_MEMORY::mrn_deb, trace(NS_DEBUG::str<>("Bound region"), *this)); } + else { LF_DEB(NS_MEMORY::mrn_deb, trace(str<>("Bound region"), *this)); } ret = fi_mr_enable(region_); if (ret) { throw NS_LIBFABRIC::fabric_error(int(ret), "fi_mr_enable"); } - else - { - LF_DEB(NS_MEMORY::mrn_deb, trace(NS_DEBUG::str<>("Enabled region"), *this)); - } + else { LF_DEB(NS_MEMORY::mrn_deb, trace(str<>("Enabled region"), *this)); } } } @@ -398,7 +394,7 @@ struct fi_mr_attr { #if has_debug // clang-format off os << *static_cast(&region) - << " base address " << NS_DEBUG::ptr(region.base_addr_); + << " base address " << NS_DEBUG::hptr(region.base_addr_); // clang-format on #endif return os; diff --git a/src/libfabric/operation_context.cpp b/src/libfabric/operation_context.cpp index 8c8d277f..0f6de97a 100644 --- a/src/libfabric/operation_context.cpp +++ b/src/libfabric/operation_context.cpp @@ -16,7 +16,7 @@ namespace oomph::libfabric { void operation_context::handle_cancelled() { - [[maybe_unused]] auto scp = opctx_deb<1>.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = opctx_deb<1>.scope(NS_DEBUG::hptr(this), __func__); // enqueue the
cancelled/callback if (std::holds_alternative(m_req)) { @@ -35,7 +35,7 @@ namespace oomph::libfabric { int operation_context::handle_tagged_recv_completion_impl(void* user_data) { - [[maybe_unused]] auto scp = opctx_deb<1>.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = opctx_deb<1>.scope(NS_DEBUG::hptr(this), __func__); if (std::holds_alternative(m_req)) { // regular (non-shared) recv @@ -82,7 +82,7 @@ namespace oomph::libfabric { detail::request_state** req = reinterpret_cast(&m_req); LF_DEB(NS_MEMORY::opctx_deb<9>, error( - NS_DEBUG::str<>("invalid request_state"), this, "request", NS_DEBUG::ptr(req))); + str<>("invalid request_state"), this, "request", hptr(req))); throw std::runtime_error("Request state invalid in handle_tagged_recv"); } return 1; diff --git a/src/libfabric/operation_context.hpp b/src/libfabric/operation_context.hpp index 74d6ba09..faed3d70 100644 --- a/src/libfabric/operation_context.hpp +++ b/src/libfabric/operation_context.hpp @@ -33,7 +33,7 @@ namespace oomph::libfabric { , m_req{req} { [[maybe_unused]] auto scp = - opctx_deb<9>.scope(NS_DEBUG::ptr(this), __func__, "request", req); + opctx_deb<9>.scope(NS_DEBUG::hptr(this), __func__, "request", req); } // -------------------------------------------------------------------- diff --git a/src/libfabric/operation_context_base.hpp b/src/libfabric/operation_context_base.hpp index 5de5c386..462c79b5 100644 --- a/src/libfabric/operation_context_base.hpp +++ b/src/libfabric/operation_context_base.hpp @@ -33,7 +33,7 @@ namespace NS_LIBFABRIC { operation_context_base() : context_reserved_space() { - [[maybe_unused]] auto scp = ctx_bas.scope(NS_DEBUG::ptr(this), __func__); + [[maybe_unused]] auto scp = ctx_bas.scope(NS_DEBUG::hptr(this), __func__); } // error diff --git a/src/libfabric/print.hpp b/src/libfabric/print.hpp index 301f8e12..04364b98 100644 --- a/src/libfabric/print.hpp +++ b/src/libfabric/print.hpp @@ -118,18 +118,18 @@ namespace NS_DEBUG { // 
------------------------------------------------------------------ // format as pointer // ------------------------------------------------------------------ - struct ptr + struct hptr { - ptr(void const* v) + hptr(void const* v) : data_(v) { } - ptr(std::uintptr_t const v) + hptr(std::uintptr_t const v) : data_(reinterpret_cast(v)) { } void const* data_; - friend std::ostream& operator<<(std::ostream& os, ptr const& d) + friend std::ostream& operator<<(std::ostream& os, hptr const& d) { os << std::right << "0x" << std::setfill('0') << std::setw(12) << std::noshowbase << std::hex << reinterpret_cast(d.data_); @@ -230,32 +230,6 @@ namespace NS_DEBUG { } }; - // ------------------------------------------------------------------ - // format as ip address - // ------------------------------------------------------------------ - struct ipaddr - { - ipaddr(void const* a) - : data_(reinterpret_cast(a)) - , ipdata_(0) - { - } - ipaddr(uint32_t const a) - : data_(reinterpret_cast(&ipdata_)) - , ipdata_(a) - { - } - uint8_t const* data_; - uint32_t const ipdata_; - - friend std::ostream& operator<<(std::ostream& os, ipaddr const& p) - { - os << std::dec << int(p.data_[0]) << "." << int(p.data_[1]) << "." << int(p.data_[2]) - << "." 
<< int(p.data_[3]); - return os; - } - }; - // ------------------------------------------------------------------ // helper fuction for printing CRC32 // ------------------------------------------------------------------ @@ -284,9 +258,10 @@ namespace NS_DEBUG { char const* txt_; friend std::ostream& operator<<(std::ostream& os, mem_crc32 const& p) { + using namespace NS_DEBUG; std::uint8_t const* byte = static_cast(p.addr_); os << "Memory:"; - os << " address " << ptr(p.addr_) << " length " << hex<6, std::size_t>(p.len_) + os << " address " << hptr(p.addr_) << " length " << hex<6, std::size_t>(p.len_) << " CRC32:" << hex<8, std::size_t>(crc32(p.addr_, p.len_)) << "\n"; size_t i = 0; while (i < std::min(size_t(128), p.len_)) diff --git a/src/libfabric/request_state.hpp b/src/libfabric/request_state.hpp index 58f15dd5..74958fc5 100644 --- a/src/libfabric/request_state.hpp +++ b/src/libfabric/request_state.hpp @@ -69,13 +69,13 @@ namespace oomph { namespace detail { , m_operation_context{this} { [[maybe_unused]] auto scp = - libfabric::opctx_deb<9>.scope(NS_DEBUG::ptr(this), __func__); + libfabric::opctx_deb<9>.scope(NS_DEBUG::hptr(this), __func__); } ~shared_request_state() { [[maybe_unused]] auto scp = - libfabric::opctx_deb<9>.scope(NS_DEBUG::ptr(this), __func__); + libfabric::opctx_deb<9>.scope(NS_DEBUG::hptr(this), __func__); } void progress(); From b950074fbbed841e5ca84889e7cbc2051a8334ea Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Thu, 10 Jul 2025 11:49:17 +0200 Subject: [PATCH 62/68] Use safe fi_tostr_r and a std::array buffer in place of fi_tostr --- src/libfabric/controller.hpp | 12 ++++-- src/libfabric/controller_base.hpp | 68 +++++++++++++++++++------------ 2 files changed, 51 insertions(+), 29 deletions(-) diff --git a/src/libfabric/controller.hpp b/src/libfabric/controller.hpp index 39c88fd9..1b8e3b55 100644 --- a/src/libfabric/controller.hpp +++ b/src/libfabric/controller.hpp @@ -291,14 +291,16 @@ namespace oomph::libfabric { // if (ret > 0) 
{ + std::array buf; int processed = 0; for (int i = 0; i < ret; ++i) { ++sends_complete; LF_DEB(cnt_deb<9>, debug(str<>("Completion"), i, dec<2>(i), "txcq flags", - fi_tostr(&entry[i].flags, FI_TYPE_CQ_EVENT_FLAGS), "(", - dec<>(entry[i].flags), ")", "context", hptr(entry[i].op_context), + fi_tostr_r( + buf.data(), buf.size(), &entry[i].flags, FI_TYPE_CQ_EVENT_FLAGS), + "(", dec<>(entry[i].flags), ")", "context", hptr(entry[i].op_context), "length", hex<6>(entry[i].len))); if ((entry[i].flags & (FI_TAGGED | FI_SEND | FI_MSG)) != 0) { @@ -395,14 +397,16 @@ namespace oomph::libfabric { // if (ret > 0) { + std::array buf; int processed = 0; for (int i = 0; i < ret; ++i) { ++recvs_complete; LF_DEB(cnt_deb<2>, debug(str<>("Completion"), i, "rxcq flags", - fi_tostr(&entry[i].flags, FI_TYPE_CQ_EVENT_FLAGS), "(", - dec<>(entry[i].flags), ")", "context", hptr(entry[i].op_context), + fi_tostr_r( + buf.data(), buf.size(), &entry[i].flags, FI_TYPE_CQ_EVENT_FLAGS), + "(", dec<>(entry[i].flags), ")", "context", hptr(entry[i].op_context), "length", hex<6>(entry[i].len))); if ((entry[i].flags & (FI_TAGGED | FI_RECV)) != 0) { diff --git a/src/libfabric/controller_base.hpp b/src/libfabric/controller_base.hpp index 205c40ab..301e2e8d 100644 --- a/src/libfabric/controller_base.hpp +++ b/src/libfabric/controller_base.hpp @@ -744,6 +744,10 @@ namespace NS_LIBFABRIC { // -------------------------------------------------------------------- uint64_t caps_flags(uint64_t available_flags) const { + char buf[1024]; + LF_DEB(cnb_err, + debug(str<>("caps available"), hex(available_flags), + fi_tostr_r(buf, 1024, &available_flags, FI_TYPE_CAPS))); uint64_t required_flags = static_cast(this)->caps_flags(available_flags); // @@ -754,10 +758,14 @@ namespace NS_LIBFABRIC { if ((required_flags & f) && ((available_flags & f) == 0)) { LF_DEB(cnb_err, - error(str<>("caps flags unavailable"), fi_tostr(&f, FI_TYPE_CAPS))); + error(str<>("caps flags unavailable"), + fi_tostr_r(buf, 1024, &f, 
FI_TYPE_CAPS))); final_flags &= ~f; } } + LF_DEB(cnb_err, + debug(str<>("caps flags requested"), hex(final_flags), + fi_tostr_r(buf, 1024, &final_flags, FI_TYPE_CAPS))); return final_flags; } @@ -832,27 +840,30 @@ namespace NS_LIBFABRIC { if (ret) throw NS_LIBFABRIC::fabric_error(ret, "Failed to get fabric info"); if (display_fabric_info_ && fabric_info_) { - char const* info_str = fi_tostr(fabric_info_, FI_TYPE_INFO); - if (info_str) - { - LF_DEB(cnb_err, - trace(str<>("Fabric info"), "pre-check ->", - fabric_hints_->fabric_attr->prov_name, "\n", - fi_tostr(fabric_info_, FI_TYPE_INFO))); - } + std::array buf; + LF_DEB(cnb_err, + trace(str<>("Fabric info"), "pre-check ->", + fabric_hints_->fabric_attr->prov_name, "\n", + fi_tostr_r(buf.data(), buf.size(), fabric_info_, FI_TYPE_INFO))); } - fabric_hints_->caps = caps_flags(fabric_info_->caps); + // set capabilities we want to request + uint64_t all_caps = + caps_flags(fabric_info_->rx_attr->caps | fabric_info_->tx_attr->caps); + + // fabric_hints_->caps = all_caps; + fabric_hints_->tx_attr->caps = fabric_info_->tx_attr->caps & all_caps; + fabric_hints_->rx_attr->caps = fabric_info_->rx_attr->caps & all_caps; + if ((fabric_info_->mode & FI_CONTEXT) == 0) { + std::array buf; LF_DEB(cnb_err, debug(str<>("mode FI_CONTEXT!=0"), - fi_tostr(&fabric_hints_->domain_attr->mode, FI_TYPE_MODE))); + fi_tostr_r(buf.data(), buf.size(), &fabric_hints_->domain_attr->mode, + FI_TYPE_MODE))); } - fabric_hints_->mode = fabric_info_->mode; fabric_hints_->domain_attr->name = strdup(fabric_info_->domain_attr->name); - std::cout << fi_tostr(&fabric_hints_->domain_attr->mr_mode, FI_TYPE_MR_MODE) - << std::endl; // Enable/Disable the use of progress threads auto progress = libfabric_progress_type(); @@ -862,8 +873,7 @@ namespace NS_LIBFABRIC { if (threads > 1) { - LF_DEB(cnb_deb, debug(str<>("FI_THREAD_FID"))); - // Enable thread safe mode (Does not work with psm2 provider) + LF_DEB(cnb_deb, debug(str<>("Setting Threads>1 level"))); // 
fabric_hints_->domain_attr->threading = FI_THREAD_SAFE; // fabric_hints_->domain_attr->threading = FI_THREAD_FID; fabric_hints_->domain_attr->threading = threadlevel_flags(); @@ -891,29 +901,35 @@ namespace NS_LIBFABRIC { if (rootnode) { + std::array buf; LF_DEB(cnb_err, - trace(str<>("Fabric info"), "\n", fi_tostr(fabric_info_, FI_TYPE_INFO))); + trace(str<>("Fabric info"), "\n", + fi_tostr_r(buf.data(), buf.size(), fabric_info_, FI_TYPE_INFO))); } - int mrkey = (fabric_hints_->domain_attr->mr_mode & FI_MR_PROV_KEY) != 0; + int mrkey = (fabric_info_->domain_attr->mr_mode & FI_MR_PROV_KEY) != 0; LF_DEB(cnb_deb, debug(str<>("Requires FI_MR_PROV_KEY"), mrkey)); - bool context = (fabric_hints_->mode & FI_CONTEXT) != 0; + bool context = (fabric_info_->mode & FI_CONTEXT) != 0; LF_DEB(cnb_deb, debug(str<>("Requires FI_CONTEXT"), context)); - mrlocal = (fabric_hints_->domain_attr->mr_mode & FI_MR_LOCAL) != 0; + mrlocal = (fabric_info_->domain_attr->mr_mode & FI_MR_LOCAL) != 0; LF_DEB(cnb_deb, debug(str<>("Requires FI_MR_LOCAL"), mrlocal)); - mrbind = (fabric_hints_->domain_attr->mr_mode & FI_MR_ENDPOINT) != 0; + mrbind = (fabric_info_->domain_attr->mr_mode & FI_MR_ENDPOINT) != 0; LF_DEB(cnb_deb, debug(str<>("Requires FI_MR_ENDPOINT"), mrbind)); /* Check if provider requires heterogeneous memory registration */ - mrhmem = (fabric_hints_->domain_attr->mr_mode & FI_MR_HMEM) != 0; + mrhmem = (fabric_info_->domain_attr->mr_mode & FI_MR_HMEM) != 0; LF_DEB(cnb_deb, debug(str<>("Requires FI_MR_HMEM"), mrhmem)); - bool mrhalloc = (fabric_hints_->domain_attr->mr_mode & FI_MR_ALLOCATED) != 0; + bool mrhalloc = (fabric_info_->domain_attr->mr_mode & FI_MR_ALLOCATED) != 0; LF_DEB(cnb_deb, debug(str<>("Requires FI_MR_ALLOCATED"), mrhalloc)); + int auth_key = (fabric_info_->domain_attr->max_ep_auth_key); + LF_DEB(cnb_deb, debug(str<>("Supported max_ep_auth_key"), auth_key)); + fabric_info_->domain_attr->max_ep_auth_key = 0; + LF_DEB(cnb_deb, debug(str<>("Creating fi_fabric"))); ret = 
fi_fabric(fabric_info_->fabric_attr, &fabric_, nullptr); if (ret) throw NS_LIBFABRIC::fabric_error(ret, "Failed to get fi_fabric"); @@ -985,8 +1001,10 @@ namespace NS_LIBFABRIC { // Print fabric info to a human-readable string if available if (display_fabric_info_ && fabric_info_) { - char const* info_str = fi_tostr(fabric_info_, FI_TYPE_INFO); - if (info_str) { std::cout << "Libfabric fabric info:\n" << info_str << std::endl; } + std::array buf; + std::cout << "Libfabric fabric info:\n" + << fi_tostr_r(buf.data(), buf.size(), fabric_info_, FI_TYPE_INFO) + << std::endl; } fi_freeinfo(fabric_hints_); } From 0da9dcc9f66b5bd7e00b8e7282d2316b69af6645 Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Thu, 13 Nov 2025 13:38:40 +0000 Subject: [PATCH 63/68] Fixes to support new hwmalloc API --- src/libfabric/context.cpp | 10 +++++----- src/libfabric/context.hpp | 6 ++++-- src/libfabric/test/check_libfabric.cpp | 6 ++++-- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/libfabric/context.cpp b/src/libfabric/context.cpp index 6b49098a..a5a51e19 100644 --- a/src/libfabric/context.cpp +++ b/src/libfabric/context.cpp @@ -22,10 +22,10 @@ namespace oomph { using controller_type = libfabric::controller; - context_impl::context_impl(MPI_Comm comm, bool thread_safe, bool message_pool_never_free, - std::size_t message_pool_reserve, bool debug) + context_impl::context_impl( + MPI_Comm comm, bool thread_safe, hwmalloc::heap_config const& heap_config, bool debug) : context_base(comm, thread_safe) - , m_heap{this, message_pool_never_free, message_pool_reserve} + , m_heap{this, heap_config} , m_recv_cb_queue(128) , m_recv_cb_cancel(8) { @@ -35,8 +35,8 @@ namespace oomph { m_ctxt_tag = reinterpret_cast(this); OOMPH_CHECK_MPI_RESULT(MPI_Bcast(&m_ctxt_tag, 1, MPI_UINT64_T, 0, comm)); - LF_DEB(src_deb, - debug(str<>("Broadcast"), "rank", dec<3>(rank), "context", hptr(m_ctxt_tag))); + LF_DEB( + src_deb, debug(str<>("Broadcast"), "rank", dec<3>(rank), "context", 
hptr(m_ctxt_tag))); // TODO fix the thread safety // problem: controller is a singleton and has problems when 2 contexts are created diff --git a/src/libfabric/context.hpp b/src/libfabric/context.hpp index e7f0308f..76654d66 100644 --- a/src/libfabric/context.hpp +++ b/src/libfabric/context.hpp @@ -57,8 +57,10 @@ namespace oomph { callback_queue m_recv_cb_cancel; public: - context_impl(MPI_Comm comm, bool thread_safe, bool message_pool_never_free, - std::size_t message_pool_reserve, bool debug = false); + context_impl(MPI_Comm comm, bool thread_safe, hwmalloc::heap_config const& heap_config, + bool debug = false); + // context_impl(MPI_Comm comm, bool thread_safe, bool message_pool_never_free, + // std::size_t message_pool_reserve, bool debug = false); context_impl(context_impl const&) = delete; context_impl(context_impl&&) = delete; diff --git a/src/libfabric/test/check_libfabric.cpp b/src/libfabric/test/check_libfabric.cpp index 070c8f11..11d9788e 100644 --- a/src/libfabric/test/check_libfabric.cpp +++ b/src/libfabric/test/check_libfabric.cpp @@ -15,6 +15,8 @@ #include "../communicator.hpp" #include "../context.hpp" +#include + int main(int argc, char** argv) { using namespace oomph; @@ -24,6 +26,6 @@ int main(int argc, char** argv) bool debug = true; // mpi_environment env(multi_threaded, argc, argv); - auto ctxt = - context_impl(MPI_COMM_WORLD, true, message_pool_never_free, message_pool_reserve, debug); + hwmalloc::heap_config const& default_heap = hwmalloc::get_default_heap_config(); + auto ctxt = context_impl(MPI_COMM_WORLD, true, default_heap /*, debug*/); } From 18ac8b8b92b99c93b7efae3bbc2965c17c029ffd Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Thu, 13 Nov 2025 13:39:57 +0000 Subject: [PATCH 64/68] ifdefs for LNX provider, especially address unsupported address-string functions --- src/libfabric/controller.hpp | 15 ++++++++++----- src/libfabric/controller_base.hpp | 7 +++++-- src/libfabric/locality.hpp | 12 ++++++++---- 3 files changed, 23 
insertions(+), 11 deletions(-) diff --git a/src/libfabric/controller.hpp b/src/libfabric/controller.hpp index 1b8e3b55..f015a0c4 100644 --- a/src/libfabric/controller.hpp +++ b/src/libfabric/controller.hpp @@ -67,7 +67,7 @@ namespace oomph::libfabric { // -------------------------------------------------------------------- constexpr fi_threading threadlevel_flags() { -#if defined(HAVE_LIBFABRIC_GNI) /*|| defined(HAVE_LIBFABRIC_CXI)*/ +#if defined(HAVE_LIBFABRIC_GNI) || defined(HAVE_LIBFABRIC_LNX) return FI_THREAD_ENDPOINT; #else return FI_THREAD_SAFE; @@ -77,10 +77,13 @@ namespace oomph::libfabric { // -------------------------------------------------------------------- uint64_t caps_flags(uint64_t /*available_flags*/) const { - uint64_t flags_required = FI_MSG | FI_TAGGED | FI_RMA | FI_READ | FI_WRITE | FI_RECV | - FI_SEND | FI_REMOTE_READ | FI_REMOTE_WRITE; -#if OOMPH_ENABLE_DEVICE + uint64_t flags_required = FI_TAGGED; +#ifndef HAVE_LIBFABRIC_LNX + flags_required |= FI_MSG | FI_TAGGED | FI_RECV | FI_SEND | FI_RMA | FI_READ | FI_WRITE | + FI_REMOTE_READ | FI_REMOTE_WRITE; +# if OOMPH_ENABLE_DEVICE flags_required |= FI_HMEM; +# endif #endif return flags_required; } @@ -170,7 +173,9 @@ namespace oomph::libfabric { LF_DEB(cnt_deb<9>, debug(str<>("initialize_localities"), size, "localities")); MPI_exchange_localities(av, mpi_comm, rank, size); +#ifndef HAVE_LIBFABRIC_LNX // address stuff not yet supported debug_print_av_vector(size); +#endif LF_DEB(cnt_deb<9>, debug(str<>("Done localities"))); } @@ -179,7 +184,7 @@ namespace oomph::libfabric { { #if defined(HAVE_LIBFABRIC_GNI) return true; -#elif defined(HAVE_LIBFABRIC_CXI) +#elif defined(HAVE_LIBFABRIC_LNX) // @todo : cxi provider is not yet thread safe using scalable endpoints return false; #else diff --git a/src/libfabric/controller_base.hpp b/src/libfabric/controller_base.hpp index 301e2e8d..e710757c 100644 --- a/src/libfabric/controller_base.hpp +++ b/src/libfabric/controller_base.hpp @@ -778,6 +778,9 @@ 
namespace NS_LIBFABRIC { // -------------------------------------------------------------------- constexpr std::int64_t memory_registration_mode_flags() { +#if defined(HAVE_LIBFABRIC_LNX) + return FI_MR_HMEM; +#endif std::int64_t base_flags = FI_MR_ALLOCATED; // | FI_MR_VIRT_ADDR | FI_MR_PROV_KEY; #if OOMPH_ENABLE_DEVICE base_flags = base_flags | FI_MR_HMEM; @@ -1390,8 +1393,8 @@ namespace NS_LIBFABRIC { { #if defined(HAVE_LIBFABRIC_GNI) return true; -#elif defined(HAVE_LIBFABRIC_CXI) - // @todo : cxi provider is not yet thread safe using scalable endpoints +#elif defined(HAVE_LIBFABRIC_LNX) + // @todo : provider is not yet thread safe using scalable endpoints return false; #else return (threadlevel_flags() == FI_THREAD_SAFE || diff --git a/src/libfabric/locality.hpp b/src/libfabric/locality.hpp index 67c753e7..9e91cec1 100644 --- a/src/libfabric/locality.hpp +++ b/src/libfabric/locality.hpp @@ -50,7 +50,7 @@ #endif #if defined(HAVE_LIBFABRIC_LNX) -# define HAVE_LIBFABRIC_LOCALITY_SIZE 32 +# define HAVE_LIBFABRIC_LOCALITY_SIZE 512 #endif namespace oomph { @@ -172,12 +172,16 @@ namespace oomph { namespace libfabric { std::string to_str() const { - char sbuf[256]; - size_t buflen = 256; + size_t buflen = 1024; + std::array buf; if (!av_) { return "No address vector"; } - char const* straddr_ret = fi_av_straddr(av_, data_.data(), sbuf, &buflen); + char const* straddr_ret = fi_av_straddr(av_, data_.data(), buf.data(), &buflen); +#ifdef HAVE_LIBFABRIC_LNX + return "LNX does not yet support straddr"; +#else std::string result = straddr_ret ? 
straddr_ret : "Address formatting Error"; return result; +#endif } private: From c07bd67596ab27f5b57ea0978d439c37069cec6c Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Thu, 13 Nov 2025 14:33:10 +0000 Subject: [PATCH 65/68] Fix an API change introduced in libfabric 1.20 --- src/libfabric/controller_base.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libfabric/controller_base.hpp b/src/libfabric/controller_base.hpp index e710757c..06f19562 100644 --- a/src/libfabric/controller_base.hpp +++ b/src/libfabric/controller_base.hpp @@ -928,11 +928,11 @@ namespace NS_LIBFABRIC { bool mrhalloc = (fabric_info_->domain_attr->mr_mode & FI_MR_ALLOCATED) != 0; LF_DEB(cnb_deb, debug(str<>("Requires FI_MR_ALLOCATED"), mrhalloc)); - +#if (FI_MAJOR_VERSION > 1) || ((FI_MAJOR_VERSION == 1) && FI_MINOR_VERSION >= 20) int auth_key = (fabric_info_->domain_attr->max_ep_auth_key); LF_DEB(cnb_deb, debug(str<>("Supported max_ep_auth_key"), auth_key)); fabric_info_->domain_attr->max_ep_auth_key = 0; - +#endif LF_DEB(cnb_deb, debug(str<>("Creating fi_fabric"))); ret = fi_fabric(fabric_info_->fabric_attr, &fabric_, nullptr); if (ret) throw NS_LIBFABRIC::fabric_error(ret, "Failed to get fi_fabric"); From 3e4408d4945c18166ccc845b954cd07db35811e2 Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Thu, 13 Nov 2025 14:37:47 +0000 Subject: [PATCH 66/68] Replace strcpy with strncpy --- src/libfabric/context.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libfabric/context.cpp b/src/libfabric/context.cpp index a5a51e19..a1debfd7 100644 --- a/src/libfabric/context.cpp +++ b/src/libfabric/context.cpp @@ -76,7 +76,7 @@ namespace oomph { static char buffer[32]; std::string temp = std::to_string(m_controller->rendezvous_threshold()); if (temp.size() > 31) throw std::runtime_error("Bad string option check, fix please"); - strcpy(buffer, temp.c_str()); + strncpy(buffer, temp.c_str(), 32); return buffer; } else { return "unspecified"; } From
2d79d7bddd34f0e3824297d64b00fcc914b1e1cd Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Thu, 13 Nov 2025 14:49:00 +0000 Subject: [PATCH 67/68] fix: Split send/recv test into independent cpu and device mode tests --- test/test_send_recv.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/test/test_send_recv.cpp b/test/test_send_recv.cpp index f991df32..1326eecb 100644 --- a/test/test_send_recv.cpp +++ b/test/test_send_recv.cpp @@ -7,7 +7,12 @@ * Please, refer to the LICENSE file in the root directory. * SPDX-License-Identifier: BSD-3-Clause */ -#include +#ifdef TEST_DEVICE_MODE_ONLY +# ifdef HWMALLOC_ENABLE_DEVICE +# include +# endif +#endif + #include #include // use this path because device version in build dir needs to find include From 0226e13fa9757bdb0488af6b3b346c7071257bb5 Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Thu, 13 Nov 2025 15:29:12 +0000 Subject: [PATCH 68/68] Fix CI build fails due to unsupported older libfabric version --- src/libfabric/controller_base.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/libfabric/controller_base.hpp b/src/libfabric/controller_base.hpp index 06f19562..d423803b 100644 --- a/src/libfabric/controller_base.hpp +++ b/src/libfabric/controller_base.hpp @@ -39,6 +39,10 @@ #include "memory_region.hpp" #include "operation_context_base.hpp" +#if ((FI_MAJOR_VERSION == 1) && FI_MINOR_VERSION <= 12) +#define fi_tostr_r(a,b,c,d) " " +#endif + // #define DISABLE_FI_INJECT // #define EXCESSIVE_POLLING_BACKOFF_MICRO_S 50