Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
69 commits
Select commit Hold shift + click to select a range
040514f
Add cicd-ext CI configuration
msimberg Mar 25, 2026
ba1784c
Apply suggestions from code review
msimberg Mar 25, 2026
8cf5076
Apply suggestion from @msimberg
msimberg Mar 25, 2026
5423771
Fix CI container build args
msimberg Mar 25, 2026
3f15f1f
Specify oomph@main in spack environments
msimberg Mar 25, 2026
b832fe6
Remove +python from spack specs
msimberg Mar 25, 2026
1e851df
Remove stages
msimberg Mar 25, 2026
0398d59
Refactor ci config
msimberg Mar 25, 2026
b1297ac
Fix base image
msimberg Mar 26, 2026
3484722
Fix typo
msimberg Mar 26, 2026
33624ef
Fix env file path
msimberg Mar 26, 2026
a3e950d
Update cmake config in CI
msimberg Mar 26, 2026
9871e88
Use NUM_PROCS instead of nproc
msimberg Mar 26, 2026
177592d
Fix num procs
msimberg Mar 26, 2026
110d4eb
Update test job config
msimberg Mar 26, 2026
9518354
Fix syntax
msimberg Mar 26, 2026
5419c5b
Fix parallel testing
msimberg Mar 26, 2026
ffd02a4
Explicitly ask for one gpu per task
msimberg Mar 26, 2026
a81f37f
Verbose ctest output
msimberg Mar 26, 2026
7b35569
Explicitly set debug build for CI
msimberg Mar 26, 2026
60e0e25
Don't set any mpiexec options if MPIEXEC_EXECUTABLE is empty
msimberg Mar 26, 2026
c3ea568
Don't buffer test output
msimberg Mar 26, 2026
521011e
Skip cancel test
msimberg Mar 26, 2026
abb4188
Fix slurm variables
msimberg Mar 26, 2026
3689ebe
Shorten timeouts
msimberg Mar 26, 2026
841d97b
Don't load cxi hooks in CI
msimberg Mar 26, 2026
d7995af
Update slurm and ctest options
msimberg Mar 26, 2026
c91ae1c
List libfabric and ucx info in CI
msimberg Apr 9, 2026
b74e96d
Clean up test templates
msimberg Apr 9, 2026
80ce06c
Disable NCCL CI pipelines since it's not yet supported
msimberg Apr 9, 2026
3fed378
Small cleanup and parallel non-distributed tests in CI
msimberg Apr 9, 2026
38112f2
strace ctest call
msimberg Apr 10, 2026
f1e54aa
Verbose CI tests
msimberg Apr 10, 2026
faf58b8
Remove verbose parallel tests in CI
msimberg Apr 10, 2026
e2e7d9c
Fix fortran parallel tests when MPIEXEC_EXECUTABLE is empty
msimberg Apr 10, 2026
d83dbaa
Add missing BACKEND build arg to build step in CI
msimberg Apr 10, 2026
a21ac78
Singular hour
msimberg Apr 10, 2026
d2ae1f0
Double word
msimberg Apr 10, 2026
bdd9374
Remove unnecessary extends
msimberg Apr 10, 2026
d89bf20
More curl flags
msimberg Apr 10, 2026
45d6992
Use prerelease base image in CI
msimberg Apr 13, 2026
841b9bf
Use separate Testing directory for ctest per process
msimberg Apr 13, 2026
c80510f
Try to fix ctest wrapper
msimberg Apr 13, 2026
5fe5a13
cd into build directory
msimberg Apr 13, 2026
e55f1ad
Fix testing path
msimberg Apr 13, 2026
2ce53ed
Try without --map-root-user
msimberg Apr 13, 2026
9c71102
Try something else for ctest deadlocks
msimberg Apr 13, 2026
5f3f6bb
Fix syntax error
msimberg Apr 13, 2026
e33fd3d
Add sleep just to be safe when symlinking testing directory
msimberg Apr 14, 2026
a00c372
Use public path for base images
msimberg Apr 14, 2026
a4e0936
Clang-format files affected by libfabric changes
biddisco Nov 13, 2025
cf4b3bf
disable clang-format for cmake generated code, fix missing include
biddisco Jul 7, 2025
de6158e
Add shm provider support to libfabric transport layer
biddisco Jul 7, 2025
31a22c6
Remove unused includes and fix warnings in libfabric backend
biddisco Jul 7, 2025
b286e79
Split send/recv test into independent cpu and device mode tests
biddisco Jul 7, 2025
06b9e0a
Remove ipaddress locality functions and instead use AV fi_to_str
biddisco Jul 7, 2025
9dbcffe
Add LNX provider, simplify provider #ifdefs and fabric hints/info setup
biddisco Jul 7, 2025
e1b043d
Use thread mask (instead of boost::physical_concurrency) for num thre…
biddisco Jul 9, 2025
52b0cf6
Fix cxi initialization, some hints must be set before fi_info becomes…
biddisco Jul 9, 2025
5a9c3ad
Disable debug messages
biddisco Jul 9, 2025
21fdbbd
Clean up debug: namespace usage and rename ptr to (hex) hptr
biddisco Jul 9, 2025
b950074
Use safe fi_tostr_r and a std::array buffer in place of fi_tostr
biddisco Jul 10, 2025
0da9dcc
Fixes to support new hwmalloc API
biddisco Nov 13, 2025
18ac8b8
ifdefs for LNX provider, especially address unsupported address-strin…
biddisco Nov 13, 2025
c07bd67
Fix an API change introduce from libfabric 1.20
biddisco Nov 13, 2025
3e4408d
Replace strcpy with strncpy
biddisco Nov 13, 2025
2d79d7b
fix: Split send/recv test into independent cpu and device mode tests
biddisco Nov 13, 2025
0226e13
Fix CI build fails due to unsupported older libfabric version
biddisco Nov 13, 2025
fda0655
Merge remote-tracking branch 'msimberg/cicd-ext' into lnx-ci
biddisco Apr 16, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions .cscs-ci/container/build.Containerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Build image: configures and compiles oomph, with tests enabled, on top of
# the dependency image passed in as DEPS_IMAGE.
ARG DEPS_IMAGE
FROM $DEPS_IMAGE

# The build context is copied to /oomph and the build tree lives in /oomph/build.
COPY . /oomph
WORKDIR /oomph

# BACKEND selects which -DOOMPH_WITH_<BACKEND>=ON option is passed to CMake
# (it is upper-cased with `tr` first). NUM_PROCS is the job count handed to
# `cmake --build`.
ARG BACKEND
ARG NUM_PROCS
# Configure and build are both run through `spack -e ci build-env oomph --`,
# i.e. inside the build environment of the oomph spec in the `ci` Spack
# environment. All MPIEXEC_* variables are explicitly set to empty strings.
# NOTE(review): presumably so tests are started directly by the CI runner
# instead of through an mpiexec wrapper -- confirm.
RUN spack -e ci build-env oomph -- \
cmake -G Ninja -B build \
-DCMAKE_BUILD_TYPE=Debug \
-DOOMPH_WITH_TESTING=ON \
-DOOMPH_WITH_$(echo $BACKEND | tr '[:lower:]' '[:upper:]')=ON \
-DOOMPH_USE_BUNDLED_LIBS=ON \
-DOOMPH_USE_BUNDLED_HWMALLOC=OFF \
-DMPIEXEC_EXECUTABLE="" \
-DMPIEXEC_NUMPROC_FLAG="" \
-DMPIEXEC_PREFLAGS="" \
-DMPIEXEC_POSTFLAGS="" && \
spack -e ci build-env oomph -- cmake --build build -j$NUM_PROCS
24 changes: 24 additions & 0 deletions .cscs-ci/container/deps.Containerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Dependency image: installs Spack and the Spack package repository at the
# requested refs on top of BASE_IMAGE, then installs the dependencies of the
# environment described by SPACK_ENV_FILE.
ARG BASE_IMAGE
FROM $BASE_IMAGE

# Download the Spack tarball for ref SPACK_SHA from the GitHub API and unpack
# it into /opt/spack, dropping the top-level directory of the archive.
ARG SPACK_SHA
RUN mkdir -p /opt/spack && \
curl -fLsS "https://api.github.com/repos/spack/spack/tarball/$SPACK_SHA" | tar --strip-components=1 -xz -C /opt/spack

ENV PATH="/opt/spack/bin:$PATH"

# Same procedure for the spack-packages repository, unpacked to /opt/spack-packages.
ARG SPACK_PACKAGES_SHA
RUN mkdir -p /opt/spack-packages && \
curl -fLsS "https://api.github.com/repos/spack/spack-packages/tarball/$SPACK_PACKAGES_SHA" | tar --strip-components=1 -xz -C /opt/spack-packages

# Replace the default `builtin` package repository with the one downloaded above.
RUN spack repo remove --scope defaults:base builtin && \
spack repo add --scope site /opt/spack-packages/repos/spack_repo/builtin

# The environment file is copied from the build context to a fixed location.
ARG SPACK_ENV_FILE
COPY $SPACK_ENV_FILE /spack_environment/spack.yaml

# Create the `ci` environment, concretize it and install only the dependencies
# of its root specs (--only=dependencies); the roots themselves are not installed.
# NOTE(review): NUM_PROCS is not listed in DOCKER_BUILD_ARGS in default.yaml;
# presumably the CI container builder injects it -- confirm. If it is empty,
# `--jobs` has no value.
ARG NUM_PROCS
RUN spack external find --all && \
spack env create ci /spack_environment/spack.yaml && \
spack -e ci concretize -f && \
spack -e ci install --jobs $NUM_PROCS --fail-fast --only=dependencies
192 changes: 192 additions & 0 deletions .cscs-ci/default.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
# Pipeline definition. The remote include is presumably where the
# .container-builder-* and .container-runner-* templates extended below are
# defined (they are not defined in this file).
include:
- remote: 'https://gitlab.com/cscs-ci/recipes/-/raw/master/templates/v2/.ci-ext.yml'

# Pipeline-wide variables. BASE_IMAGE, SPACK_SHA and SPACK_PACKAGES_SHA are
# forwarded to deps.Containerfile through DOCKER_BUILD_ARGS and are also hashed
# into the dependency image tag (see .build_deps_template).
# NOTE(review): SPACK_SHA holds what looks like a release tag, not a commit SHA.
# NOTE(review): FF_TIMESTAMPS is presumably a GitLab Runner feature flag -- confirm.
variables:
BASE_IMAGE: jfrog.svc.cscs.ch/docker-group-csstaff/alps-images/ngc-pytorch:26.01-py3-alps4-dev
SPACK_SHA: v1.1.1
SPACK_PACKAGES_SHA: bc93746ce936d6653271b6e98f6df6ee28f64e84 # develop on 2026-03-25
FF_TIMESTAMPS: true

# Template for the jobs that build the Spack dependency image.
# The extending job must set BACKEND; it selects the Spack environment file,
# the image name and the name of the dotenv file.
.build_deps_template:
timeout: 1 hour
before_script:
# Best-effort registry login: a failure is ignored because of `|| true`.
- echo $DOCKERHUB_TOKEN | podman login docker.io -u $DOCKERHUB_USERNAME --password-stdin || true
# First 16 characters of the SHA-256 of the Containerfile and of the Spack
# environment file.
- export DOCKERFILE_SHA=`sha256sum .cscs-ci/container/deps.Containerfile | head -c 16`
- export ENV_FILE_SHA=`sha256sum ${SPACK_ENV_FILE} | head -c 16`
# CONFIG_TAG is a 16-character digest over everything that determines the
# image contents, so the tag changes whenever any of these inputs changes.
- export CONFIG_TAG=`echo $DOCKERFILE_SHA-$BASE_IMAGE-$SPACK_SHA-$SPACK_PACKAGES_SHA-$ENV_FILE_SHA | sha256sum - | head -c 16`
- export PERSIST_IMAGE_NAME=$CSCS_REGISTRY_PATH/public/oomph-spack-deps-$BACKEND:$CONFIG_TAG
# Record the tag and image name in the dotenv file published below, so that
# jobs which `need` this one receive DEPS_IMAGE as a variable.
- echo -e "CONFIG_TAG=$CONFIG_TAG" >> base-${BACKEND}.env
- echo -e "DEPS_IMAGE=$PERSIST_IMAGE_NAME" >> base-${BACKEND}.env
variables:
DOCKERFILE: .cscs-ci/container/deps.Containerfile
# Names of the variables passed to the image build as build arguments.
DOCKER_BUILD_ARGS: '["BASE_IMAGE", "SPACK_SHA", "SPACK_PACKAGES_SHA", "SPACK_ENV_FILE"]'
SPACK_ENV_FILE: .cscs-ci/spack/$BACKEND.yaml
artifacts:
reports:
dotenv: base-${BACKEND}.env

# TODO: NCCL will be enabled in https://github.com/ghex-org/oomph/pull/55
# build_deps_nccl:
# variables:
# BACKEND: nccl
# extends:
# - .container-builder-cscs-gh200
# - .build_deps_template

# One dependency-image job per enabled backend. Each job only sets BACKEND and
# combines the container builder template with .build_deps_template.
build_deps_mpi:
variables:
BACKEND: mpi
extends:
- .container-builder-cscs-gh200
- .build_deps_template

build_deps_ucx:
variables:
BACKEND: ucx
extends:
- .container-builder-cscs-gh200
- .build_deps_template

build_deps_libfabric:
variables:
BACKEND: libfabric
extends:
- .container-builder-cscs-gh200
- .build_deps_template

# Template for the jobs that build oomph itself using build.Containerfile.
# The extending job must set BACKEND and must receive DEPS_IMAGE (written to a
# dotenv file by the matching build_deps_* job).
.build_template:
extends: .container-builder-cscs-gh200
timeout: 15 minutes
before_script:
# Best-effort registry login: a failure is ignored because of `|| true`.
- echo $DOCKERHUB_TOKEN | podman login docker.io -u $DOCKERHUB_USERNAME --password-stdin || true
# The build image is tagged with the commit SHA, i.e. one image per commit and backend.
- export PERSIST_IMAGE_NAME=$CSCS_REGISTRY_PATH/public/oomph-build-$BACKEND:$CI_COMMIT_SHA
# Published through the dotenv report below so test jobs can use $BUILD_IMAGE.
- echo -e "BUILD_IMAGE=$PERSIST_IMAGE_NAME" >> build-${BACKEND}.env
variables:
DOCKERFILE: .cscs-ci/container/build.Containerfile
# Names of the variables passed to the image build as build arguments.
DOCKER_BUILD_ARGS: '["DEPS_IMAGE", "BACKEND"]'
artifacts:
reports:
dotenv: build-${BACKEND}.env

# TODO: NCCL will be enabled in https://github.com/ghex-org/oomph/pull/55
# build_nccl:
# variables:
# BACKEND: nccl
# extends: .build_template
# needs:
# - job: build_deps_nccl
# artifacts: true

# One build job per enabled backend. Each job needs the artifacts of the
# matching build_deps_* job, whose dotenv report provides DEPS_IMAGE.
build_mpi:
variables:
BACKEND: mpi
extends: .build_template
needs:
- job: build_deps_mpi
artifacts: true

build_ucx:
variables:
BACKEND: ucx
extends: .build_template
needs:
- job: build_deps_ucx
artifacts: true

build_libfabric:
variables:
BACKEND: libfabric
extends: .build_template
needs:
- job: build_deps_libfabric
artifacts: true

# Settings shared by all test jobs, which run on the runner template
# .container-runner-clariden-gh200.
# NOTE(review): the SLURM_* entries are presumably forwarded to the Slurm job
# launch by the runner, and the PMIX_MCA_* entries to PMIx -- confirm against
# the runner documentation.
.test_template_base:
extends: .container-runner-clariden-gh200
variables:
SLURM_JOB_NUM_NODES: 1
SLURM_GPUS_PER_TASK: 1
# Quoted so the value stays the string 5:00.
SLURM_TIMELIMIT: '5:00'
SLURM_PARTITION: normal
SLURM_MPI_TYPE: pmix
SLURM_NETWORK: disable_rdzv_get
SLURM_LABELIO: 1
SLURM_UNBUFFEREDIO: 1
PMIX_MCA_psec: native
PMIX_MCA_gds: "^shmem2"
# Quoted: a bare NO is a boolean in YAML 1.1, so without quotes the job would
# not receive the literal string NO.
USE_MPI: 'NO'

# Template for the single-task test jobs: runs the tests labelled "serial"
# from the build tree baked into the image at /oomph/build.
.test_serial_template:
extends: .test_template_base
variables:
SLURM_NTASKS: 1
script:
# Up to 8 tests run concurrently, each with a 60 second timeout; test output
# is printed only for failing tests.
- ctest --test-dir /oomph/build -L "serial" --output-on-failure --timeout 60 --parallel 8

# Template for the 4-task test jobs: runs the tests labelled
# "parallel-ranks-4". The SLURM_PROCID check below implies the script is
# executed by every task.
.test_parallel_template:
extends: .test_template_base
variables:
SLURM_NTASKS: 4
script:
# All ranks write to ctest files in Testing, but this can deadlock when
# writing inside the container.
# Rank 0 therefore replaces /oomph/build/Testing with a symlink to /tmp/Testing.
# NOTE(review): `mkdir` without -p fails if /tmp/Testing already exists.
- if [[ "${SLURM_PROCID}" == 0 ]]; then rm -rf /oomph/build/Testing; mkdir /tmp/Testing; ln -s /tmp/Testing /oomph/build/Testing; fi
# NOTE(review): this fixed delay is the only synchronisation between rank 0's
# setup above and the other ranks starting ctest -- confirm it is sufficient.
- sleep 1
# No --parallel here: tests run one after another, each with a 60 second timeout.
- ctest --test-dir /oomph/build -L "parallel-ranks-4" --output-on-failure --timeout 60

# TODO: NCCL will be enabled in https://github.com/ghex-org/oomph/pull/55
# test_serial_nccl:
# extends: .test_serial_template
# needs:
# - job: build_nccl
# artifacts: true
# image: $BUILD_IMAGE

# test_parallel_nccl:
# extends: .test_parallel_template
# needs:
# - job: build_nccl
# artifacts: true
# image: $BUILD_IMAGE

# One serial and one parallel test job per enabled backend. Each job runs in
# $BUILD_IMAGE, which comes from the dotenv report (build-${BACKEND}.env) of
# the build job it needs.
test_serial_mpi:
extends: .test_serial_template
needs:
- job: build_mpi
artifacts: true
image: $BUILD_IMAGE

test_parallel_mpi:
extends: .test_parallel_template
needs:
- job: build_mpi
artifacts: true
image: $BUILD_IMAGE

test_serial_ucx:
extends: .test_serial_template
needs:
- job: build_ucx
artifacts: true
image: $BUILD_IMAGE

test_parallel_ucx:
extends: .test_parallel_template
needs:
- job: build_ucx
artifacts: true
image: $BUILD_IMAGE

test_serial_libfabric:
extends: .test_serial_template
needs:
- job: build_libfabric
artifacts: true
image: $BUILD_IMAGE

test_parallel_libfabric:
extends: .test_parallel_template
needs:
- job: build_libfabric
artifacts: true
image: $BUILD_IMAGE
6 changes: 6 additions & 0 deletions .cscs-ci/spack/libfabric.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Spack environment for the libfabric CI image: a single root spec (oomph@main
# with backend=libfabric and +cuda), no environment view, unified concretization.
spack:
specs:
- oomph@main backend=libfabric +cuda
view: false
concretizer:
unify: true
6 changes: 6 additions & 0 deletions .cscs-ci/spack/mpi.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Spack environment for the MPI CI image: a single root spec (oomph@main with
# backend=mpi and +cuda), no environment view, unified concretization.
spack:
specs:
- oomph@main backend=mpi +cuda
view: false
concretizer:
unify: true
6 changes: 6 additions & 0 deletions .cscs-ci/spack/nccl.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Spack environment for the NCCL CI image: a single root spec (oomph@main with
# backend=nccl and +cuda), no environment view, unified concretization.
# The CI jobs that would use this file are currently commented out in
# .cscs-ci/default.yaml.
spack:
specs:
- oomph@main backend=nccl +cuda
view: false
concretizer:
unify: true
6 changes: 6 additions & 0 deletions .cscs-ci/spack/ucx.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Spack environment for the UCX CI image: a single root spec (oomph@main with
# backend=ucx and +cuda), no environment view, unified concretization.
spack:
specs:
- oomph@main backend=ucx +cuda
view: false
concretizer:
unify: true
9 changes: 2 additions & 7 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,6 @@
cmake_minimum_required(VERSION 3.17)
# CMake version is set at 3.17 because of find_package(CUDAToolkit)

if (NOT ${CMAKE_VERSION} VERSION_LESS 3.27)
# new in 3.27: additionally use uppercase <PACKAGENAME>_ROOT
# environment and CMake variables for find_package
cmake_policy(SET CMP0144 NEW)
endif()

set(OOMPH_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
list(APPEND CMAKE_MODULE_PATH "${OOMPH_MODULE_PATH}")

Expand All @@ -28,6 +22,7 @@ endfunction()

set_policy(CMP0074 NEW) # find_package uses XXX_ROOT vars using PackageName
set_policy(CMP0144 NEW) # find_package allows XXX_ROOT vars using PACKAGENAME Uppercase
set_policy(CMP0167 NEW) # find_package uses new boost config (boost 1.70 onwards)

# ---------------------------------------------------------------------
# CMake setup, C++ version, build type, modules, etc
Expand Down Expand Up @@ -92,7 +87,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/config.hpp.in
${CMAKE_CURRENT_BINARY_DIR}/include/oomph/config.hpp @ONLY)
install(FILES ${PROJECT_BINARY_DIR}/include/oomph/config.hpp
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/oomph)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/cmake_config.inc.in
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/cmake_config.inc.in
${CMAKE_CURRENT_BINARY_DIR}/include/oomph/cmake_config.inc)

# ---------------------------------------------------------------------
Expand Down
3 changes: 3 additions & 0 deletions cmake/config.hpp.in
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,12 @@

#cmakedefine01 OOMPH_USE_FAST_PIMPL
#cmakedefine01 OOMPH_ENABLE_BARRIER

// clang-format off
#define OOMPH_RECURSION_DEPTH @OOMPH_RECURSION_DEPTH@

#define OOMPH_VERSION @OOMPH_VERSION_NUMERIC@
#define OOMPH_VERSION_MAJOR @OOMPH_VERSION_MAJOR@
#define OOMPH_VERSION_MINOR @OOMPH_VERSION_MINOR@
#define OOMPH_VERSION_PATCH @OOMPH_VERSION_PATCH@
// clang-format on
2 changes: 2 additions & 0 deletions cmake/oomph_defs.hpp.in
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@ namespace oomph
{
namespace fort
{
// clang-format off
using fp_type = @OOMPH_FORTRAN_FP@;
// clang-format on
typedef enum {
OomphBarrierGlobal=1,
OomphBarrierThread=2,
Expand Down
Loading
Loading