From f70e65c096433273840f836e067450d1d1a760af Mon Sep 17 00:00:00 2001 From: Aditya Tomar Date: Sun, 31 Mar 2024 23:56:46 -0400 Subject: [PATCH 1/6] add flags for ROCm and RCCL --- Makefile | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index df453b4..9f6d40a 100644 --- a/Makefile +++ b/Makefile @@ -4,15 +4,21 @@ # SPDX-License-Identifier: MIT CC = cc -INC = -I/global/common/software/nersc9/nccl/2.19.4/include -CFLAGS = -std=c++11 -O2 -target-accel=nvidia80 --cuda-gpu-arch=sm_80 -DUSE_CUDA -DUSE_NCCL -LDFLAGS = -L/global/common/software/nersc9/nccl/2.19.4/lib -lnccl +# perlmutter flags +# INC = -I/global/common/software/nersc9/nccl/2.19.4/include +# CFLAGS = -std=c++11 -O2 -target-accel=nvidia80 --cuda-gpu-arch=sm_80 -DUSE_CUDA -DUSE_NCCL +# LDFLAGS = -L/global/common/software/nersc9/nccl/2.19.4/lib -lnccl + +# frontier flags +INC = -L${ROCM_PATH}/lib -lamdhip64 +CFLAGS = -std=c++11 -D__HIP_ROCclr__ -D__HIP_ARCH_GFX90A__=1 --rocm-path=${ROCM_PATH} --offload-arch=gfx90a -x hip -DUSE_ROCM -DUSE_RCCL +LDFLAGS = --rocm-path=${ROCM_PATH} -lrccl all: allgather.x -allgather.x: allgather.cu +allgather.x: allgather.cu ${CC} ${CFLAGS} ${INC} ${LDFLAGS} -o allgather.x allgather.cu clean: - rm -f allgather.x + rm -f allgather.x From e077503742f70d62e1c26043eb755f25f9b61358 Mon Sep 17 00:00:00 2001 From: Aditya Tomar <59426357+RoastSea8@users.noreply.github.com> Date: Sun, 31 Mar 2024 21:48:27 -0700 Subject: [PATCH 2/6] Update and rename README to README.md --- README => README.md | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) rename README => README.md (52%) diff --git a/README b/README.md similarity index 52% rename from README rename to README.md index eba2046..396231b 100644 --- a/README +++ b/README.md @@ -1,9 +1,13 @@ Before compiling do these: +### Perlmutter +```sh module load PrgEnv-cray cudatoolkit craype-accel-nvidia80 export CRAY_ACCEL_TARGET=nvidia80 - -When running do these: - -module load cudatoolkit export MPICH_GPU_SUPPORT_ENABLED=1 +``` +### Frontier +```sh +module load PrgEnv-cray amd-mixed craype-accel-amd-gfx90a +export MPICH_GPU_SUPPORT_ENABLED=1 +``` From fdb324ad8fb90fb6c9ec78c7551c8324e2655fa7 Mon Sep 17 00:00:00 2001 From: Aditya Tomar Date: Tue, 9 Apr 2024 06:05:06 -0700 Subject: [PATCH 3/6] update Makefile --- Makefile | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index 9f6d40a..52c0235 100644 --- a/Makefile +++ b/Makefile @@ -3,17 +3,17 @@ # # SPDX-License-Identifier: MIT -CC = cc +CC = cc # perlmutter flags -# INC = -I/global/common/software/nersc9/nccl/2.19.4/include -# CFLAGS = -std=c++11 -O2 -target-accel=nvidia80 --cuda-gpu-arch=sm_80 -DUSE_CUDA -DUSE_NCCL -# LDFLAGS = -L/global/common/software/nersc9/nccl/2.19.4/lib -lnccl +# INC = -I/global/common/software/nersc9/nccl/2.19.4/include +# CFLAGS = -std=c++11 -O2 -target-accel=nvidia80 --cuda-gpu-arch=sm_80 -DUSE_CUDA -DUSE_NCCL +# LDFLAGS = -L/global/common/software/nersc9/nccl/2.19.4/lib -lnccl # frontier flags -INC = -L${ROCM_PATH}/lib -lamdhip64 -CFLAGS = -std=c++11 -D__HIP_ROCclr__ -D__HIP_ARCH_GFX90A__=1 --rocm-path=${ROCM_PATH} --offload-arch=gfx90a -x hip -DUSE_ROCM -DUSE_RCCL -LDFLAGS = --rocm-path=${ROCM_PATH} -lrccl +INC = -I${ROCM_PATH}/include +CFLAGS = -std=c++11 -O2 -D__HIP_ROCclr__ -D__HIP_ARCH_GFX90A__=1 --rocm-path=${ROCM_PATH} --offload-arch=gfx90a -x hip -DUSE_ROCM -DUSE_RCCL +LDFLAGS = -L${ROCM_PATH}/lib -lamdhip64 -lrccl all: allgather.x From 9d2ba5ba66bed818f2647f3e5ff37a2747b3f168 Mon Sep 17 00:00:00 2001 From: Aditya Tomar Date: Sun, 14 Apr 2024 04:47:02 -0700 Subject: [PATCH 4/6] update README.md --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 396231b..f94be86 100644 --- a/README.md +++ b/README.md @@ -2,12 +2,13 @@ Before compiling do these: ### Perlmutter ```sh -module load PrgEnv-cray cudatoolkit craype-accel-nvidia80 +module load PrgEnv-cray cudatoolkit craype-accel-nvidia80 nccl/2.19.4 export CRAY_ACCEL_TARGET=nvidia80 export MPICH_GPU_SUPPORT_ENABLED=1 ``` ### Frontier ```sh -module load PrgEnv-cray amd-mixed craype-accel-amd-gfx90a +module load PrgEnv-cray amd-mixed/5.6.0 craype-accel-amd-gfx90a cray-mpich/8.1.26 cpe/23.05 export MPICH_GPU_SUPPORT_ENABLED=1 +export LD_LIBRARY_PATH="${CRAY_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH}" ``` From 23193a903b5f4e683da7af5ea4bd9acae7db8792 Mon Sep 17 00:00:00 2001 From: Aditya Tomar Date: Fri, 26 Apr 2024 11:21:36 -0700 Subject: [PATCH 5/6] update --- Makefile | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 52c0235..4a71154 100644 --- a/Makefile +++ b/Makefile @@ -11,14 +11,20 @@ CC = cc # LDFLAGS = -L/global/common/software/nersc9/nccl/2.19.4/lib -lnccl # frontier flags -INC = -I${ROCM_PATH}/include -CFLAGS = -std=c++11 -O2 -D__HIP_ROCclr__ -D__HIP_ARCH_GFX90A__=1 --rocm-path=${ROCM_PATH} --offload-arch=gfx90a -x hip -DUSE_ROCM -DUSE_RCCL -LDFLAGS = -L${ROCM_PATH}/lib -lamdhip64 -lrccl +# INC = -I${ROCM_PATH}/include +# CFLAGS = -std=c++11 -O2 -D__HIP_ROCclr__ -D__HIP_ARCH_GFX90A__=1 --rocm-path=${ROCM_PATH} --offload-arch=gfx90a -x hip -DUSE_ROCM -DUSE_RCCL +# LDFLAGS = -L${ROCM_PATH}/lib -lamdhip64 -lrccl -all: allgather.x +all: allgather.x allreduce.x reduce_scatter.x allgather.x: allgather.cu ${CC} ${CFLAGS} ${INC} ${LDFLAGS} -o allgather.x allgather.cu +allreduce.x: allreduce.cu + ${CC} ${CFLAGS} ${INC} ${LDFLAGS} -o allreduce.x allreduce.cu + +reduce_scatter.x: reduce_scatter.cu + ${CC} ${CFLAGS} ${INC} ${LDFLAGS} -o reduce_scatter.x reduce_scatter.cu + clean: - rm -f allgather.x + rm -f allgather.x allreduce.x reduce_scatter.x From ba9441879e5c623481ca193a85593c726f73f29d Mon Sep 17 00:00:00 2001 From: Aditya Tomar Date: Thu, 11 Jul 2024 18:13:34 -0700 Subject: [PATCH 6/6] use latest NCCL version --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f94be86..3688d81 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ Before compiling do these: ### Perlmutter ```sh -module load PrgEnv-cray cudatoolkit craype-accel-nvidia80 nccl/2.19.4 +module load PrgEnv-cray cudatoolkit craype-accel-nvidia80 nccl export CRAY_ACCEL_TARGET=nvidia80 export MPICH_GPU_SUPPORT_ENABLED=1 ```