diff --git a/AMG2023/README.md b/AMG2023/README.md
index 476ad56..14c75c8 100644
--- a/AMG2023/README.md
+++ b/AMG2023/README.md
@@ -1,9 +1,9 @@
 # AMG2023 README
 For more detailed installation parameters, please refer to the [installation document](https://github.com/pssg-int/AMG2023/blob/main/amg-doc.pdf).
-## Perlmutter Compilation
+Repository: [AMG2023](https://github.com/hpcgroup/AMG2023/)
-Repository: [AMG2023](https://github.com/pssg-int/AMG2023)
+## Perlmutter Compilation
 ### Steps to Compile
@@ -50,5 +50,61 @@ Repository: [AMG2023](https://github.com/pssg-int/AMG2023)
    cmake -DHYPRE_PREFIX=/pscratch/sd/c/cunyang/AMG2023 ..
    ```
-## Frontier Installation
+## Frontier Compilation
+
+### Steps to Compile
+
+1. Load modules
+   ```sh
+   module reset
+
+   module load cray-mpich/8.1.30
+   module load craype-accel-amd-gfx90a
+   module load rocm/6.1.3
+   export MPICH_GPU_SUPPORT_ENABLED=1
+
+   # load a compatible cmake version
+   module load Core/24.07
+   module load cmake/3.27.9
+   ```
+2. Configure hypre (v2.32.0)
+   - Clone hypre v2.32.0 and navigate to src:
+     ```sh
+     git clone -b v2.32.0 https://github.com/hypre-space/hypre.git
+     cd hypre/src
+     ```
+   - Configure hypre (in hypre/src):
+     ```sh
+     ./configure --with-hip --enable-device-memory-pool --enable-mixedint --with-gpu-arch=gfx90a \
+       --with-MPI-lib-dirs="${MPICH_DIR}/lib" --with-MPI-libs="mpi" \
+       --with-MPI-include="${MPICH_DIR}/include" \
+       CFLAGS="-I${ROCM_PATH}/include/ -I${ROCM_PATH}/llvm/include/ -I${ROCM_PATH}/include/rocsparse/" \
+       LDFLAGS="-L${ROCM_PATH}/lib/ -L${ROCM_PATH}/llvm/lib/ -lrocsparse"
+     ```
+   - Compile hypre (in hypre/src):
+     ```sh
+     # build with make
+     make
+     ```
+3. Configure AMG2023
+   - Clone the repo:
+     ```sh
+     git clone https://github.com/pssg-int/AMG2023
+     cd AMG2023
+     ```
+   - Add mpiP to LD_LIBRARY_PATH:
+     ```sh
+     export LD_LIBRARY_PATH=/ccs/home/keshprad/mpiP:$LD_LIBRARY_PATH
+     ```
+   - Configure cmake:
+     ```sh
+     mkdir build && cd build
+     cmake .. \
+       -DHYPRE_PREFIX=/ccs/home/keshprad/hypre/src/hypre/ \
+       -DCMAKE_C_FLAGS="-I${ROCM_PATH}/include/ -I${ROCM_PATH}/llvm/include/ -I${ROCM_PATH}/include/rocsparse/" \
+       -DCMAKE_EXE_LINKER_FLAGS="-L${ROCM_PATH}/lib/ -L${ROCM_PATH}/llvm/lib/ -lrocsparse -lrocrand"
+     ```
+   - Compile AMG2023 (in AMG2023/build):
+     ```sh
+     make install
+     ```
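+
+### Example run (sketch)
+
+A hedged example, not from the original instructions: the flags mirror the job scripts in this repo, where the product of the `-P` values must equal the total MPI task count (e.g. 4x4x8 = 128 tasks) and `-n` gives the per-task grid size.
+```sh
+# 2 nodes x 8 GPUs/node = 16 tasks; 2*2*4 = 16
+srun -N 2 -n 16 --gpus-per-node 8 --gpu-bind none \
+    ./build/amg -P 2 2 4 -n 64 64 64 -problem 1 -iter 500
+```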
diff --git a/AMG2023/run_frontier_16.sh b/AMG2023/run_frontier_16.sh
new file mode 100644
index 0000000..c51b52d
--- /dev/null
+++ b/AMG2023/run_frontier_16.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+#SBATCH -N 16
+#SBATCH -n 128
+#SBATCH -q normal
+#SBATCH -J amg
+#SBATCH --gpu-bind none
+#SBATCH -t 00:30:00
+#SBATCH -A csc569
+#SBATCH --output /lustre/orion/csc569/scratch/keshprad/perfvar/AMG2023_logs/16nodes/%x-%j/output-AMG2023.log
+#SBATCH --error /lustre/orion/csc569/scratch/keshprad/perfvar/AMG2023_logs/16nodes/%x-%j/error-AMG2023.log
+#SBATCH --exclusive
+# Run like: sbatch run_frontier_16.sh
+
+OUTPUT_DIR=/lustre/orion/csc569/scratch/keshprad/perfvar/AMG2023_logs/16nodes/$SLURM_JOB_NAME-$SLURM_JOB_ID
+OUTPUT_FILE=$OUTPUT_DIR/output-AMG2023.log
+ERROR_FILE=$OUTPUT_DIR/error-AMG2023.log
+
+# Run gpu benchmarks
+COMM_TYPE=mpi
+ROCM_VERSION=6.1.3
+PERF_VARIABILITY_ROOT=/ccs/home/keshprad/perf-variability
+echo running allreduce benchmark
+bash $PERF_VARIABILITY_ROOT/gpu-benchmarks/allreduce/run_frontier.sh $COMM_TYPE $ROCM_VERSION $SLURM_JOB_NUM_NODES $OUTPUT_DIR
+# echo running allgather benchmark
+# bash $PERF_VARIABILITY_ROOT/gpu-benchmarks/allgather/run_frontier.sh $COMM_TYPE $ROCM_VERSION $SLURM_JOB_NUM_NODES $OUTPUT_DIR
+echo running gemm benchmark
+bash $PERF_VARIABILITY_ROOT/gpu-benchmarks/gemm/run_frontier.sh $ROCM_VERSION $SLURM_JOB_NUM_NODES $OUTPUT_DIR
+
+APP_ROOT=/ccs/home/keshprad/AMG2023
+cd $APP_ROOT
+
+# reset modules
+echo resetting modules:
+module reset
+# load modules
+echo loading modules:
+module load cray-mpich/8.1.30
+module load craype-accel-amd-gfx90a
+module load rocm/6.1.3
+
+export MPICH_GPU_SUPPORT_ENABLED=1
+export CRAY_ACCEL_TARGET=gfx90a
+export HYPRE_INSTALL_DIR=/ccs/home/keshprad/hypre/src/hypre/
+# mpiP
+export LD_LIBRARY_PATH=/ccs/home/keshprad/mpiP:$LD_LIBRARY_PATH
+export MPIP="-o -f $OUTPUT_DIR"
+
+# log start date
+echo start AMG2023: $(date)
+# define command
+cmd="srun --output $OUTPUT_FILE --error $ERROR_FILE \
+    ./build/amg -P 4 4 8 -n 128 64 64 -problem 1 -iter 500"
+echo solving:
+echo $cmd
+$cmd
+# log end date
+echo end AMG2023: $(date)
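+
+# Hedged addition, not in the original script: bash's SECONDS builtin has been
+# counting since the script started, so this also logs total wall-clock time.
+echo total elapsed seconds: $SECONDS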
diff --git a/AMG2023/run_frontier_64.sh b/AMG2023/run_frontier_64.sh
new file mode 100644
index 0000000..c7a7a3e
--- /dev/null
+++ b/AMG2023/run_frontier_64.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+#SBATCH -N 64
+#SBATCH -n 512
+#SBATCH -q normal
+#SBATCH -J amg
+#SBATCH --gpu-bind none
+#SBATCH -t 00:30:00
+#SBATCH -A csc569
+#SBATCH --output /lustre/orion/csc569/scratch/keshprad/perfvar/AMG2023_logs/64nodes/%x-%j/output-AMG2023.log
+#SBATCH --error /lustre/orion/csc569/scratch/keshprad/perfvar/AMG2023_logs/64nodes/%x-%j/error-AMG2023.log
+#SBATCH --exclusive
+# Run like: sbatch run_frontier_64.sh
+
+OUTPUT_DIR=/lustre/orion/csc569/scratch/keshprad/perfvar/AMG2023_logs/64nodes/$SLURM_JOB_NAME-$SLURM_JOB_ID
+OUTPUT_FILE=$OUTPUT_DIR/output-AMG2023.log
+ERROR_FILE=$OUTPUT_DIR/error-AMG2023.log
+
+# Run gpu benchmarks
+COMM_TYPE=mpi
+ROCM_VERSION=6.1.3
+PERF_VARIABILITY_ROOT=/ccs/home/keshprad/perf-variability
+echo running allreduce benchmark
+bash $PERF_VARIABILITY_ROOT/gpu-benchmarks/allreduce/run_frontier.sh $COMM_TYPE $ROCM_VERSION $SLURM_JOB_NUM_NODES $OUTPUT_DIR
+# echo running allgather benchmark
+# bash $PERF_VARIABILITY_ROOT/gpu-benchmarks/allgather/run_frontier.sh $COMM_TYPE $ROCM_VERSION $SLURM_JOB_NUM_NODES $OUTPUT_DIR
+echo running gemm benchmark
+bash $PERF_VARIABILITY_ROOT/gpu-benchmarks/gemm/run_frontier.sh $ROCM_VERSION $SLURM_JOB_NUM_NODES $OUTPUT_DIR
+
+APP_ROOT=/ccs/home/keshprad/AMG2023
+cd $APP_ROOT
+
+# reset modules
+echo resetting modules:
+module reset
+# load modules
+echo loading modules:
+module load cray-mpich/8.1.30
+module load craype-accel-amd-gfx90a
+module load rocm/6.1.3
+
+export MPICH_GPU_SUPPORT_ENABLED=1
+export CRAY_ACCEL_TARGET=gfx90a
+export HYPRE_INSTALL_DIR=/ccs/home/keshprad/hypre/src/hypre/
+# mpiP
+export LD_LIBRARY_PATH=/ccs/home/keshprad/mpiP:$LD_LIBRARY_PATH
+export MPIP="-o -f $OUTPUT_DIR"
+
+# log start date
+echo start AMG2023: $(date)
+# define command
+cmd="srun --output $OUTPUT_FILE --error $ERROR_FILE \
+    ./build/amg -P 8 8 8 -n 128 64 64 -problem 1 -iter 500"
+echo solving:
+echo $cmd
+$cmd
+# log end date
+echo end AMG2023: $(date)
diff --git a/AMG2023/run_frontier_crontab.sh b/AMG2023/run_frontier_crontab.sh
new file mode 100644
index 0000000..09b0f66
--- /dev/null
+++ b/AMG2023/run_frontier_crontab.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+if [ "$#" -ne 1 ]; then
+    echo "Usage: $0 <num_nodes>"
+    exit 1
+fi
+# `16` or `64`
+NUM_NODES=$1
+
+PERF_VARIABILITY_ROOT=/ccs/home/keshprad/perf-variability
+
+# load lmod
+source /usr/share/lmod/lmod/init/bash
+# load default LMOD_SYSTEM_DEFAULT_MODULES and MODULEPATH
+export LMOD_SYSTEM_DEFAULT_MODULES=craype-x86-trento:craype-network-ofi:perftools-base:xpmem:cray-pmi:PrgEnv-cray:DefApps
+export MODULEPATH=/sw/frontier/spack-envs/modules/cce/17.0.0/cray-mpich-8.1.28/cce-17.0.0:/sw/frontier/spack-envs/modules/cce/17.0.0/cce-17.0.0:/sw/frontier/spack-envs/modules/Core/24.07:/opt/cray/pe/lmod/modulefiles/mpi/crayclang/17.0/ofi/1.0/cray-mpich/8.0:/opt/cray/pe/lmod/modulefiles/comnet/crayclang/17.0/ofi/1.0:/opt/cray/pe/lmod/modulefiles/compiler/crayclang/17.0:/opt/cray/pe/lmod/modulefiles/mix_compilers:/opt/cray/pe/lmod/modulefiles/perftools/23.12.0:/opt/cray/pe/lmod/modulefiles/net/ofi/1.0:/opt/cray/pe/lmod/modulefiles/cpu/x86-trento/1.0:/opt/cray/pe/modulefiles/Linux:/opt/cray/pe/modulefiles/Core:/opt/cray/pe/lmod/lmod/modulefiles/Core:/opt/cray/pe/lmod/modulefiles/core:/opt/cray/pe/lmod/modulefiles/craype-targets/default:/sw/frontier/modulefiles:/opt/cray/modulefiles
+
+# run sbatch script
+script=$PERF_VARIABILITY_ROOT/AMG2023/run_frontier_${NUM_NODES}.sh
+sbatch $script
\ No newline at end of file
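A hedged scheduling sketch, not part of the patch (the crontab entry itself is not in the repo; the path and node count follow run_frontier_crontab.sh above):

```sh
# submit the 16-node AMG2023 job every day at 02:00
0 2 * * * bash /ccs/home/keshprad/perf-variability/AMG2023/run_frontier_crontab.sh 16
```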
diff --git a/gpu-benchmarks/README.md b/gpu-benchmarks/README.md
new file mode 100644
index 0000000..c8f9c25
--- /dev/null
+++ b/gpu-benchmarks/README.md
@@ -0,0 +1,14 @@
+# gpu-benchmarks README
+Code Repository: [gpu-benchmarks](#TODO:)
+
+## Perlmutter Compilation
+
+### Steps to Compile
+
+TODO:
+
+## Frontier Compilation
+
+### Steps to Compile
+
+TODO:
\ No newline at end of file
diff --git a/gpu-benchmarks/allgather/run_frontier.sh b/gpu-benchmarks/allgather/run_frontier.sh
new file mode 100644
index 0000000..7fc10b4
--- /dev/null
+++ b/gpu-benchmarks/allgather/run_frontier.sh
@@ -0,0 +1,63 @@
+#!/bin/bash
+# This script assumes it is being run by another sbatch script,
+# so does not include portions for SBATCH vars (e.g. account, time, etc.)
+
+# run like: bash /ccs/home/keshprad/gpu-benchmarks/benchmark/frontier/allgather.sh
+
+if [ "$#" -ne 4 ]; then
+    echo "Usage: $0 <comm_type> <rocm_version> <num_nodes> <output_dir>"
+    exit 1
+fi
+# `mpi` or `rccl`
+COMM_TYPE=$1
+# `5.7.1` or `6.1.3`
+ROCM_VERSION=$2
+# `16` or `64`
+NUM_NODES=$3
+# output directory
+OUTPUT_DIR=$4
+
+# setup cray-mpich version
+if [[ "$ROCM_VERSION" == "6.1.3" ]]; then
+    MPICH_VERSION=8.1.30
+else
+    MPICH_VERSION=8.1.28
+fi
+
+OUTPUT_FILE=$OUTPUT_DIR/output-allgather.log
+
+{
+    # reset modules
+    echo resetting modules:
+    module reset
+    # load modules
+    echo loading modules:
+    module load PrgEnv-cray craype-accel-amd-gfx90a cpe/23.05 amd/${ROCM_VERSION}
+    module load cray-mpich/${MPICH_VERSION}
+    module load rocm/${ROCM_VERSION}
+    module list
+
+    GPU_BENCHMARKS_ROOT=/lustre/orion/csc569/scratch/keshprad/gpu-benchmarks
+    EXEC=$GPU_BENCHMARKS_ROOT/allgather_${COMM_TYPE}_rocm-${ROCM_VERSION}.x
+    NUM_TASKS=$(($NUM_NODES * 8))
+    MIN_MSG_SIZE=$((1 * 1024))
+    MAX_MSG_SIZE=$((1 * 1024 * 1024))
+    ITERATIONS=100
+
+    export MPICH_GPU_SUPPORT_ENABLED=1
+    export LD_LIBRARY_PATH="${CRAY_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH}"
+
+    echo start allgather: $(date)
+    # For the MPI benchmarks, use --gpus-per-node, --gpus-per-task,
+    # --ntasks-per-node, and --gpu-bind=none in srun.
+    CMD="srun -N $NUM_NODES -n $NUM_TASKS \
+        --gpus-per-node 8 \
+        --gpus-per-task 1 \
+        --ntasks-per-node 8 \
+        --gpu-bind none \
+        --output $OUTPUT_FILE \
+        $EXEC $NUM_TASKS $MIN_MSG_SIZE $MAX_MSG_SIZE $ITERATIONS"
+    echo running:
+    echo $CMD
+    $CMD
+    echo end allgather: $(date)
+} &>> $OUTPUT_FILE
diff --git a/gpu-benchmarks/allreduce/run_frontier.sh b/gpu-benchmarks/allreduce/run_frontier.sh
new file mode 100644
index 0000000..855a486
--- /dev/null
+++ b/gpu-benchmarks/allreduce/run_frontier.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+# This script assumes it is being run by another sbatch script,
+# so does not include portions for SBATCH vars (e.g. account, time, etc.)
+
+# run like: bash /ccs/home/keshprad/gpu-benchmarks/benchmark/frontier/allreduce.sh
+
+if [ "$#" -ne 4 ]; then
+    echo "Usage: $0 <comm_type> <rocm_version> <num_nodes> <output_dir>"
+    exit 1
+fi
+# `mpi` or `rccl`
+COMM_TYPE=$1
+# `5.7.1` or `6.1.3`
+ROCM_VERSION=$2
+# `16` or `64`
+NUM_NODES=$3
+# output directory
+OUTPUT_DIR=$4
+
+# setup cray-mpich version
+if [[ "$ROCM_VERSION" == "6.1.3" ]]; then
+    MPICH_VERSION=8.1.30
+else
+    MPICH_VERSION=8.1.28
+fi
+
+OUTPUT_FILE=$OUTPUT_DIR/output-allreduce.log
+
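+# Hedged guard, not in the original script: make sure the log directory exists
+# before appending to $OUTPUT_FILE below; a no-op if the caller already created it.
+mkdir -p "$OUTPUT_DIR"
+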
+{
+    # reset modules
+    echo resetting modules:
+    module reset
+    # load modules
+    echo loading modules:
+    module load PrgEnv-cray craype-accel-amd-gfx90a cpe/23.05 amd/${ROCM_VERSION}
+    module load cray-mpich/${MPICH_VERSION}
+    module load rocm/${ROCM_VERSION}
+    module list
+
+    GPU_BENCHMARKS_ROOT=/lustre/orion/csc569/scratch/keshprad/gpu-benchmarks
+    EXEC=$GPU_BENCHMARKS_ROOT/allreduce_${COMM_TYPE}_rocm-${ROCM_VERSION}.x
+    NUM_TASKS=$(($NUM_NODES * 8))
+    MIN_MSG_SIZE=$((1 * 1024))
+    MAX_MSG_SIZE=$((1 * 1024 * 1024))
+    ITERATIONS=100
+
+    export MPICH_GPU_SUPPORT_ENABLED=1
+    export LD_LIBRARY_PATH="${CRAY_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH}"
+
+    echo start allreduce: $(date)
+    CMD="srun -N $NUM_NODES -n $NUM_TASKS \
+        --output $OUTPUT_FILE \
+        $EXEC $NUM_TASKS $MIN_MSG_SIZE $MAX_MSG_SIZE $ITERATIONS"
+    echo running:
+    echo $CMD
+    $CMD
+    echo end allreduce: $(date)
+} &>> $OUTPUT_FILE
diff --git a/gpu-benchmarks/gemm/run_frontier.sh b/gpu-benchmarks/gemm/run_frontier.sh
new file mode 100644
index 0000000..c5348be
--- /dev/null
+++ b/gpu-benchmarks/gemm/run_frontier.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
+# This script assumes it is being run by another sbatch script,
+# so does not include portions for SBATCH vars (e.g. account, time, etc.)
+
+# run like: bash /ccs/home/keshprad/gpu-benchmarks/benchmark/frontier/gemm.sh
+
+if [ "$#" -ne 3 ]; then
+    echo "Usage: $0 <rocm_version> <num_nodes> <output_dir>"
+    exit 1
+fi
+# `5.7.1` or `6.1.3`
+ROCM_VERSION=$1
+# `16` or `64`
+NUM_NODES=$2
+# output directory
+OUTPUT_DIR=$3
+
+# setup cray-mpich version
+if [[ "$ROCM_VERSION" == "6.1.3" ]]; then
+    MPICH_VERSION=8.1.30
+else
+    MPICH_VERSION=8.1.28
+fi
+
+OUTPUT_FILE=$OUTPUT_DIR/output-gemm.log
+
+{
+    # reset modules
+    echo resetting modules:
+    module reset
+    # load modules
+    echo loading modules:
+    module load PrgEnv-cray craype-accel-amd-gfx90a cpe/23.05 amd/${ROCM_VERSION}
+    module load cray-mpich/${MPICH_VERSION}
+    module load rocm/${ROCM_VERSION}
+    module list
+
+    GPU_BENCHMARKS_ROOT=/lustre/orion/csc569/scratch/keshprad/gpu-benchmarks
+    EXEC=$GPU_BENCHMARKS_ROOT/matmul/frontier/gemm_rocm-${ROCM_VERSION}.x
+    NUM_TASKS=$(($NUM_NODES * 8))
+
+    export MPICH_GPU_SUPPORT_ENABLED=1
+    export LD_LIBRARY_PATH="${CRAY_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH}"
+
+    echo start gemm: $(date)
+    CMD="srun -N $NUM_NODES -n $NUM_TASKS \
+        --gpus-per-node 8 \
+        --gpus-per-task 1 \
+        --ntasks-per-node 8 \
+        --output $OUTPUT_FILE \
+        $EXEC"
+    echo running:
+    echo $CMD
+    $CMD
+    echo end gemm: $(date)
+} &>> $OUTPUT_FILE
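A hedged usage sketch, not part of the patch: the benchmark wrappers are normally launched from the AMG2023 job scripts above, but they can also be smoke-tested from an interactive allocation (account, node count, and walltime taken from those scripts; the log directory is illustrative):

```sh
salloc -A csc569 -N 16 -t 00:30:00
mkdir -p /tmp/bench-logs
bash gpu-benchmarks/gemm/run_frontier.sh 6.1.3 16 /tmp/bench-logs
bash gpu-benchmarks/allreduce/run_frontier.sh mpi 6.1.3 16 /tmp/bench-logs
```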