From bb168691414dbc952bacc0391513d198f32c5b27 Mon Sep 17 00:00:00 2001 From: seshadri levante Date: Thu, 19 Jun 2025 20:00:55 +0200 Subject: [PATCH 01/14] Removed commits from preprocess target PR --- CMakeLists.txt | 2 +- env.sh | 4 ++-- env/levante.dkrz.de/shell.nvhpc | 7 ++++--- src/CMakeLists.txt | 13 +++++++------ src/ice_fct.F90 | 4 ++++ 5 files changed, 18 insertions(+), 12 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0787ca401..28c4c5f49 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.16) # set default build type cache entry (do so before project(...) is called, which would create this cache entry on its own) if(NOT CMAKE_BUILD_TYPE) message(STATUS "setting default build type: Release") - set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, options are: None(CMAKE_CXX_FLAGS or CMAKE_C_FLAGS used) Debug Release RelWithDebInfo MinSizeRel.") + set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "Choose the type of build, options are: None(CMAKE_CXX_FLAGS or CMAKE_C_FLAGS used) Debug Release RelWithDebInfo MinSizeRel.") endif() project(FESOM2.0) diff --git a/env.sh b/env.sh index e6cf2ce6b..294ddb4c8 100755 --- a/env.sh +++ b/env.sh @@ -54,7 +54,7 @@ elif [[ $LOGINHOST =~ ^m[A-Za-z0-9]+\.hpc\.dkrz\.de$ ]]; then STRATEGY="mistral.dkrz.de" elif [[ $LOGINHOST =~ ^levante ]] || [[ $LOGINHOST =~ ^l[:alnum:]+\.lvt\.dkrz\.de$ ]]; then STRATEGY="levante.dkrz.de" - # following regex only matches if input is 2 word like levante.nvhpc + # following regex only matches if input is 2 word like levante.nvhpc, this enables using different shells for a machine directly compid_regex="^([[:alnum:]]+)\.([[:alnum:]]+)$" if [[ $LOGINHOST =~ $compid_regex ]]; then COMPILERID="${BASH_REMATCH[2]}" @@ -119,4 +119,4 @@ else echo "Sourcing $(realpath $SHELLFILE) for environment" source $SHELLFILE echo "$(realpath ${SHELLFILE})" > $DIR/bin/current_shell_path -fi +fi \ No newline at end of file diff --git 
a/env/levante.dkrz.de/shell.nvhpc b/env/levante.dkrz.de/shell.nvhpc index eb2b776f6..5bf73e871 100755 --- a/env/levante.dkrz.de/shell.nvhpc +++ b/env/levante.dkrz.de/shell.nvhpc @@ -5,9 +5,10 @@ export CPU_MODEL=AMD_EPYC_ZEN3 module --force purge # module load intel-oneapi-compilers/2022.0.1-gcc-11.2.0 # module load openmpi/4.1.2-intel-2021.5.0 -module load nvhpc/22.5-gcc-11.2.0 -module load openmpi/.4.1.4-nvhpc-22.5 +module load nvhpc/23.9-gcc-11.2.0 +module load openmpi/4.1.6-nvhpc-23.9 export FC=mpif90 CC=mpicc CXX=mpicxx; +# export LD_LIBRARY_PATH=/sw/spack-levante/intel-oneapi-mkl-2022.0.1-ttdktf/mkl/2022.0.1/lib/intel64:$LD_LIBRARY_PATH module load netcdf-c/4.8.1-openmpi-4.1.2-intel-2021.5.0 module load netcdf-fortran/4.5.3-openmpi-4.1.2-intel-2021.5.0 @@ -28,4 +29,4 @@ export UCX_TLS=mm,knem,cma,dc_mlx5,dc_x,self export UCX_UNIFIED_MODE=y export HDF5_USE_FILE_LOCKING=FALSE export OMPI_MCA_io="romio321" -export UCX_HANDLE_ERRORS=bt +export UCX_HANDLE_ERRORS=bt \ No newline at end of file diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 1f72aeb75..154fe0148 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -397,16 +397,17 @@ elseif(${CMAKE_Fortran_COMPILER_ID} STREQUAL Cray ) endif() elseif(${CMAKE_Fortran_COMPILER_ID} STREQUAL NVHPC ) target_compile_definitions(${PROJECT_NAME} PRIVATE ENABLE_NVHPC_WORKAROUNDS) - target_compile_options(${PROJECT_NAME} PRIVATE -fast -fastsse -O3 -Mallocatable=95 -Mr8 -pgf90libs) + target_compile_options(${PROJECT_NAME} PRIVATE -Mnofma -Mallocatable=95 -Mr8 -pgf90libs) if(ENABLE_OPENACC) # additional compiler settings - target_compile_options(${PROJECT_NAME} PRIVATE -acc -ta=tesla:${NV_GPU_ARCH} -Minfo=accel) - set(CMAKE_EXE_LINKER_FLAGS "-acc -ta=tesla:${NV_GPU_ARCH}") + message("Taking ENABLE_OPENACC = ON") + target_compile_options(${PROJECT_NAME} PRIVATE -O2 -acc -gpu=${NV_GPU_ARCH} -Minfo=accel) + set(CMAKE_EXE_LINKER_FLAGS "-acc -gpu=${NV_GPU_ARCH}") endif() if(ENABLE_OPENMP) 
target_compile_options(${PROJECT_NAME} PRIVATE -Mipa=fast) - else() - target_compile_options(${PROJECT_NAME} PRIVATE -Mipa=fast,inline) + # else() + # target_compile_options(${PROJECT_NAME} PRIVATE -Mipa=fast,inline) endif() endif() @@ -486,4 +487,4 @@ endif() ### Export and installation -fesom_export(TARGETS ${PROJECT_NAME} fesom.x ${additional_targets}) +fesom_export(TARGETS ${PROJECT_NAME} fesom.x ${additional_targets}) \ No newline at end of file diff --git a/src/ice_fct.F90 b/src/ice_fct.F90 index 5e0b87093..fd1548ac0 100755 --- a/src/ice_fct.F90 +++ b/src/ice_fct.F90 @@ -1128,7 +1128,11 @@ subroutine ice_fem_fct(tr_array_id, ice, partit, mesh) call exchange_nod(ice_temp, partit, luse_g2g = .true.) #endif +#ifndef ENABLE_OPENACC +!$OMP PARALLEL DO +#else !$ACC END DATA +#endif !$OMP BARRIER end subroutine ice_fem_fct From 8d77d941b387ca66a21b4e01a6da373bd9238734 Mon Sep 17 00:00:00 2001 From: seshadri levante Date: Fri, 17 May 2024 14:46:36 +0200 Subject: [PATCH 02/14] Working OpenACC code after commenting 'set(CMAKE_EXE_LINKER_FLAGS)' --- CMakeLists.txt | 2 +- src/CMakeLists.txt | 20 +++++++++++++------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 28c4c5f49..5905cdc94 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.16) # set default build type cache entry (do so before project(...) 
is called, which would create this cache entry on its own) if(NOT CMAKE_BUILD_TYPE) message(STATUS "setting default build type: Release") - set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "Choose the type of build, options are: None(CMAKE_CXX_FLAGS or CMAKE_C_FLAGS used) Debug Release RelWithDebInfo MinSizeRel.") + set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, options are: None(CMAKE_CXX_FLAGS or CMAKE_C_FLAGS used) Debug Release RelWithDebInfo MinSizeRel.") endif() project(FESOM2.0) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 154fe0148..9d86a93e1 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -75,8 +75,9 @@ endif() option(ENABLE_OPENACC "compile with OpenACC support" OFF) message(STATUS "ENABLE_OPENACC: ${ENABLE_OPENACC}") - -set(NV_GPU_ARCH "cc80" CACHE STRING "GPU arch for nvfortran compiler (cc35,cc50,cc60,cc70,cc80,...)") +option(DISABLE_OPENACC_ATOMICS "disable kernels using atomic statement for reproducible results" ON) +set(GPU_COMPUTE_CAPABILITY "cc80" CACHE STRING "GPU arch for nvfortran compiler (cc35,cc50,cc60,cc70,cc80,...)") +set(GPU_FLAGS "cuda12.2,${GPU_COMPUTE_CAPABILITY}" CACHE STRING "GPU arch for nvfortran compiler (cc35,cc50,cc60,cc70,cc80,...)") option(ENABLE_OPENMP "build FESOM with OpenMP" OFF) message(STATUS "ENABLE_OPENMP: ${ENABLE_OPENMP}") @@ -384,7 +385,7 @@ elseif(${CMAKE_Fortran_COMPILER_ID} STREQUAL GNU ) elseif(${CMAKE_Fortran_COMPILER_ID} STREQUAL Cray ) #target_compile_options(${PROJECT_NAME} PRIVATE -c -emf -hbyteswapio -hflex_mp=conservative -hfp1 -hadd_paren -Ounroll0 -hipa0 -r am -s real64 -N 1023 -g -G2 -O3) target_compile_options(${PROJECT_NAME} PRIVATE -c -emf -hbyteswapio -hflex_mp=conservative -hfp1 -hadd_paren -Ounroll0 -hipa0 -r am -s real64 -N 1023 -g -G2 -O2 -hnoacc -M878) #-hnoacc is a workaround for cray automatically activate -hacc, -M878 is to suppress ftn-878 warning - if(ENABLE_OPENMP) + if(${ENABLE_OPENMP}) target_compile_options(${PROJECT_NAME} PRIVATE -homp) 
else() target_compile_options(${PROJECT_NAME} PRIVATE -hnoomp) @@ -397,14 +398,19 @@ elseif(${CMAKE_Fortran_COMPILER_ID} STREQUAL Cray ) endif() elseif(${CMAKE_Fortran_COMPILER_ID} STREQUAL NVHPC ) target_compile_definitions(${PROJECT_NAME} PRIVATE ENABLE_NVHPC_WORKAROUNDS) + #target_compile_options(${PROJECT_NAME} PRIVATE -fast -fastsse -O3 -Mallocatable=95 -Mr8 -pgf90libs) target_compile_options(${PROJECT_NAME} PRIVATE -Mnofma -Mallocatable=95 -Mr8 -pgf90libs) - if(ENABLE_OPENACC) + if(${ENABLE_OPENACC}) # additional compiler settings message("Taking ENABLE_OPENACC = ON") - target_compile_options(${PROJECT_NAME} PRIVATE -O2 -acc -gpu=${NV_GPU_ARCH} -Minfo=accel) - set(CMAKE_EXE_LINKER_FLAGS "-acc -gpu=${NV_GPU_ARCH}") + target_compile_options(${PROJECT_NAME} PRIVATE -acc -O2 -gpu=${GPU_FLAGS} -Minfo=accel) + # set(CMAKE_EXE_LINKER_FLAGS "-acc -gpu=${GPU_FLAGS}") + if(${DISABLE_OPENACC_ATOMICS}) + message("Taking DISABLE_OPENACC_ATOMICS = ON") + target_compile_definitions(${PROJECT_NAME} PRIVATE DISABLE_OPENACC_ATOMICS) + endif() endif() - if(ENABLE_OPENMP) + if(${ENABLE_OPENMP}) target_compile_options(${PROJECT_NAME} PRIVATE -Mipa=fast) # else() # target_compile_options(${PROJECT_NAME} PRIVATE -Mipa=fast,inline) From 5e5d394355f7ad6035c5886fa92c74ebf5949246 Mon Sep 17 00:00:00 2001 From: seshadri levante Date: Tue, 21 May 2024 13:13:04 +0200 Subject: [PATCH 03/14] Removed some commits from #697 PR --- CMakeLists.txt | 2 +- env.sh | 2 +- env/levante.dkrz.de/shell.nvhpc | 2 +- src/CMakeLists.txt | 10 +++++----- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5905cdc94..0787ca401 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.16) # set default build type cache entry (do so before project(...) 
is called, which would create this cache entry on its own) if(NOT CMAKE_BUILD_TYPE) message(STATUS "setting default build type: Release") - set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, options are: None(CMAKE_CXX_FLAGS or CMAKE_C_FLAGS used) Debug Release RelWithDebInfo MinSizeRel.") + set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, options are: None(CMAKE_CXX_FLAGS or CMAKE_C_FLAGS used) Debug Release RelWithDebInfo MinSizeRel.") endif() project(FESOM2.0) diff --git a/env.sh b/env.sh index 294ddb4c8..9d246137f 100755 --- a/env.sh +++ b/env.sh @@ -119,4 +119,4 @@ else echo "Sourcing $(realpath $SHELLFILE) for environment" source $SHELLFILE echo "$(realpath ${SHELLFILE})" > $DIR/bin/current_shell_path -fi \ No newline at end of file +fi diff --git a/env/levante.dkrz.de/shell.nvhpc b/env/levante.dkrz.de/shell.nvhpc index 5bf73e871..f0ae54531 100755 --- a/env/levante.dkrz.de/shell.nvhpc +++ b/env/levante.dkrz.de/shell.nvhpc @@ -29,4 +29,4 @@ export UCX_TLS=mm,knem,cma,dc_mlx5,dc_x,self export UCX_UNIFIED_MODE=y export HDF5_USE_FILE_LOCKING=FALSE export OMPI_MCA_io="romio321" -export UCX_HANDLE_ERRORS=bt \ No newline at end of file +export UCX_HANDLE_ERRORS=bt diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 9d86a93e1..372fa211e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -81,7 +81,7 @@ set(GPU_FLAGS "cuda12.2,${GPU_COMPUTE_CAPABILITY}" CACHE STRING "GPU arch for nv option(ENABLE_OPENMP "build FESOM with OpenMP" OFF) message(STATUS "ENABLE_OPENMP: ${ENABLE_OPENMP}") -if(ENABLE_OPENMP) +if(${ENABLE_OPENMP}) find_package(OpenMP REQUIRED COMPONENTS Fortran) endif() @@ -203,7 +203,7 @@ target_include_directories(${PROJECT_NAME} PUBLIC $ Date: Tue, 16 Jul 2024 14:39:18 +0200 Subject: [PATCH 04/14] Updated with Miguels's comment --- src/ice_fct.F90 | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/ice_fct.F90 b/src/ice_fct.F90 index fd1548ac0..be4d92518 100755 --- 
a/src/ice_fct.F90 +++ b/src/ice_fct.F90 @@ -1128,9 +1128,7 @@ subroutine ice_fem_fct(tr_array_id, ice, partit, mesh) call exchange_nod(ice_temp, partit, luse_g2g = .true.) #endif -#ifndef ENABLE_OPENACC -!$OMP PARALLEL DO -#else +#ifdef ENABLE_OPENACC !$ACC END DATA #endif From 980a6882e7a599ba90b25eb2bd5b2a9a030790e5 Mon Sep 17 00:00:00 2001 From: seshadri levante Date: Wed, 17 Jul 2024 16:11:05 +0200 Subject: [PATCH 05/14] Now, we can compile on levante with intel and gnu --- env/levante.dkrz.de/shell.intel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/env/levante.dkrz.de/shell.intel b/env/levante.dkrz.de/shell.intel index e087c7f84..777e55f0d 100755 --- a/env/levante.dkrz.de/shell.intel +++ b/env/levante.dkrz.de/shell.intel @@ -9,7 +9,7 @@ module load openmpi/4.1.2-intel-2021.5.0 export FC=mpif90 CC=mpicc CXX=mpicxx ; spack load intel-oneapi-mkl@2022.0.1%gcc@11.2.0 # this handles adding to path elegantly then using hardcoded path below #module load intel-oneapi-mkl/2022.0.1-gcc-11.2.0 -#export LD_LIBRARY_PATH=/sw/spack-levante/intel-oneapi-mkl-2022.0.1-ttdktf/mkl/2022.0.1/lib/intel64:$LD_LIBRARY_PATH +export LD_LIBRARY_PATH=/sw/spack-levante/intel-oneapi-mkl-2022.0.1-ttdktf/mkl/2022.0.1/lib/intel64:$LD_LIBRARY_PATH module load netcdf-c/4.8.1-openmpi-4.1.2-intel-2021.5.0 module load netcdf-fortran/4.5.3-openmpi-4.1.2-intel-2021.5.0 From 6613e8852ca7d2b4f43ef86225b5d1776a9339bb Mon Sep 17 00:00:00 2001 From: seshadri levante Date: Wed, 17 Jul 2024 16:17:17 +0200 Subject: [PATCH 06/14] Added suvi's changes back to env.sh (compilerId regex for levante) --- env.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/env.sh b/env.sh index 9d246137f..e6cf2ce6b 100755 --- a/env.sh +++ b/env.sh @@ -54,7 +54,7 @@ elif [[ $LOGINHOST =~ ^m[A-Za-z0-9]+\.hpc\.dkrz\.de$ ]]; then STRATEGY="mistral.dkrz.de" elif [[ $LOGINHOST =~ ^levante ]] || [[ $LOGINHOST =~ ^l[:alnum:]+\.lvt\.dkrz\.de$ ]]; then STRATEGY="levante.dkrz.de" - # following regex 
only matches if input is 2 word like levante.nvhpc, this enables using different shells for a machine directly + # following regex only matches if input is 2 word like levante.nvhpc compid_regex="^([[:alnum:]]+)\.([[:alnum:]]+)$" if [[ $LOGINHOST =~ $compid_regex ]]; then COMPILERID="${BASH_REMATCH[2]}" From 1c537815f4a010bf460553eb0cd377213ad5ec00 Mon Sep 17 00:00:00 2001 From: seshadri levante Date: Tue, 26 Nov 2024 14:45:20 +0100 Subject: [PATCH 07/14] Solved some errors caused in oce_adv_tra_driver.F90 --- src/oce_adv_tra_driver.F90 | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/oce_adv_tra_driver.F90 b/src/oce_adv_tra_driver.F90 index 8ef6e9a4c..017c7f6b8 100644 --- a/src/oce_adv_tra_driver.F90 +++ b/src/oce_adv_tra_driver.F90 @@ -243,7 +243,7 @@ subroutine do_oce_adv_tra(dt, vel, w, wi, we, tr_num, dynamics, tracers, partit, #ifndef ENABLE_OPENACC !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(n, nz) #else - !$ACC PARALLEL LOOP GANG VECTOR DEFAULT(PRESENT) DEFAULT(PRESENT) VECTOR_LENGTH(acc_vl) + !$ACC PARALLEL LOOP GANG VECTOR DEFAULT(PRESENT) VECTOR_LENGTH(acc_vl) #endif !$OMP DO do n=1, myDim_edge2D @@ -254,8 +254,12 @@ subroutine do_oce_adv_tra(dt, vel, w, wi, we, tr_num, dynamics, tracers, partit, !$ACC END LOOP end do !$OMP END DO +#ifndef ENABLE_OPENACC +#else !$ACC END PARALLEL LOOP - !$ACC PARALLEL LOOP GANG VECTOR DEFAULT(PRESENT) DEFAULT(PRESENT) VECTOR_LENGTH(acc_vl) +#endif + + !$ACC PARALLEL LOOP GANG VECTOR DEFAULT(PRESENT) VECTOR_LENGTH(acc_vl) !$OMP DO do n=1, myDim_nod2D !$ACC LOOP VECTOR @@ -351,7 +355,7 @@ subroutine do_oce_adv_tra(dt, vel, w, wi, we, tr_num, dynamics, tracers, partit, !_______________________________________________________________________ if (trim(tracers%data(tr_num)%tra_adv_lim)=='FCT') then !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(n, nz) - !$ACC PARALLEL LOOP GANG VECTOR DEFAULT(PRESENT) DEFAULT(PRESENT) VECTOR_LENGTH(acc_vl) + !$ACC PARALLEL LOOP GANG VECTOR DEFAULT(PRESENT) 
VECTOR_LENGTH(acc_vl) !$OMP DO do n=1, myDim_edge2D !$ACC LOOP VECTOR @@ -365,7 +369,7 @@ subroutine do_oce_adv_tra(dt, vel, w, wi, we, tr_num, dynamics, tracers, partit, !$OMP END DO !$ACC END PARALLEL LOOP - !$ACC PARALLEL LOOP GANG VECTOR DEFAULT(PRESENT) DEFAULT(PRESENT) VECTOR_LENGTH(acc_vl) + !$ACC PARALLEL LOOP GANG VECTOR DEFAULT(PRESENT) VECTOR_LENGTH(acc_vl) !$OMP DO do n=1, myDim_nod2D !$ACC LOOP VECTOR @@ -384,7 +388,7 @@ subroutine do_oce_adv_tra(dt, vel, w, wi, we, tr_num, dynamics, tracers, partit, !_______________________________________________________________________ else !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(n, nz) - !$ACC PARALLEL LOOP GANG VECTOR DEFAULT(PRESENT) DEFAULT(PRESENT) VECTOR_LENGTH(acc_vl) + !$ACC PARALLEL LOOP GANG VECTOR DEFAULT(PRESENT) VECTOR_LENGTH(acc_vl) !$OMP DO do n=1, myDim_edge2D !$ACC LOOP VECTOR @@ -396,7 +400,7 @@ subroutine do_oce_adv_tra(dt, vel, w, wi, we, tr_num, dynamics, tracers, partit, !$OMP END DO !$ACC END PARALLEL LOOP - !$ACC PARALLEL LOOP GANG VECTOR DEFAULT(PRESENT) DEFAULT(PRESENT) VECTOR_LENGTH(acc_vl) + !$ACC PARALLEL LOOP GANG VECTOR DEFAULT(PRESENT) VECTOR_LENGTH(acc_vl) !$OMP DO do n=1, myDim_nod2D !$ACC LOOP VECTOR From 5af17ab56789b7e3678cf84b6b579b9ddfb6d048 Mon Sep 17 00:00:00 2001 From: seshadri levante Date: Tue, 25 Feb 2025 12:12:44 +0100 Subject: [PATCH 08/14] Working OpenACC code --- src/CMakeLists.txt | 10 ++++++---- src/ice_EVP.F90 | 27 +++++++++++++++++---------- src/ice_fct.F90 | 10 ++++++---- src/oce_tracer_mod.F90 | 6 +++++- 4 files changed, 34 insertions(+), 19 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 372fa211e..9bf3360ef 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -77,7 +77,8 @@ option(ENABLE_OPENACC "compile with OpenACC support" OFF) message(STATUS "ENABLE_OPENACC: ${ENABLE_OPENACC}") option(DISABLE_OPENACC_ATOMICS "disable kernels using atomic statement for reproducible results" ON) set(GPU_COMPUTE_CAPABILITY "cc80" CACHE STRING 
"GPU arch for nvfortran compiler (cc35,cc50,cc60,cc70,cc80,...)") -set(GPU_FLAGS "cuda12.2,${GPU_COMPUTE_CAPABILITY}" CACHE STRING "GPU arch for nvfortran compiler (cc35,cc50,cc60,cc70,cc80,...)") +set(GPU_FLAGS "${GPU_COMPUTE_CAPABILITY}" CACHE STRING "GPU arch for nvfortran compiler") +# set(GPU_FLAGS "cuda12.2,${GPU_COMPUTE_CAPABILITY}" CACHE STRING "GPU arch for nvfortran compiler (cc35,cc50,cc60,cc70,cc80,...)") option(ENABLE_OPENMP "build FESOM with OpenMP" OFF) message(STATUS "ENABLE_OPENMP: ${ENABLE_OPENMP}") @@ -398,13 +399,14 @@ elseif(${CMAKE_Fortran_COMPILER_ID} STREQUAL Cray ) endif() elseif(${CMAKE_Fortran_COMPILER_ID} STREQUAL NVHPC ) target_compile_definitions(${PROJECT_NAME} PRIVATE ENABLE_NVHPC_WORKAROUNDS) - #target_compile_options(${PROJECT_NAME} PRIVATE -fast -fastsse -O3 -Mallocatable=95 -Mr8 -pgf90libs) - target_compile_options(${PROJECT_NAME} PRIVATE -Mnofma -Mallocatable=95 -Mr8 -pgf90libs) + target_compile_options(${PROJECT_NAME} PRIVATE -fast -fastsse -O3 -Mallocatable=95 -Mr8 -pgf90libs) + # target_compile_options(${PROJECT_NAME} PRIVATE -Mnofma -Mallocatable=95 -Mr8 -pgf90libs) if(${ENABLE_OPENACC}) # additional compiler settings message("Taking ENABLE_OPENACC = ON") + message(STATUS "GPU_FLAGS = ${GPU_FLAGS}") target_compile_options(${PROJECT_NAME} PRIVATE -acc -O2 -gpu=${GPU_FLAGS} -Minfo=accel) - # set(CMAKE_EXE_LINKER_FLAGS "-acc -gpu=${GPU_FLAGS}") + set(CMAKE_EXE_LINKER_FLAGS "-acc -gpu=${GPU_FLAGS}") if(${DISABLE_OPENACC_ATOMICS}) message("Taking DISABLE_OPENACC_ATOMICS = ON") target_compile_definitions(${PROJECT_NAME} PRIVATE DISABLE_OPENACC_ATOMICS) diff --git a/src/ice_EVP.F90 b/src/ice_EVP.F90 index b646bc6da..36666af31 100755 --- a/src/ice_EVP.F90 +++ b/src/ice_EVP.F90 @@ -225,13 +225,16 @@ subroutine stress2rhs(ice, partit, mesh) #else !$ACC END PARALLEL LOOP #endif -#if !defined(DISABLE_OPENACC_ATOMICS) - !$ACC PARALLEL LOOP GANG VECTOR DEFAULT(PRESENT) -#else - !$ACC UPDATE SELF(u_rhs_ice, v_rhs_ice, sigma11, sigma12, 
sigma22) -#endif + #ifndef ENABLE_OPENACC !$OMP DO +#else + !$ACC PARALLEL LOOP GANG VECTOR DEFAULT(PRESENT) + #if !defined(DISABLE_OPENACC_ATOMICS) + !$ACC ATOMIC UPDAATE + #else + !$ACC UPDATE SELF(u_rhs_ice, v_rhs_ice, sigma11, sigma12, sigma22) + #endif #endif do el=1,myDim_elem2D ! ===== Skip if ice is absent @@ -251,16 +254,20 @@ subroutine stress2rhs(ice, partit, mesh) !$OMP ORDERED #endif #endif +#ifdef ENABLE_OPENACC #if !defined(DISABLE_OPENACC_ATOMICS) !$ACC ATOMIC UPDATE +#endif #endif U_rhs_ice(elem2D_nodes(k,el)) = U_rhs_ice(elem2D_nodes(k,el)) & - elem_area(el) * & (sigma11(el)*gradient_sca(k,el) + sigma12(el)*gradient_sca(k+3,el) & +sigma12(el)*val3*metric_factor(el)) !metrics +#ifdef ENABLE_OPENACC #if !defined(DISABLE_OPENACC_ATOMICS) !$ACC ATOMIC UPDATE +#endif #endif V_rhs_ice(elem2D_nodes(k,el)) = V_rhs_ice(elem2D_nodes(k,el)) & - elem_area(el) * & @@ -278,15 +285,15 @@ subroutine stress2rhs(ice, partit, mesh) endif end do #ifdef ENABLE_OPENACC -#if !defined(DISABLE_OPENACC_ATOMICS) - !$ACC END PARALLEL LOOP -#else + #if !defined(DISABLE_OPENACC_ATOMICS) !$ACC UPDATE DEVICE(u_rhs_ice, v_rhs_ice) + #endif #endif -#endif #ifndef ENABLE_OPENACC !$OMP END DO +#else + !$ACC END PARALLEL LOOP #endif #ifndef ENABLE_OPENACC @@ -824,4 +831,4 @@ subroutine EVPdynamics(ice, partit, mesh) !endif END DO !--> do shortstep=1, ice%evp_rheol_steps -end subroutine EVPdynamics +end subroutine EVPdynamics \ No newline at end of file diff --git a/src/ice_fct.F90 b/src/ice_fct.F90 index be4d92518..6f7e639a8 100755 --- a/src/ice_fct.F90 +++ b/src/ice_fct.F90 @@ -570,15 +570,17 @@ subroutine ice_fem_fct(tr_array_id, ice, partit, mesh) #endif ! Auxiliary elemental operator (mass matrix- lumped mass matrix) - !$ACC KERNELS - icoef = 1 - !$ACC END KERNELS + #ifdef ENABLE_OPENACC !$ACC PARALLEL LOOP GANG VECTOR DEFAULT(PRESENT) + #endif + icoef = 1 do n=1,3 ! three upper nodes ! 
Cycle over rows row=elnodes(n) icoef(n,n)=-2 end do + #ifdef ENABLE_OPENACC !$ACC END PARALLEL LOOP + #endif #ifndef ENABLE_OPENACC @@ -1618,4 +1620,4 @@ subroutine ice_update_for_div(ice, partit, mesh) !$ACC END PARALLEL LOOP #endif end subroutine ice_update_for_div -! ============================================================= +! ============================================================= \ No newline at end of file diff --git a/src/oce_tracer_mod.F90 b/src/oce_tracer_mod.F90 index b74a55d84..f4b99b2f3 100755 --- a/src/oce_tracer_mod.F90 +++ b/src/oce_tracer_mod.F90 @@ -25,7 +25,9 @@ SUBROUTINE init_tracers_AB(tr_num, tracers, partit, mesh) type(t_tracer), intent(inout), target :: tracers integer :: n,nz + #ifdef ENABLE_OPENACC !$ACC parallel loop collapse(2) default(present) !!!async(1) +#endif do n=1, partit%myDim_nod2D+partit%eDim_nod2D do nz=1, mesh%nl-1 ! del_ttf will contain all advection / diffusion contributions for this tracer. Set it to 0 at the beginning! @@ -34,7 +36,9 @@ SUBROUTINE init_tracers_AB(tr_num, tracers, partit, mesh) tracers%work%del_ttf_advvert (nz, n) = 0.0_WP end do end do + #ifdef ENABLE_OPENACC !$ACC end parallel loop +#endif !$OMP PARALLEL DO do n=1, partit%myDim_nod2D+partit%eDim_nod2D ! 
AB interpolation @@ -220,4 +224,4 @@ SUBROUTINE relax_to_clim(tr_num, tracers, partit, mesh) !$OMP END PARALLEL DO END IF END SUBROUTINE relax_to_clim -END MODULE o_tracers +END MODULE o_tracers \ No newline at end of file From 90e19b6c7e297cff9ba5071fb3ba5db1daaa7d39 Mon Sep 17 00:00:00 2001 From: seshadri levante Date: Tue, 25 Feb 2025 12:55:21 +0100 Subject: [PATCH 09/14] Removed indent for #ifdef and #ifndef ENALBE_OPENACC and respective #endif loops --- src/CMakeLists.txt | 1 - src/ice_EVP.F90 | 10 +++++----- src/ice_fct.F90 | 6 +++--- src/oce_tracer_mod.F90 | 4 ++-- 4 files changed, 10 insertions(+), 11 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 9bf3360ef..bdcd95749 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -404,7 +404,6 @@ elseif(${CMAKE_Fortran_COMPILER_ID} STREQUAL NVHPC ) if(${ENABLE_OPENACC}) # additional compiler settings message("Taking ENABLE_OPENACC = ON") - message(STATUS "GPU_FLAGS = ${GPU_FLAGS}") target_compile_options(${PROJECT_NAME} PRIVATE -acc -O2 -gpu=${GPU_FLAGS} -Minfo=accel) set(CMAKE_EXE_LINKER_FLAGS "-acc -gpu=${GPU_FLAGS}") if(${DISABLE_OPENACC_ATOMICS}) diff --git a/src/ice_EVP.F90 b/src/ice_EVP.F90 index 36666af31..3768c751a 100755 --- a/src/ice_EVP.F90 +++ b/src/ice_EVP.F90 @@ -230,11 +230,11 @@ subroutine stress2rhs(ice, partit, mesh) !$OMP DO #else !$ACC PARALLEL LOOP GANG VECTOR DEFAULT(PRESENT) - #if !defined(DISABLE_OPENACC_ATOMICS) +#if !defined(DISABLE_OPENACC_ATOMICS) !$ACC ATOMIC UPDAATE - #else +#else !$ACC UPDATE SELF(u_rhs_ice, v_rhs_ice, sigma11, sigma12, sigma22) - #endif +#endif #endif do el=1,myDim_elem2D ! 
===== Skip if ice is absent @@ -285,9 +285,9 @@ subroutine stress2rhs(ice, partit, mesh) endif end do #ifdef ENABLE_OPENACC - #if !defined(DISABLE_OPENACC_ATOMICS) +#if !defined(DISABLE_OPENACC_ATOMICS) !$ACC UPDATE DEVICE(u_rhs_ice, v_rhs_ice) - #endif +#endif #endif #ifndef ENABLE_OPENACC diff --git a/src/ice_fct.F90 b/src/ice_fct.F90 index 6f7e639a8..ca711173e 100755 --- a/src/ice_fct.F90 +++ b/src/ice_fct.F90 @@ -570,7 +570,7 @@ subroutine ice_fem_fct(tr_array_id, ice, partit, mesh) #endif ! Auxiliary elemental operator (mass matrix- lumped mass matrix) - #ifdef ENABLE_OPENACC +#ifdef ENABLE_OPENACC !$ACC PARALLEL LOOP GANG VECTOR DEFAULT(PRESENT) #endif icoef = 1 @@ -578,9 +578,9 @@ subroutine ice_fem_fct(tr_array_id, ice, partit, mesh) ! Cycle over rows row=elnodes(n) icoef(n,n)=-2 end do - #ifdef ENABLE_OPENACC +#ifdef ENABLE_OPENACC !$ACC END PARALLEL LOOP - #endif +#endif #ifndef ENABLE_OPENACC diff --git a/src/oce_tracer_mod.F90 b/src/oce_tracer_mod.F90 index f4b99b2f3..1a0fe05d4 100755 --- a/src/oce_tracer_mod.F90 +++ b/src/oce_tracer_mod.F90 @@ -25,7 +25,7 @@ SUBROUTINE init_tracers_AB(tr_num, tracers, partit, mesh) type(t_tracer), intent(inout), target :: tracers integer :: n,nz - #ifdef ENABLE_OPENACC +#ifdef ENABLE_OPENACC !$ACC parallel loop collapse(2) default(present) !!!async(1) #endif do n=1, partit%myDim_nod2D+partit%eDim_nod2D @@ -36,7 +36,7 @@ SUBROUTINE init_tracers_AB(tr_num, tracers, partit, mesh) tracers%work%del_ttf_advvert (nz, n) = 0.0_WP end do end do - #ifdef ENABLE_OPENACC +#ifdef ENABLE_OPENACC !$ACC end parallel loop #endif !$OMP PARALLEL DO From 2acb0c31b52bc99630f1406e54b26562b6ccafd4 Mon Sep 17 00:00:00 2001 From: seshadri levante Date: Tue, 25 Feb 2025 13:00:04 +0100 Subject: [PATCH 10/14] Removed one more indent --- src/ice_fct.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ice_fct.F90 b/src/ice_fct.F90 index ca711173e..d3ede9c94 100755 --- a/src/ice_fct.F90 +++ b/src/ice_fct.F90 @@ -572,7 +572,7 @@ 
subroutine ice_fem_fct(tr_array_id, ice, partit, mesh) #ifdef ENABLE_OPENACC !$ACC PARALLEL LOOP GANG VECTOR DEFAULT(PRESENT) - #endif +#endif icoef = 1 do n=1,3 ! three upper nodes ! Cycle over rows row=elnodes(n) From 714a8ee074c722858448d2b7bbd577df5b05b7ca Mon Sep 17 00:00:00 2001 From: seshadri levante Date: Wed, 26 Feb 2025 13:16:48 +0100 Subject: [PATCH 11/14] Added job_gpu_levante to run jobs with OpenACC on levante --- work/job_gpu_levante | 75 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100755 work/job_gpu_levante diff --git a/work/job_gpu_levante b/work/job_gpu_levante new file mode 100755 index 000000000..04680163e --- /dev/null +++ b/work/job_gpu_levante @@ -0,0 +1,75 @@ +#!/bin/bash +#SBATCH --job-name=fesom_gpu_test +#SBATCH --partition=gpu +#SBATCH --nodes=4 # Specify number of nodes +#SBATCH --ntasks-per-node=4 +#SBATCH --cpus-per-task=4 +#SBATCH --gpus=16 # 4 # 8 for 2 nodes +#SBATCH --gpus-per-task=1 #specific case when tasks=gpus +#SBATCH --exclusive +#SBATCH --mem=0 # Request all memory available on all nodes +#SBATCH --time=00:20:00 # Set a limit on the total run time +#SBATCH -o slurm.out +#SBATCH -e slurm.err +#SBATCH --account=ab0995 + +set -e +export SLURM_CPUS_PER_TASK=4 + +source /sw/etc/profile.levante +#source ../env/levante.dkrz.de/shell +read -r USED_SHELL <../bin/current_shell_path +source $USED_SHELL + +#source /work/ab0995/a270232/refactoring/fesom2/env/levante.dkrz.de/shell.nvhpc +echo "using environment from" $USED_SHELL + +ulimit -s 204800 # https://docs.dkrz.de/doc/levante/running-jobs/runtime-settings.html + +echo Submitted job: $jobid +squeue -u $USER + +# Check GPUs available for the job +nvidia-smi + +# determine JOBID +JOBID=$(echo $SLURM_JOB_ID | cut -d"." -f1) + +rm -f fesom.x +ln -s ../bin/fesom.x . # cp -n ../bin/fesom.x + +export OMP_NUM_THREADS=4 +cp -n ../config/namelist.config . +cp -n ../config/namelist.forcing . +cp -n ../config/namelist.oce . 
+cp -n ../config/namelist.ice . +cp -n ../config/namelist.icepack . +cp -n ../config/namelist.tra . +cp -n ../config/namelist.io . +cp -n ../config/namelist.cvmix . +cp -n ../config/namelist.dyn . + +## levante specific gpu env used for ICON otherwise segfault +export OMPI_MCA_pml=ucx # Use UCX to support InfiniBand devices and CUDA [1] + +export OMPI_MCA_btl="self" # Only use self transport to reduce overhead [2] + +export UCX_RNDV_SCHEME=put_zcopy # Preferred communication scheme with Rendezvous protocol +export UCX_RNDV_THRESH=16384 # Threshold when to switch transport from TCP to NVLINK [3] + +export UCX_IB_GPU_DIRECT_RDMA=yes # Allow remote direct memory access from/to GPU + +export UCX_TLS=cma,rc,mm,cuda_ipc,cuda_copy,gdr_copy # Include cuda and gdr based transport layers for communication [4] + +export UCX_MEMTYPE_CACHE=n + +date +srun -l fesom.x >fesom2.out 2>&1 #> "fesom2.0.out" 2>&1 +# srun -l nsys profile -t cuda,osrt,mpi fesom.x > fesom2.out 2>&1 #> "fesom2.0.out" 2>&1 +date + +# qstat -f $PBS_JOBID +#export EXITSTATUS=$? +#if [ ${EXITSTATUS} -eq 0 ] || [ ${EXITSTATUS} -eq 127 ] ; then +#sbatch job_mistral +#fi From 2a1f5b484004182038ae62113e590c6310470248 Mon Sep 17 00:00:00 2001 From: suvarchal Date: Tue, 11 Mar 2025 11:08:54 +0100 Subject: [PATCH 12/14] oops a trailing fortran comment bad for preprocessing (cherry picked from commit 237434cc77e60e7f6431689f793d6f8d3f3de98c) --- src/gen_modules_partitioning.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gen_modules_partitioning.F90 b/src/gen_modules_partitioning.F90 index 5ea2938d0..d8aebddf3 100644 --- a/src/gen_modules_partitioning.F90 +++ b/src/gen_modules_partitioning.F90 @@ -127,7 +127,7 @@ subroutine par_ex(COMM, mype, abort) ! finalizes MPI call MPI_Finalize(error) endif -#else ! +#else ! TODO logic below is convoluted, COMM that is passed should be used for MPI_ABORT ! changes are easy but need to be tested with coupled configurations ! 
From here on the two coupled options From 108b15223ee997e06614198a317b897bfd0b4f88 Mon Sep 17 00:00:00 2001 From: seshadri levante Date: Tue, 17 Jun 2025 16:54:49 +0200 Subject: [PATCH 13/14] Compiling and running base OpenACC code with nvhpc. --- configure.sh | 1 - src/CMakeLists.txt | 25 +++++++++++-------------- src/ice_fct.F90 | 4 +++- 3 files changed, 14 insertions(+), 16 deletions(-) diff --git a/configure.sh b/configure.sh index 690ad707d..cee7933f0 100755 --- a/configure.sh +++ b/configure.sh @@ -64,4 +64,3 @@ cmake ${SOURCE_DIR} -DCMAKE_INSTALL_PREFIX=$HERE -DCMAKE_BUILD_TYPE=Debug ${CMAK # additional cmake arguments can be passed to configure.sh # this also includes fesom specific options in CMakeLists, can be used as -DFESOM_COUPLED=ON make install -j`nproc --all` - diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index bdcd95749..7cdbf08fd 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -77,8 +77,8 @@ option(ENABLE_OPENACC "compile with OpenACC support" OFF) message(STATUS "ENABLE_OPENACC: ${ENABLE_OPENACC}") option(DISABLE_OPENACC_ATOMICS "disable kernels using atomic statement for reproducible results" ON) set(GPU_COMPUTE_CAPABILITY "cc80" CACHE STRING "GPU arch for nvfortran compiler (cc35,cc50,cc60,cc70,cc80,...)") -set(GPU_FLAGS "${GPU_COMPUTE_CAPABILITY}" CACHE STRING "GPU arch for nvfortran compiler") -# set(GPU_FLAGS "cuda12.2,${GPU_COMPUTE_CAPABILITY}" CACHE STRING "GPU arch for nvfortran compiler (cc35,cc50,cc60,cc70,cc80,...)") +# set(GPU_FLAGS "${GPU_COMPUTE_CAPABILITY}" CACHE STRING "GPU arch for nvfortran compiler") +set(GPU_FLAGS "cuda12.2,${GPU_COMPUTE_CAPABILITY}" CACHE STRING "GPU arch for nvfortran compiler (cc35,cc50,cc60,cc70,cc80,...)") option(ENABLE_OPENMP "build FESOM with OpenMP" OFF) message(STATUS "ENABLE_OPENMP: ${ENABLE_OPENMP}") @@ -399,22 +399,19 @@ elseif(${CMAKE_Fortran_COMPILER_ID} STREQUAL Cray ) endif() elseif(${CMAKE_Fortran_COMPILER_ID} STREQUAL NVHPC ) target_compile_definitions(${PROJECT_NAME} 
PRIVATE ENABLE_NVHPC_WORKAROUNDS) - target_compile_options(${PROJECT_NAME} PRIVATE -fast -fastsse -O3 -Mallocatable=95 -Mr8 -pgf90libs) - # target_compile_options(${PROJECT_NAME} PRIVATE -Mnofma -Mallocatable=95 -Mr8 -pgf90libs) if(${ENABLE_OPENACC}) - # additional compiler settings - message("Taking ENABLE_OPENACC = ON") - target_compile_options(${PROJECT_NAME} PRIVATE -acc -O2 -gpu=${GPU_FLAGS} -Minfo=accel) - set(CMAKE_EXE_LINKER_FLAGS "-acc -gpu=${GPU_FLAGS}") + target_compile_options(${PROJECT_NAME} PRIVATE + $<$<CONFIG:Debug>:-Mallocatable=95 -Mr8 -pgf90libs -Mnofma -Minfo=all -acc=verystrict -gpu=math_uniform,cuda12.2,cc80> + $<$<CONFIG:Release>:-Mallocatable=95 -Mr8 -pgf90libs -Minfo=all -acc=verystrict -gpu=cc80>) + set(CMAKE_EXE_LINKER_FLAGS_DEBUG "-acc=verystrict -Mnofma -gpu=math_uniform,cuda12.2,cc80") + set(CMAKE_EXE_LINKER_FLAGS_RELEASE "-acc=verystrict -gpu=cc80") if(${DISABLE_OPENACC_ATOMICS}) - message("Taking DISABLE_OPENACC_ATOMICS = ON") - target_compile_definitions(${PROJECT_NAME} PRIVATE DISABLE_OPENACC_ATOMICS) + target_compile_definitions(${PROJECT_NAME} PRIVATE DISABLE_OPENACC_ATOMICS) endif() - endif() - if(${ENABLE_OPENMP}) - target_compile_options(${PROJECT_NAME} PRIVATE -Mipa=fast) else() - target_compile_options(${PROJECT_NAME} PRIVATE -Mipa=fast,inline) + target_compile_options(${PROJECT_NAME} PRIVATE + $<$<CONFIG:Debug>:-Mallocatable=95 -Mr8 -pgf90libs -Mnofma> + $<$<CONFIG:Release>:-Mallocatable=95 -Mr8 -pgf90libs>) endif() endif() diff --git a/src/ice_fct.F90 b/src/ice_fct.F90 index d3ede9c94..c72269579 100755 --- a/src/ice_fct.F90 +++ b/src/ice_fct.F90 @@ -570,10 +570,12 @@ subroutine ice_fem_fct(tr_array_id, ice, partit, mesh) #endif ! Auxiliary elemental operator (mass matrix- lumped mass matrix) + ! do we need to make the entire array of icoef equal to 1 ? + ! if so, we have to write another loop for that. For now, I am running it on cpu. + icoef = 1 #ifdef ENABLE_OPENACC !$ACC PARALLEL LOOP GANG VECTOR DEFAULT(PRESENT) #endif - icoef = 1 do n=1,3 ! three upper nodes ! 
Cycle over rows row=elnodes(n) icoef(n,n)=-2 From 5b7447f87b15eda746805ca9fcf9cfed478fef72 Mon Sep 17 00:00:00 2001 From: seshadri levante Date: Thu, 19 Jun 2025 16:22:44 +0200 Subject: [PATCH 14/14] Removed the commented export in shell.nvhpc --- env/levante.dkrz.de/shell.nvhpc | 1 - 1 file changed, 1 deletion(-) diff --git a/env/levante.dkrz.de/shell.nvhpc b/env/levante.dkrz.de/shell.nvhpc index f0ae54531..5f9bf063b 100755 --- a/env/levante.dkrz.de/shell.nvhpc +++ b/env/levante.dkrz.de/shell.nvhpc @@ -8,7 +8,6 @@ module --force purge module load nvhpc/23.9-gcc-11.2.0 module load openmpi/4.1.6-nvhpc-23.9 export FC=mpif90 CC=mpicc CXX=mpicxx; -# export LD_LIBRARY_PATH=/sw/spack-levante/intel-oneapi-mkl-2022.0.1-ttdktf/mkl/2022.0.1/lib/intel64:$LD_LIBRARY_PATH module load netcdf-c/4.8.1-openmpi-4.1.2-intel-2021.5.0 module load netcdf-fortran/4.5.3-openmpi-4.1.2-intel-2021.5.0