Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions COMPILATION/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ option(STELLA_ENABLE_FFT "Enable FFTs" ON)
option(STELLA_ENABLE_NETCDF "Enable NetCDF" ON)
option(STELLA_ENABLE_DOUBLE "Promotes precisions of real and complex to double" ON)
option(STELLA_ENABLE_PETSC "Enable PETSc" OFF)
option(STELLA_ENABLE_OMP "Enable OpenMP" OFF)

# Options for utils internal library
option(STELLA_ENABLE_LOCAL_SPFUNC "Enable local special functions" OFF)
Expand Down Expand Up @@ -294,6 +295,12 @@ if (STELLA_ENABLE_PETSC)
target_link_libraries(libstella PUBLIC PETSc::PETSc)
endif()

if (STELLA_ENABLE_OMP)
find_package(OpenMP REQUIRED Fortran)
target_link_libraries(libstella PUBLIC OpenMP::OpenMP_Fortran)
target_compile_definitions(libstella PUBLIC USE_OPENMP)
endif()

# Link the <git_version> program
add_subdirectory(../EXTERNALS/git_version ${CMAKE_CURRENT_BINARY_DIR}/EXTERNALS/git_version)
target_link_libraries(libstella PUBLIC fortran_git::fortran_git)
Expand Down Expand Up @@ -412,6 +419,13 @@ else()
set(STELLA_PFUNIT_LOCATION "Not used")
endif()

# Enforce the standard
if (CMAKE_Fortran_COMPILER_ID STREQUAL "Intel" OR CMAKE_Fortran_COMPILER_ID STREQUAL "IntelLLVM")
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -std08")
else()
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -std=f08")
endif()

############################################################
# Configure summary

Expand Down
4 changes: 4 additions & 0 deletions COMPILATION/Makefile.compilerflags
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ USE_LOCAL_SPFUNC ?= # Use local special functions (bin)
USE_NAGLIB ?= # Use nag libraray (spfunc,undefined)
USE_SFINCS ?= # Link to sfincs library at compilation
USE_LAPACK ?= on # Use LAPACK, needed for test particle collisions
USE_OPENMP ?= on # Use OpenMP to parallelize the Code
HAS_ISO_C_BINDING ?= on # Does the compiler support the iso_c_binding features of Fortran? (needed for local parallel LU decomposition)
SPLIT_BY_NUMA ?=

Expand Down Expand Up @@ -185,6 +186,9 @@ ifdef USE_LAPACK
endif
CPPFLAGS += -DLAPACK
endif
ifdef USE_OPENMP
F90FLAGS += -fopenmp -DUSE_OPENMP
endif
ifdef USE_HDF5
FC = $(H5FC_par)
CC = $(H5CC_par)
Expand Down
7 changes: 1 addition & 6 deletions COMPILATION/Makefiles/Compilers/Makefile.gcc-intel
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,4 @@ else
F90FLAGS += -pg
CFLAGS += -pg
endif
endif

ifdef USE_OPENMP
F90FLAGS += -openmp
CFLAGS += -fopenmp
endif
endif
8 changes: 1 addition & 7 deletions COMPILATION/Makefiles/Compilers/Makefile.gnu-gfortran
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,4 @@ else
F90FLAGS += -pg
CFLAGS += -pg
endif
endif

ifdef USE_OPENMP
F90FLAGS += -fopenmp
CFLAGS += -fopenmp
endif

endif
8 changes: 1 addition & 7 deletions COMPILATION/Makefiles/Makefile.g95
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,4 @@ else
F90FLAGS += -pg
CFLAGS += -pg
endif
endif

ifdef USE_OPENMP
F90FLAGS += -fopenmp
CFLAGS += -fopenmp
endif

endif
6 changes: 0 additions & 6 deletions COMPILATION/Makefiles/Makefile.gnu-g95
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,3 @@ else
CFLAGS += -pg
endif
endif

ifdef USE_OPENMP
F90FLAGS += -fopenmp
CFLAGS += -fopenmp
endif

4 changes: 0 additions & 4 deletions COMPILATION/Makefiles/Makefile.gnu-gcc
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,3 @@ else
endif
endif

ifdef USE_OPENMP
CFLAGS += -fopenmp
endif

8 changes: 8 additions & 0 deletions STELLA_CODE/arrays/arrays_gyro_averages.f90
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,12 @@ subroutine init_bessel_versus_mu_aj0v_aj1v
! Calculate the Bessel functions J_0(x) and J_1(x) on the (nmu) grid
! Note that j1 returns J_1(x)/x, not J_1(x)
if (debug) write (*, *) 'arrays_gyro_averages:init_bessel::calculate_aj0v_aj1v'
!$omp parallel default(none) &
!$omp firstprivate(kxkyz_lo, ia) &
!$omp private(ikxkyz, iky, ikx, iz, is, imu, arg) &
!$omp shared(spec, vperp2, kperp2, nmu, bmag, aj0v, aj1v)

!$omp do
do ikxkyz = kxkyz_lo%llim_proc, kxkyz_lo%ulim_proc
iky = iky_idx(kxkyz_lo, ikxkyz)
ikx = ikx_idx(kxkyz_lo, ikxkyz)
Expand All @@ -171,6 +177,8 @@ subroutine init_bessel_versus_mu_aj0v_aj1v

end do
end do
!$omp end do
!$omp end parallel

end subroutine init_bessel_versus_mu_aj0v_aj1v

Expand Down
29 changes: 28 additions & 1 deletion STELLA_CODE/calculations/calculations_transforms.f90
Original file line number Diff line number Diff line change
Expand Up @@ -115,9 +115,11 @@ module calculations_transforms
! Arrays for transforming from alpha-space to k-alpha space
real, dimension(:), allocatable :: fft_alpha_alpha ! dimension: nalpha/2+1
complex, dimension(:), allocatable :: fft_alpha_kalpha ! dimension: nalpha

! Only initialise the FFTW plans once
logical :: transforms_initialized = .false.

!$omp threadprivate(fft_y_in,fft_y_out,fft_x_k,fft_x_x,fft_xs_k,fft_xs_x,fft_ys_k,fft_ys_y,fftnp_x_k,fftnp_x_x,fftnp_y_k,fftnp_y_y,fft_alpha_alpha,fft_alpha_kalpha)

contains

Expand Down Expand Up @@ -396,6 +398,10 @@ subroutine transform_ky2y_5d(gky_unpad, gy)
iky_max = vmu_lo%naky
ipad_up = iky_max + vmu_lo%ny - (2 * vmu_lo%naky - 1)


if (.not. allocated(fft_y_in)) allocate (fft_y_in(vmu_lo%ny))
if (.not. allocated(fft_y_out)) allocate (fft_y_out(vmu_lo%ny))

! Iterate over vmu, tubes, z and positive kx since we Fourier transform along ny
do ivmu = vmu_lo%llim_proc, vmu_lo%ulim_proc
do it = 1, vmu_lo%ntubes
Expand Down Expand Up @@ -446,6 +452,10 @@ subroutine transform_ky2y_2d(gky_unpad, gy)
iky_max = vmu_lo%naky
ipad_up = iky_max + vmu_lo%ny - (2 * vmu_lo%naky - 1)


if (.not. allocated(fft_y_in)) allocate (fft_y_in(vmu_lo%ny))
if (.not. allocated(fft_y_out)) allocate (fft_y_out(vmu_lo%ny))

! Iterate over positive kx since we Fourier transform along ny
do ikx = 1, vmu_lo%nakx / 2 + 1
! Pad the middle of the array with zeros: zero padding in the wavenumber
Expand Down Expand Up @@ -535,6 +545,11 @@ subroutine transform_y2ky_2d(gy, gky)
! Get the indices of the last positive ky <iky_max> and negative ky <ipad_up>
iky_max = vmu_lo%naky
ipad_up = iky_max + vmu_lo%ny - (2 * vmu_lo%naky - 1)


if (.not. allocated(fft_y_in)) allocate (fft_y_in(vmu_lo%ny))
if (.not. allocated(fft_y_out)) allocate (fft_y_out(vmu_lo%ny))


! Iterate over positive kx since we Fourier transform along ny
do ikx = 1, vmu_lo%nakx / 2 + 1
Expand Down Expand Up @@ -571,6 +586,10 @@ subroutine transform_kx2x(gkx, gx)

integer :: iy


if (.not. allocated(fft_x_k)) allocate (fft_x_k(vmu_lo%nx / 2 + 1))
if (.not. allocated(fft_x_x)) allocate (fft_x_x(vmu_lo%nx))

! Iterate over ny since we Fourier transform along nx
do iy = 1, vmu_lo%ny
! Pad the remainder of the array with zeros (since the real array is longer
Expand Down Expand Up @@ -605,6 +624,10 @@ subroutine transform_x2kx(gx, gkx)

integer :: iy


if (.not. allocated(fft_x_k)) allocate (fft_x_k(vmu_lo%nx / 2 + 1))
if (.not. allocated(fft_x_x)) allocate (fft_x_x(vmu_lo%nx))

! Iterate over ny since we Fourier transform gx(ny,nx) along nx
do iy = 1, vmu_lo%ny
! Fill in the array with gx(ny,nx)
Expand Down Expand Up @@ -641,6 +664,10 @@ subroutine transform_kx2x_xfirst(gkx, gx)
ikx_max = vmu_lo%nakx / 2 + 1
ipad_up = ikx_max + vmu_lo%nx - vmu_lo%nakx


if (.not. allocated(fft_xs_k)) allocate (fft_xs_k(vmu_lo%nx))
if (.not. allocated(fft_xs_x)) allocate (fft_xs_x(vmu_lo%nx))

! Iterate over naky since we Fourier transform gkx(nakx,nakx) along nx
do iky = 1, vmu_lo%naky
! Pad the middle of the array with zeros: zero padding in the wavenumber
Expand Down
20 changes: 14 additions & 6 deletions STELLA_CODE/gyrokinetic_equation/gk_ExB_nonlinearity.f90
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ subroutine advance_ExB_nonlinearity(g, gout, restart_time_step, istep)
complex, dimension(:, :), allocatable :: g0kxy, g0xky, prefac
real, dimension(:, :), allocatable :: g0xy, g1xy, bracket
complex, dimension(:, :), allocatable :: tmp
integer :: ivmu, iz, it, imu, is
integer :: ivmu, iz, it
real :: zero, cfl_dt
logical :: yfirst

Expand Down Expand Up @@ -143,12 +143,14 @@ subroutine advance_ExB_nonlinearity(g, gout, restart_time_step, istep)
if (include_apar .or. include_bpar) call g_to_h(g, phi, bpar, fphi)

! Iterate over velocity space
!$omp parallel default(none) &
!$omp firstprivate(vmu_lo, shift_state, exb_nonlin_fac, cfl_dt_ExB, naky,nakx, ny ,rho_clamped, exb_nonlin_fac_p, ikx_max, code_dt, tmp) &
!$omp private(g0k, g0xy, g0a, g1xy, bracket, g0xky, g0k_swap, g0kxy, apar_kykx, bpar_kykx) &
!$omp shared(g, gout, include_apar,phi_gyro, include_bpar, gfac, apar, phi, bpar, aky, ntubes, full_flux_surface, suppress_zonal_interaction,radial_variation, prp_shear_enabled, hammett_flow_shear, g_exb, g_exbfac, prefac, yfirst, akx, fphi, phi_corr_GA, phi_corr_qn, zero, nzgrid)
!$omp do collapse(3)
do ivmu = vmu_lo%llim_proc, vmu_lo%ulim_proc
imu = imu_idx(vmu_lo, ivmu)
is = is_idx(vmu_lo, ivmu)
do it = 1, ntubes
do iz = -nzgrid, nzgrid

! Get the fields (to avoid indexing arrays which aren't allocated)
if (include_apar) apar_kykx = apar(:, :, iz, it)
if (include_bpar) bpar_kykx = bpar(:, :, iz, it)
Expand Down Expand Up @@ -270,6 +272,8 @@ subroutine advance_ExB_nonlinearity(g, gout, restart_time_step, istep)
end do
end do
end do
!$omp end do
!$omp end parallel

! Convert back from h to g = <f> (only needed for EM sims)
if (include_apar .or. include_bpar) call g_to_h(g, phi, bpar, -fphi)
Expand Down Expand Up @@ -340,7 +344,11 @@ subroutine forward_transform(gk, gx)
real, dimension(:, :), intent(out) :: gx

!----------------------------------------------------------------------

!$omp critical (forward_transform)
! There is some shared state namely the g0k_swap, g0xky and g0kxy arrays.
! The approach is as always: make if work first, before trying to do premature optimzizations.
! Even though the crtical might impede performance, we should measure first, before getting rid of it

! Fourier transform g(kx,ky) to g(kx,y) and then to g(x,y)
if (yfirst) then
! We have i*ky*g(kx,ky) for ky >= 0 and all kx.
Expand All @@ -365,7 +373,7 @@ subroutine forward_transform(gk, gx)
g0xky = g0xky * prefac
call transform_ky2y_xfirst(g0xky, gx)
end if

!$omp end critical (forward_transform)
end subroutine forward_transform

end subroutine advance_ExB_nonlinearity
Expand Down
5 changes: 4 additions & 1 deletion STELLA_CODE/gyrokinetic_equation/gk_ffs_solve.f90
Original file line number Diff line number Diff line change
Expand Up @@ -313,14 +313,17 @@ subroutine get_dgdy(gin, dgdy)
integer :: it, iz, ikx

!-------------------------------------------------------------------------

!$omp parallel default(none) &
!$omp shared(gin,dgdy,aky,nzgrid,nakx,ntubes)
!$omp do collapse(3)
do it = 1, ntubes
do iz = -nzgrid, nzgrid
do ikx = 1, nakx
dgdy(:, ikx, iz, it) = zi * aky(:) * gin(:, ikx, iz, it)
end do
end do
end do
!$omp end parallel

end subroutine get_dgdy

Expand Down
19 changes: 19 additions & 0 deletions STELLA_CODE/gyrokinetic_equation/gk_implicit_terms.f90
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,10 @@ subroutine update_pdf(mod)
! Start the timer for the pdf update
if (proc0) call time_message(.false., time_implicit_advance(:, 2), ' (bidiagonal solve)')

!$omp parallel default(none) &
!$omp private(ivmu, iky, it, ie, nz_ext, pdf1, pdf2, phiext, bparext, aparext, aparext_new, aparext_old, phiffsext, ulim) &
!$omp shared(vmu_lo, neigen, ntubes, naky, nsegments, nzed_segment, g1, phi_source, apar_source, apar, apar_old, bpar_source, phi_source_ffs, include_apar, include_bpar, mod, g)
!$omp do collapse(3)
do ivmu = vmu_lo%llim_proc, vmu_lo%ulim_proc
! Solve for the pdf, given the sources for phi and the pdf on the RHS of the GK equation
! we do this on set of connected zed segments at a time
Expand Down Expand Up @@ -383,13 +387,17 @@ subroutine update_pdf(mod)
call sweep_g_zext(iky, ie, it, ivmu, pdf2)
! Map the pdf 'pdf2' from the extended zed domain
! to the standard zed domain; the mapped pdf is called 'g'
!$omp critical (write_back)
call map_from_extended_zgrid(it, ie, iky, pdf2, g(iky, :, :, :, ivmu))
!$omp end critical (write_back)
deallocate (pdf1, pdf2, phiext, aparext, aparext_new, aparext_old, bparext)
if (present(mod)) deallocate(phiffsext)
end do
end do
end do
end do
!$omp end do
!$omp end parallel

! Stop the timer for the pdf update
if (proc0) call time_message(.false., time_implicit_advance(:, 2), ' (bidiagonal solve)')
Expand Down Expand Up @@ -1494,6 +1502,10 @@ subroutine invert_parstream_response(phi, apar, bpar)
if (debug) write (*, *) 'implicit_solve::invert_parstream_response'

! Put the fields onto the extended zed grid and use LU back substitution
!$omp parallel default(none) &
!$omp private(iky, ie, it, ulim, ikx, offset_apar, offset_bpar, nresponse_per_field, nresponse, nzp, fld_ext, phi_ext, apar_ext, bpar_ext, fld) &
!$omp shared(phi,ntubes, periodic, naky, nsegments, nzed_segment, response_matrix, nzgrid, nfields, include_apar, include_bpar, ikxmod, apar, bpar, phase_shift, neigen)
!$omp do
do iky = 1, naky
! Avoid double counting of periodic endpoints for zonal (and any other periodic) modes
if (periodic(iky)) then
Expand Down Expand Up @@ -1572,16 +1584,22 @@ subroutine invert_parstream_response(phi, apar, bpar)

! Get phi_ext contribution from fld_ext and map back to normal (z, kx) grid
phi_ext = fld_ext(:nresponse_per_field)
!$omp critical (write_phi)
call map_from_extended_zgrid(it, ie, iky, phi_ext, phi(iky, :, :, :))
!$omp end critical (write_phi)

! If advancing apar, get apar_ext from fld_ext and map back to (z, kx) grid
if (include_apar) then
apar_ext = fld_ext(offset_apar + 1:nresponse_per_field + offset_apar)
!$omp critical (write_apar)
call map_from_extended_zgrid(it, ie, iky, apar_ext, apar(iky, :, :, :))
!$omp end critical (write_apar)
end if
if (include_bpar) then
bpar_ext = fld_ext(offset_bpar + 1:nresponse_per_field + offset_bpar)
!$omp critical (write_bpar)
call map_from_extended_zgrid(it, ie, iky, bpar_ext, bpar(iky, :, :, :))
!$omp end critical (write_bpar)
end if

deallocate (fld_ext)
Expand All @@ -1592,6 +1610,7 @@ subroutine invert_parstream_response(phi, apar, bpar)
end do
end if
end do
!$omp end parallel

end subroutine invert_parstream_response

Expand Down
Loading