Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 8 additions & 7 deletions src/interpolate/batch_interpolate_1d.f90
Original file line number Diff line number Diff line change
Expand Up @@ -433,13 +433,14 @@ end subroutine store_coeff_1d
subroutine destroy_batch_splines_1d(spl)
type(BatchSplineData1D), intent(inout) :: spl

#ifdef _OPENACC
if (allocated(spl%coeff)) then
if (acc_is_present(spl%coeff)) then
!$acc exit data delete(spl%coeff)
end if
end if
#endif
#ifdef _OPENACC
if (allocated(spl%coeff)) then
if (acc_is_present(spl%coeff)) then
!$acc exit data delete(spl%coeff)
!$acc wait
end if
end if
#endif
if (allocated(spl%coeff)) deallocate (spl%coeff)
end subroutine destroy_batch_splines_1d

Expand Down
142 changes: 55 additions & 87 deletions src/interpolate/batch_interpolate_2d.f90
Original file line number Diff line number Diff line change
Expand Up @@ -221,16 +221,15 @@ subroutine construct_batch_splines_2d_resident_device_impl(x_min, x_max, n1, n2,
logical, intent(in) :: do_assume_present

integer :: istat
integer :: N1_order, N2_order
integer :: order1, order2
integer :: i1, i2, iq, k1, k2
integer :: line, line2
! GCC OpenACC bug workaround: keep work arrays persistent across calls
real(dp), allocatable, save :: work2(:, :, :)
real(dp), allocatable, save :: work1(:, :, :)
real(dp) :: h1, h2
logical :: periodic1, periodic2
logical :: work_needs_alloc
integer :: N1_order, N2_order
integer :: order1, order2
integer :: i1, i2, iq, k1, k2
integer :: line, line2
! Work arrays - allocated fresh each call to avoid GCC OpenACC memory issues
real(dp), allocatable :: work2(:, :, :)
real(dp), allocatable :: work1(:, :, :)
real(dp) :: h1, h2
logical :: periodic1, periodic2

N1_order = order(1)
N2_order = order(2)
Expand Down Expand Up @@ -294,74 +293,42 @@ subroutine construct_batch_splines_2d_resident_device_impl(x_min, x_max, n1, n2,
spl%x_min = x_min
spl%num_quantities = n_quantities

! GCC OpenACC bug workaround: reuse existing allocation if possible
if (.not. allocated(spl%coeff)) then
allocate (spl%coeff(n_quantities, 0:N1_order, 0:N2_order, n1, n2), stat=istat)
if (istat /= 0) then
error stop "construct_batch_splines_2d_resident_device:"// &
" Allocation failed for coeff"
end if
!$acc enter data create(spl%coeff)
else if (size(spl%coeff, 1) /= n_quantities .or. &
size(spl%coeff, 2) /= N1_order + 1 .or. &
size(spl%coeff, 3) /= N2_order + 1 .or. &
size(spl%coeff, 4) /= n1 .or. &
size(spl%coeff, 5) /= n2) then
#ifdef _OPENACC
if (acc_is_present(spl%coeff)) then
!$acc exit data delete(spl%coeff)
end if
#endif
deallocate (spl%coeff)
allocate (spl%coeff(n_quantities, 0:N1_order, 0:N2_order, n1, n2), stat=istat)
if (istat /= 0) then
error stop "construct_batch_splines_2d_resident_device:"// &
" Allocation failed for coeff"
end if
!$acc enter data create(spl%coeff)
end if

! GCC OpenACC bug workaround: check if work arrays need (re)allocation
! Must check BOTH work arrays since their sizes depend on different orders
work_needs_alloc = .not. allocated(work2)
if (allocated(work2)) then
if (size(work2, 1) /= n2 .or. &
size(work2, 2) /= n1*n_quantities .or. &
size(work2, 3) /= N2_order + 1 .or. &
size(work1, 1) /= n1 .or. &
size(work1, 2) /= n2*n_quantities .or. &
size(work1, 3) /= N1_order + 1) then
! Always allocate fresh - no reuse to avoid GCC OpenACC memory issues
#ifdef _OPENACC
if (acc_is_present(work2)) then
!$acc exit data delete(work2, work1)
end if
if (allocated(spl%coeff)) then
if (acc_is_present(spl%coeff)) then
!$acc exit data delete(spl%coeff)
!$acc wait
end if
deallocate (spl%coeff)
end if
#else
if (allocated(spl%coeff)) deallocate (spl%coeff)
#endif
deallocate (work2, work1)
work_needs_alloc = .true.
end if
end if

if (work_needs_alloc) then
allocate (work2(n2, n1*n_quantities, 0:N2_order), stat=istat)
if (istat /= 0) then
error stop "construct_batch_splines_2d_resident_device:"// &
" Allocation failed for work2"
end if
allocate (work1(n1, n2*n_quantities, 0:N1_order), stat=istat)
if (istat /= 0) then
error stop "construct_batch_splines_2d_resident_device:"// &
" Allocation failed for work1"
end if
end if
allocate (spl%coeff(n_quantities, 0:N1_order, 0:N2_order, n1, n2), stat=istat)
if (istat /= 0) then
error stop "construct_batch_splines_2d_resident_device:"// &
" Allocation failed for coeff"
end if
!$acc enter data create(spl%coeff)

allocate (work2(n2, n1*n_quantities, 0:N2_order), stat=istat)
if (istat /= 0) then
error stop "construct_batch_splines_2d_resident_device:"// &
" Allocation failed for work2"
end if
allocate (work1(n1, n2*n_quantities, 0:N1_order), stat=istat)
if (istat /= 0) then
error stop "construct_batch_splines_2d_resident_device:"// &
" Allocation failed for work1"
end if

h1 = spl%h_step(1)
h2 = spl%h_step(2)

#ifdef _OPENACC
block
if (work_needs_alloc) then
!$acc enter data create(work2, work1)
end if
#ifdef _OPENACC
block
!$acc enter data create(work2, work1)

! GCC OpenACC bug workaround: first parallel loop MUST write to spl%coeff
!$acc parallel loop collapse(3) gang present(work2, spl%coeff) &
Expand Down Expand Up @@ -415,26 +382,27 @@ subroutine construct_batch_splines_2d_resident_device_impl(x_min, x_max, n1, n2,
end do
end do

! GCC bug workaround: keep work arrays mapped for next call
end block
#endif
!$acc exit data delete(work2, work1)
!$acc wait
end block
#endif

! GCC bug workaround: keep work arrays allocated for next call
if (do_update) then
!$acc update self(spl%coeff(1:n_quantities, 0:N1_order, 0:N2_order, 1:n1, 1:n2))
end if
end subroutine construct_batch_splines_2d_resident_device_impl
if (do_update) then
!$acc update self(spl%coeff(1:n_quantities, 0:N1_order, 0:N2_order, 1:n1, 1:n2))
end if
end subroutine construct_batch_splines_2d_resident_device_impl

subroutine destroy_batch_splines_2d(spl)
type(BatchSplineData2D), intent(inout) :: spl

#ifdef _OPENACC
if (allocated(spl%coeff)) then
if (acc_is_present(spl%coeff)) then
!$acc exit data delete(spl%coeff)
end if
end if
#endif
#ifdef _OPENACC
if (allocated(spl%coeff)) then
if (acc_is_present(spl%coeff)) then
!$acc exit data delete(spl%coeff)
!$acc wait
end if
end if
#endif
if (allocated(spl%coeff)) deallocate (spl%coeff)
end subroutine destroy_batch_splines_2d

Expand Down
110 changes: 34 additions & 76 deletions src/interpolate/batch_interpolate_3d.f90
Original file line number Diff line number Diff line change
Expand Up @@ -610,14 +610,12 @@ subroutine construct_batch_splines_3d_resident_device_impl(x_min, x_max, n1, n2,
integer :: order1, order2, order3
integer :: i1, i2, i3, iq, k1, k2, k3
integer :: line, line2, line3
! GCC OpenACC bug workaround: keep work arrays persistent across calls
! to avoid address reuse that triggers the mapping bug
real(dp), allocatable, save :: work3(:, :, :)
real(dp), allocatable, save :: work2(:, :, :)
real(dp), allocatable, save :: work1(:, :, :)
! Work arrays - allocated fresh each call to avoid GCC OpenACC memory issues
real(dp), allocatable :: work3(:, :, :)
real(dp), allocatable :: work2(:, :, :)
real(dp), allocatable :: work1(:, :, :)
real(dp) :: h1, h2, h3
logical :: periodic1, periodic2, periodic3
logical :: work_needs_alloc

N1_order = order(1)
N2_order = order(2)
Expand Down Expand Up @@ -701,79 +699,40 @@ subroutine construct_batch_splines_3d_resident_device_impl(x_min, x_max, n1, n2,
spl%x_min = x_min
spl%num_quantities = n_quantities

! GCC OpenACC bug workaround: reuse existing allocation if possible
! to avoid repeated map/unmap cycle that crashes on iteration 2
if (.not. allocated(spl%coeff)) then
allocate (spl%coeff(n_quantities, 0:N1_order, 0:N2_order, 0:N3_order, &
n1, n2, n3), stat=istat)
if (istat /= 0) then
error stop "construct_batch_splines_3d_resident_device:"// &
" Allocation failed for coeff"
end if
!$acc enter data create(spl%coeff)
else if (size(spl%coeff, 1) /= n_quantities .or. &
size(spl%coeff, 2) /= N1_order + 1 .or. &
size(spl%coeff, 3) /= N2_order + 1 .or. &
size(spl%coeff, 4) /= N3_order + 1 .or. &
size(spl%coeff, 5) /= n1 .or. &
size(spl%coeff, 6) /= n2 .or. &
size(spl%coeff, 7) /= n3) then
! Size mismatch - need to reallocate
! Always allocate fresh - no reuse to avoid GCC OpenACC memory issues
#ifdef _OPENACC
if (allocated(spl%coeff)) then
if (acc_is_present(spl%coeff)) then
!$acc exit data delete(spl%coeff)
!$acc wait
end if
#endif
deallocate (spl%coeff)
allocate (spl%coeff(n_quantities, 0:N1_order, 0:N2_order, 0:N3_order, &
n1, n2, n3), stat=istat)
if (istat /= 0) then
error stop "construct_batch_splines_3d_resident_device:"// &
" Allocation failed for coeff"
end if
!$acc enter data create(spl%coeff)
end if
! If already allocated with right size, reuse (GCC bug workaround)

! GCC OpenACC bug workaround: check if work arrays need (re)allocation
! Must check ALL three work arrays since their sizes depend on different orders
work_needs_alloc = .not. allocated(work3)
if (allocated(work3)) then
if (size(work3, 1) /= n3 .or. &
size(work3, 2) /= n1*n2*n_quantities .or. &
size(work3, 3) /= N3_order + 1 .or. &
size(work2, 1) /= n2 .or. &
size(work2, 2) /= n1*n3*n_quantities .or. &
size(work2, 3) /= N2_order + 1 .or. &
size(work1, 1) /= n1 .or. &
size(work1, 2) /= n2*n3*n_quantities .or. &
size(work1, 3) /= N1_order + 1) then
#ifdef _OPENACC
if (acc_is_present(work3)) then
!$acc exit data delete(work3, work2, work1)
end if
#else
if (allocated(spl%coeff)) deallocate (spl%coeff)
#endif
deallocate (work3, work2, work1)
work_needs_alloc = .true.
end if
allocate (spl%coeff(n_quantities, 0:N1_order, 0:N2_order, 0:N3_order, &
n1, n2, n3), stat=istat)
if (istat /= 0) then
error stop "construct_batch_splines_3d_resident_device:"// &
" Allocation failed for coeff"
end if
!$acc enter data create(spl%coeff)

if (work_needs_alloc) then
allocate (work3(n3, n1*n2*n_quantities, 0:N3_order), stat=istat)
if (istat /= 0) then
error stop "construct_batch_splines_3d_resident_device:"// &
" Allocation failed for work3"
end if
allocate (work2(n2, n1*n3*n_quantities, 0:N2_order), stat=istat)
if (istat /= 0) then
error stop "construct_batch_splines_3d_resident_device:"// &
" Allocation failed for work2"
end if
allocate (work1(n1, n2*n3*n_quantities, 0:N1_order), stat=istat)
if (istat /= 0) then
error stop "construct_batch_splines_3d_resident_device:"// &
" Allocation failed for work1"
end if
allocate (work3(n3, n1*n2*n_quantities, 0:N3_order), stat=istat)
if (istat /= 0) then
error stop "construct_batch_splines_3d_resident_device:"// &
" Allocation failed for work3"
end if
allocate (work2(n2, n1*n3*n_quantities, 0:N2_order), stat=istat)
if (istat /= 0) then
error stop "construct_batch_splines_3d_resident_device:"// &
" Allocation failed for work2"
end if
allocate (work1(n1, n2*n3*n_quantities, 0:N1_order), stat=istat)
if (istat /= 0) then
error stop "construct_batch_splines_3d_resident_device:"// &
" Allocation failed for work1"
end if

h1 = spl%h_step(1)
Expand All @@ -782,9 +741,7 @@ subroutine construct_batch_splines_3d_resident_device_impl(x_min, x_max, n1, n2,

#ifdef _OPENACC
block
if (work_needs_alloc) then
!$acc enter data create(work3, work2, work1)
end if
!$acc enter data create(work3, work2, work1)

! Copy input data to work3
!$acc parallel present(work3) &
Expand Down Expand Up @@ -883,11 +840,10 @@ subroutine construct_batch_splines_3d_resident_device_impl(x_min, x_max, n1, n2,
end do
end do

! GCC bug workaround: keep work arrays mapped for next call
!$acc exit data delete(work3, work2, work1)
!$acc wait
end block
#endif

! GCC bug workaround: keep work arrays allocated for next call
if (do_update) then
!$acc update self(spl%coeff(1:n_quantities, 0:N1_order, 0:N2_order, &
!$acc& 0:N3_order, 1:n1, 1:n2, 1:n3))
Expand All @@ -901,6 +857,7 @@ subroutine destroy_batch_splines_3d(spl)
if (allocated(spl%coeff)) then
if (acc_is_present(spl%coeff)) then
!$acc exit data delete(spl%coeff)
!$acc wait
end if
end if
#endif
Expand All @@ -914,6 +871,7 @@ subroutine destroy_batch_splines_3d_device_only(spl)
if (allocated(spl%coeff)) then
if (acc_is_present(spl%coeff)) then
!$acc exit data delete(spl%coeff)
!$acc wait
end if
end if
#endif
Expand Down
Loading