Skip to content
This repository was archived by the owner on Mar 20, 2023. It is now read-only.

Commit 781d34f

Browse files
authored
Fixes and improvements from LLVM/XLC work. (#716)
Code fixes for XLC and Clang execution without build system changes. This mainly adds missing OpenMP pragmas and makes cnrn_target_ wrappers visible to NMODL.
1 parent 78081b4 commit 781d34f

File tree

13 files changed

+85
-60
lines changed

13 files changed

+85
-60
lines changed

CMake/MakefileBuildOptions.cmake

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ string(TOUPPER "${CMAKE_BUILD_TYPE}" _BUILD_TYPE)
7575
set(CORENRN_CXX_FLAGS
7676
"${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${_BUILD_TYPE}} ${CXX14_STD_FLAGS} ${NVHPC_ACC_COMP_FLAGS} ${NVHPC_CXX_INLINE_FLAGS}"
7777
)
78+
set(CORENRN_LD_FLAGS "${NVHPC_ACC_LINK_FLAGS}")
7879

7980
# =============================================================================
8081
# nmodl/mod2c related options : TODO

coreneuron/gpu/nrn_acc_manager.cpp

Lines changed: 4 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -24,16 +24,14 @@
2424
#include "coreneuron/mpi/nrnmpidec.h"
2525
#include "coreneuron/utils/utils.hpp"
2626

27+
#ifdef CRAYPAT
28+
#include <pat_api.h>
29+
#endif
30+
2731
#ifdef _OPENACC
2832
#include <openacc.h>
2933
#endif
30-
#ifdef CORENEURON_PREFER_OPENMP_OFFLOAD
31-
#include <omp.h>
32-
#endif
3334

34-
#ifdef CRAYPAT
35-
#include <pat_api.h>
36-
#endif
3735
namespace coreneuron {
3836
extern InterleaveInfo* interleave_info;
3937
void copy_ivoc_vect_to_device(const IvocVect& iv, IvocVect& div);
@@ -43,51 +41,6 @@ void nrn_ion_global_map_delete_from_device();
4341
void nrn_VecPlay_copyto_device(NrnThread* nt, void** d_vecplay);
4442
void nrn_VecPlay_delete_from_device(NrnThread* nt);
4543

46-
template <typename T>
47-
T* cnrn_target_deviceptr(const T* h_ptr) {
48-
#if defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && defined(_OPENACC)
49-
return static_cast<T*>(acc_deviceptr(const_cast<T*>(h_ptr)));
50-
#elif defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && defined(_OPENMP)
51-
return static_cast<T*>(omp_get_mapped_ptr(const_cast<T*>(h_ptr), omp_get_default_device()));
52-
#else
53-
throw std::runtime_error("cnrn_target_deviceptr() not implemented without OpenACC/OpenMP and gpu build");
54-
#endif
55-
}
56-
57-
template <typename T>
58-
T* cnrn_target_copyin(const T* h_ptr, std::size_t len = 1) {
59-
#if defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && defined(_OPENACC)
60-
return static_cast<T*>(acc_copyin(const_cast<T*>(h_ptr), len * sizeof(T)));
61-
#elif defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && defined(_OPENMP)
62-
#pragma omp target enter data map(to:h_ptr[:len])
63-
return cnrn_target_deviceptr(const_cast<T*>(h_ptr));
64-
#else
65-
throw std::runtime_error("cnrn_target_copyin() not implemented without OpenACC/OpenMP and gpu build");
66-
#endif
67-
}
68-
69-
template <typename T>
70-
void cnrn_target_delete(T* h_ptr, std::size_t len = 1) {
71-
#if defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && defined(_OPENACC)
72-
acc_delete(h_ptr, len * sizeof(T));
73-
#elif defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && defined(_OPENMP)
74-
#pragma omp target exit data map(delete: h_ptr[:len])
75-
#else
76-
throw std::runtime_error("cnrn_target_delete() not implemented without OpenACC/OpenMP and gpu build");
77-
#endif
78-
}
79-
80-
template <typename T>
81-
void cnrn_target_memcpy_to_device(T* d_ptr, const T* h_ptr, std::size_t len = 1) {
82-
#if defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && defined(_OPENACC)
83-
acc_memcpy_to_device(d_ptr, const_cast<T*>(h_ptr), len * sizeof(T));
84-
#elif defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && defined(_OPENMP)
85-
omp_target_memcpy(d_ptr, const_cast<T*>(h_ptr), len* sizeof(T), 0, 0, omp_get_default_device(), omp_get_initial_device());
86-
#else
87-
throw std::runtime_error("cnrn_target_memcpy_to_device() not implemented without OpenACC/OpenMP and gpu build");
88-
#endif
89-
}
90-
9144
/* note: threads here are corresponding to global nrn_threads array */
9245
void setup_nrnthreads_on_device(NrnThread* threads, int nthreads) {
9346
#ifdef _OPENACC

coreneuron/kinderiv.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,9 @@ def write_out_kinderiv(fout):
5959
fout.write("\n/* declarations */\n")
6060
fout.write("\nnamespace coreneuron {\n")
6161

62+
if deriv or kin or euler:
63+
fout.write('nrn_pragma_omp(declare target)\n')
64+
6265
for item in deriv:
6366
fout.write('#pragma acc routine seq\n')
6467
fout.write('extern int %s%s(_threadargsproto_);\n' % (item[0], item[1]))
@@ -73,6 +76,9 @@ def write_out_kinderiv(fout):
7376
fout.write('#pragma acc routine seq\n')
7477
fout.write('extern int %s%s(_threadargsproto_);\n' % (item[0], item[1]))
7578

79+
if deriv or kin or euler:
80+
fout.write('nrn_pragma_omp(end declare target)\n')
81+
7682
fout.write("\n/* callback indices */\n")
7783
derivoffset = 1
7884
kinoffset = 1

coreneuron/mechanism/eion.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,7 @@ double nrn_nernst(double ci, double co, double z, double celsius) {
177177
}
178178
}
179179

180+
nrn_pragma_omp(declare target)
180181
void nrn_wrote_conc(int type,
181182
double* p1,
182183
int p2,
@@ -193,6 +194,7 @@ void nrn_wrote_conc(int type,
193194
pe[0] = nrn_nernst(pe[1 * _STRIDE], pe[2 * _STRIDE], gimap[type][2], celsius);
194195
}
195196
}
197+
nrn_pragma_omp(end declare target)
196198

197199
static double efun(double x) {
198200
if (fabs(x) < 1e-4) {

coreneuron/mechanism/mech/dimplic.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "coreneuron/mechanism/mech/mod2c_core_thread.hpp"
2525
#include "_kinderiv.h"
2626
namespace coreneuron {
27+
nrn_pragma_omp(declare target)
2728
int derivimplicit_thread(int n, int* slist, int* dlist, DIFUN fun, _threadargsproto_) {
2829
difun(fun);
2930
return 0;
@@ -48,5 +49,6 @@ int nrn_kinetic_steer(int fun, SparseObj* so, double* rhs, _threadargsproto_) {
4849
switch (fun) { _NRN_KINETIC_CASES }
4950
return 0;
5051
}
52+
nrn_pragma_omp(end declare target)
5153

5254
} // namespace coreneuron

coreneuron/mechanism/register_mech.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@
1919

2020
namespace coreneuron {
2121
int secondorder = 0;
22+
nrn_pragma_omp(declare target)
2223
double t, dt, celsius, pi;
24+
nrn_pragma_omp(end declare target)
2325
int rev_dt;
2426

2527
using Pfrv = void (*)();

coreneuron/network/cvodestb.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,12 +86,14 @@ void fixed_play_continuous(NrnThread* nt) {
8686

8787
// NOTE : this implementation is duplicated in "coreneuron/mechanism/nrnoc_ml.ispc"
8888
// for the ISPC backend. If changes are required, make sure to change ISPC as well.
89+
nrn_pragma_omp(declare target)
8990
int at_time(NrnThread* nt, double te) {
9091
double x = te - 1e-11;
9192
if (x <= nt->_t && x > (nt->_t - nt->_dt)) {
9293
return 1;
9394
}
9495
return 0;
9596
}
97+
nrn_pragma_omp(end declare target)
9698

9799
} // namespace coreneuron

coreneuron/network/netcvode.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -537,7 +537,7 @@ void NetCvode::check_thresh(NrnThread* nt) { // for default method
537537
nrn_pragma_acc(parallel loop present(
538538
nt [0:1], presyns_helper [0:nt->n_presyn], presyns [0:nt->n_presyn], actual_v [0:nt->end])
539539
copy(net_send_buf_count) if (nt->compute_gpu) async(nt->stream_id))
540-
nrn_pragma_omp(target teams distribute parallel for simd map(tofrom: net_send_buf_count) if(nt->compute_gpu))
540+
nrn_pragma_omp(target teams distribute parallel for map(tofrom: net_send_buf_count) if(nt->compute_gpu))
541541
for (int i = 0; i < nt->ncell; ++i) {
542542
PreSyn* ps = presyns + i;
543543
PreSynHelper* psh = presyns_helper + i;

coreneuron/sim/scopmath/crout_thread.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ namespace coreneuron {
5050
#define ix(arg) ((arg) *_STRIDE)
5151

5252
/* having a differnt permutation per instance may not be a good idea */
53+
nrn_pragma_omp(declare target)
5354
int nrn_crout_thread(NewtonSpace* ns, int n, double** a, int* perm, _threadargsproto_) {
5455
int save_i = 0;
5556

@@ -224,4 +225,5 @@ void nrn_scopmath_solve_thread(int n,
224225
}
225226
}
226227
}
228+
nrn_pragma_omp(end declare target)
227229
} // namespace coreneuron

coreneuron/sim/scopmath/newton_thread.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ namespace coreneuron {
5959
#define ix(arg) ((arg) *_STRIDE)
6060
#define s_(arg) _p[s[arg] * _STRIDE]
6161

62+
nrn_pragma_omp(declare target)
6263
int nrn_newton_thread(NewtonSpace* ns,
6364
int n,
6465
int* s,
@@ -136,6 +137,7 @@ int nrn_newton_thread(NewtonSpace* ns,
136137

137138
return (error);
138139
}
140+
nrn_pragma_omp(end declare target)
139141

140142
/*------------------------------------------------------------*/
141143
/* */

0 commit comments

Comments
 (0)